osm_lcm/lcm_helm_conn.py

   1 ##
   2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #    http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  13 # implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17 ##
  18 import functools
  19 import yaml
  20 import asyncio
  21 import socket
  22 import uuid
  23
  24 from grpclib.client import Channel
  25
  26 from osm_lcm.frontend_pb2 import PrimitiveRequest
  27 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
  28 from osm_lcm.frontend_grpc import FrontendExecutorStub
  29
  30 from n2vc.n2vc_conn import N2VCConnector
  31 from n2vc.k8s_helm_conn import K8sHelmConnector
  32 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
  33
  34 from osm_lcm.lcm_utils import deep_get
  35
  36
  37 def retryer(max_wait_time=60, delay_time=10):
  38     def wrapper(func):
  39         retry_exceptions = (
  40             ConnectionRefusedError
  41         )
  42
  43         @functools.wraps(func)
  44         async def wrapped(*args, **kwargs):
  45             wait_time = max_wait_time
  46             while wait_time > 0:
  47                 try:
  48                     return await func(*args, **kwargs)
  49                 except retry_exceptions:
  50                     wait_time = wait_time - delay_time
  51                     await asyncio.sleep(delay_time)
  52                     continue
  53             else:
  54                 return ConnectionRefusedError
  55         return wrapped
  56     return wrapper
  57
  58
class LCMHelmConn(N2VCConnector):
    """
    N2VC connector for execution environments deployed as helm charts.

    The execution environments are installed and uninstalled through a
    K8sHelmConnector and are contacted afterwards through a grpc channel
    to obtain their ssh key and to run primitives on them.
    """
    # Namespace given to helm when installing the chart; also the prefix of the ee_id
    _KUBECTL_OSM_NAMESPACE = "osm"
    # Name used to look up the system cluster in the "k8sclusters" collection
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    # Port where the grpc channel to the execution environment is opened
    _EE_SERVICE_PORT = 50050

    # Time between retries
    _EE_RETRY_DELAY = 10
    # Initial max retry time
    _MAX_INITIAL_RETRY_TIME = 300
    # Other retry time
    _MAX_RETRY_TIME = 30
  70
  71     def __init__(self,
  72                  db: object,
  73                  fs: object,
  74                  log: object = None,
  75                  loop: object = None,
  76                  url: str = None,
  77                  username: str = None,
  78                  vca_config: dict = None,
  79                  on_update_db=None, ):
  80         """
  81         Initialize EE helm connector.
  82         """
  83
  84         # parent class constructor
  85         N2VCConnector.__init__(
  86             self,
  87             db=db,
  88             fs=fs,
  89             log=log,
  90             loop=loop,
  91             url=url,
  92             username=username,
  93             vca_config=vca_config,
  94             on_update_db=on_update_db,
  95         )
  96
  97         self.log.debug("Initialize helm N2VC connector")
  98
  99         # TODO - Obtain data from configuration
 100         self._ee_service_port = self._EE_SERVICE_PORT
 101
 102         self._retry_delay = self._EE_RETRY_DELAY
 103         self._max_retry_time = self._MAX_RETRY_TIME
 104         self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
 105
 106         # initialize helm connector
 107         self._k8sclusterhelm = K8sHelmConnector(
 108             kubectl_command=self.vca_config.get("kubectlpath"),
 109             helm_command=self.vca_config.get("helmpath"),
 110             fs=self.fs,
 111             log=self.log,
 112             db=self.db,
 113             on_update_db=None,
 114         )
 115
 116         self._system_cluster_id = None
 117         self.log.info("Helm N2VC connector initialized")
 118
 119     # TODO - ¿reuse_ee_id?
 120     async def create_execution_environment(self,
 121                                            namespace: str,
 122                                            db_dict: dict,
 123                                            reuse_ee_id: str = None,
 124                                            progress_timeout: float = None,
 125                                            total_timeout: float = None,
 126                                            artifact_path: str = None,
 127                                            vca_type: str = None) -> (str, dict):
 128         """
 129         Creates a new helm execution environment deploying the helm-chat indicated in the
 130         attifact_path
 131         :param str namespace: This param is not used, all helm charts are deployed in the osm
 132         system namespace
 133         :param dict db_dict: where to write to database when the status changes.
 134             It contains a dictionary with {collection: str, filter: {},  path: str},
 135                 e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
 136                 "_admin.deployed.VCA.3"}
 137         :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
 138         :param float progress_timeout:
 139         :param float total_timeout:
 140         :param str artifact_path  path of package content
 141         :param str vca_type  Type of vca, not used as assumed of type helm
 142         :returns str, dict: id of the new execution environment including namespace.helm_id
 143         and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
 144         """
 145
 146         self.log.info(
 147             "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
 148             "reuse_ee_id: {}".format(
 149                 namespace, artifact_path, db_dict, reuse_ee_id)
 150         )
 151
 152         # Validate artifact-path is provided
 153         if artifact_path is None or len(artifact_path) == 0:
 154             raise N2VCBadArgumentsException(
 155                 message="artifact_path is mandatory", bad_args=["artifact_path"]
 156             )
 157
 158         # Validate artifact-path exists
 159
 160         # remove / in charm path
 161         while artifact_path.find("//") >= 0:
 162             artifact_path = artifact_path.replace("//", "/")
 163
 164         # check charm path
 165         if self.fs.file_exists(artifact_path):
 166             helm_chart_path = artifact_path
 167         else:
 168             msg = "artifact path does not exist: {}".format(artifact_path)
 169             raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])
 170
 171         if artifact_path.startswith("/"):
 172             full_path = self.fs.path + helm_chart_path
 173         else:
 174             full_path = self.fs.path + "/" + helm_chart_path
 175
 176         try:
 177             # Call helm conn install
 178             # Obtain system cluster id from database
 179             system_cluster_uuid = self._get_system_cluster_id()
 180
 181             self.log.debug("install helm chart: {}".format(full_path))
 182             helm_id = await self._k8sclusterhelm.install(system_cluster_uuid, kdu_model=full_path,
 183                                                          namespace=self._KUBECTL_OSM_NAMESPACE,
 184                                                          db_dict=db_dict,
 185                                                          timeout=progress_timeout)
 186
 187             ee_id = "{}.{}".format(self._KUBECTL_OSM_NAMESPACE, helm_id)
 188             return ee_id, None
 189         except Exception as e:
 190             self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
 191             raise N2VCException("Error deploying chart ee: {}".format(e))
 192
 193     async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
 194                                              progress_timeout: float = None, total_timeout: float = None) -> str:
 195         # nothing to do
 196         pass
 197
 198     async def install_configuration_sw(self,
 199                                        ee_id: str,
 200                                        artifact_path: str,
 201                                        db_dict: dict,
 202                                        progress_timeout: float = None,
 203                                        total_timeout: float = None,
 204                                        config: dict = None,
 205                                        num_units: int = 1,
 206                                        vca_type: str = None
 207                                        ):
 208         # nothing to do
 209         pass
 210
 211     async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
 212         # nothing to do
 213         pass
 214
 215     async def remove_relation(self):
 216         # nothing to to
 217         pass
 218
 219     async def get_status(self, namespace: str, yaml_format: bool = True):
 220         # not used for this connector
 221         pass
 222
 223     async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
 224                                      total_timeout: float = None) -> str:
 225         """
 226         Obtains ssh-public key from ee executing GetSShKey method from the ee.
 227
 228         :param str ee_id: the id of the execution environment returned by
 229             create_execution_environment or register_execution_environment
 230         :param dict db_dict:
 231         :param float progress_timeout:
 232         :param float total_timeout:
 233         :returns: public key of the execution environment
 234         """
 235
 236         self.log.info(
 237             "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
 238                 ee_id, db_dict)
 239         )
 240
 241         # check arguments
 242         if ee_id is None or len(ee_id) == 0:
 243             raise N2VCBadArgumentsException(
 244                 message="ee_id is mandatory", bad_args=["ee_id"]
 245             )
 246
 247         try:
 248             # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
 249             namespace, helm_id = self._get_ee_id_parts(ee_id)
 250             ip_addr = socket.gethostbyname(helm_id)
 251
 252             # Obtain ssh_key from the ee, this method will implement retries to allow the ee
 253             # install libraries and start successfully
 254             ssh_key = await self._get_ssh_key(ip_addr)
 255             return ssh_key
 256         except Exception as e:
 257             self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
 258             raise N2VCException("Error obtaining ee ssh_ke: {}".format(e))
 259
 260     async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
 261                              progress_timeout: float = None, total_timeout: float = None) -> str:
 262         """
 263         Execute a primitive in the execution environment
 264
 265         :param str ee_id: the one returned by create_execution_environment or
 266             register_execution_environment with the format namespace.helm_id
 267         :param str primitive_name: must be one defined in the software. There is one
 268             called 'config', where, for the proxy case, the 'credentials' of VM are
 269             provided
 270         :param dict params_dict: parameters of the action
 271         :param dict db_dict: where to write into database when the status changes.
 272                         It contains a dict with
 273                             {collection: <str>, filter: {},  path: <str>},
 274                             e.g. {collection: "nslcmops", filter:
 275                                 {_id: <nslcmop_id>, path: "_admin.VCA"}
 276                         It will be used to store information about intermediate notifications
 277         :param float progress_timeout:
 278         :param float total_timeout:
 279         :returns str: primitive result, if ok. It raises exceptions in case of fail
 280         """
 281
 282         self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
 283             ee_id, primitive_name, params_dict, db_dict
 284         ))
 285
 286         # check arguments
 287         if ee_id is None or len(ee_id) == 0:
 288             raise N2VCBadArgumentsException(
 289                 message="ee_id is mandatory", bad_args=["ee_id"]
 290             )
 291         if primitive_name is None or len(primitive_name) == 0:
 292             raise N2VCBadArgumentsException(
 293                 message="action_name is mandatory", bad_args=["action_name"]
 294             )
 295         if params_dict is None:
 296             params_dict = dict()
 297
 298         try:
 299             namespace, helm_id = self._get_ee_id_parts(ee_id)
 300             ip_addr = socket.gethostbyname(helm_id)
 301         except Exception as e:
 302             self.log.error("Error getting ee ip ee: {}".format(e))
 303             raise N2VCException("Error getting ee ip ee: {}".format(e))
 304
 305         if primitive_name == "config":
 306             try:
 307                 # Execute config primitive, higher timeout to check the case ee is starting
 308                 status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
 309                 self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
 310                     ee_id, status, detailed_message))
 311                 if status != "OK":
 312                     self.log.error("Error configuring helm ee, status: {}, message: {}".format(
 313                         status, detailed_message))
 314                     raise N2VCExecutionException(
 315                         message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
 316                             ee_id, status, detailed_message
 317                         ),
 318                         primitive_name=primitive_name,
 319                     )
 320             except Exception as e:
 321                 self.log.error("Error configuring helm ee: {}".format(e))
 322                 raise N2VCExecutionException(
 323                     message="Error configuring helm ee_id: {}, {}".format(
 324                         ee_id, e
 325                     ),
 326                     primitive_name=primitive_name,
 327                 )
 328             return "CONFIG OK"
 329         else:
 330             try:
 331                 # Execute primitive
 332                 status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
 333                                                                          params_dict, db_dict=db_dict)
 334                 self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
 335                     primitive_name, ee_id, status, detailed_message))
 336                 if status != "OK" and status != "PROCESSING":
 337                     self.log.error(
 338                         "Execute primitive {} returned not ok status: {}, message: {}".format(
 339                             primitive_name, status, detailed_message)
 340                     )
 341                     raise N2VCExecutionException(
 342                         message="Execute primitive {} returned not ok status: {}, message: {}".format(
 343                             primitive_name, status, detailed_message
 344                         ),
 345                         primitive_name=primitive_name,
 346                     )
 347             except Exception as e:
 348                 self.log.error(
 349                     "Error executing primitive {}: {}".format(primitive_name, e)
 350                 )
 351                 raise N2VCExecutionException(
 352                     message="Error executing primitive {} into ee={} : {}".format(
 353                         primitive_name, ee_id, e
 354                     ),
 355                     primitive_name=primitive_name,
 356                 )
 357             return detailed_message
 358
 359     async def deregister_execution_environments(self):
 360         # nothing to be done
 361         pass
 362
 363     async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
 364         """
 365         Delete an execution environment
 366         :param str ee_id: id of the execution environment to delete, included namespace.helm_id
 367         :param dict db_dict: where to write into database when the status changes.
 368                         It contains a dict with
 369                             {collection: <str>, filter: {},  path: <str>},
 370                             e.g. {collection: "nsrs", filter:
 371                                 {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
 372         :param float total_timeout:
 373         """
 374
 375         self.log.info("ee_id: {}".format(ee_id))
 376
 377         # check arguments
 378         if ee_id is None:
 379             raise N2VCBadArgumentsException(
 380                 message="ee_id is mandatory", bad_args=["ee_id"]
 381             )
 382
 383         try:
 384
 385             # Obtain cluster_uuid
 386             system_cluster_uuid = self._get_system_cluster_id()
 387
 388             # Get helm_id
 389             namespace, helm_id = self._get_ee_id_parts(ee_id)
 390
 391             # Uninstall chart
 392             await self._k8sclusterhelm.uninstall(system_cluster_uuid, helm_id)
 393             self.log.info("ee_id: {} deleted".format(ee_id))
 394         except Exception as e:
 395             self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
 396             raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))
 397
 398     async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
 399         # method not implemented for this connector, execution environments must be deleted individually
 400         pass
 401
 402     async def install_k8s_proxy_charm(
 403         self,
 404         charm_name: str,
 405         namespace: str,
 406         artifact_path: str,
 407         db_dict: dict,
 408         progress_timeout: float = None,
 409         total_timeout: float = None,
 410         config: dict = None,
 411     ) -> str:
 412         pass
 413
 414     @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 415     async def _get_ssh_key(self, ip_addr):
 416         channel = Channel(ip_addr, self._ee_service_port)
 417         try:
 418             stub = FrontendExecutorStub(channel)
 419             self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
 420             reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
 421             return reply.message
 422         finally:
 423             channel.close()
 424
 425     @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 426     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
 427         return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)
 428
 429     @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 430     async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
 431         return await  self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)
 432
 433     async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
 434
 435         channel = Channel(ip_addr, self._ee_service_port)
 436         try:
 437             stub = FrontendExecutorStub(channel)
 438             async with stub.RunPrimitive.open() as stream:
 439                 primitive_id = str(uuid.uuid1())
 440                 result = None
 441                 self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
 442                                format(primitive_id, primitive_name, params))
 443                 await stream.send_message(
 444                     PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
 445                 async for reply in stream:
 446                     self.log.debug("Received reply: {}".format(reply))
 447                     result = reply
 448                     # If db_dict provided write notifs in database
 449                     if db_dict:
 450                         self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
 451                 if result:
 452                     return reply.status, reply.detailed_message
 453                 else:
 454                     return "ERROR", "No result received"
 455         finally:
 456             channel.close()
 457
 458     def _write_op_detailed_status(self, db_dict, status, detailed_message):
 459
 460         # write ee_id to database: _admin.deployed.VCA.x
 461         try:
 462             the_table = db_dict["collection"]
 463             the_filter = db_dict["filter"]
 464             update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
 465             # self.log.debug('Writing ee_id to database: {}'.format(the_path))
 466             self.db.set_one(
 467                 table=the_table,
 468                 q_filter=the_filter,
 469                 update_dict=update_dict,
 470                 fail_on_empty=True,
 471             )
 472         except asyncio.CancelledError:
 473             raise
 474         except Exception as e:
 475             self.log.error("Error writing detailedStatus to database: {}".format(e))
 476
 477     def _get_system_cluster_id(self):
 478         if not self._system_cluster_id:
 479             db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
 480             k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart", "id"))
 481             self._system_cluster_id = k8s_hc_id
 482         return self._system_cluster_id
 483
 484     def _get_ee_id_parts(self, ee_id):
 485         namespace, _, helm_id = ee_id.partition('.')
 486         return namespace, helm_id