# Feature 9952: Distributed Proxy Charms
# osm/LCM.git / osm_lcm/lcm_helm_conn.py
1 ##
2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 ##
18 import functools
19 import yaml
20 import asyncio
21 import socket
22 import uuid
23 import os
24
25 from grpclib.client import Channel
26
27 from osm_lcm.frontend_pb2 import PrimitiveRequest
28 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
29 from osm_lcm.frontend_grpc import FrontendExecutorStub
30 from osm_lcm.lcm_utils import LcmBase
31
32 from n2vc.n2vc_conn import N2VCConnector
33 from n2vc.k8s_helm_conn import K8sHelmConnector
34 from n2vc.k8s_helm3_conn import K8sHelm3Connector
35 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
36
37 from osm_lcm.lcm_utils import deep_get
38
39
def retryer(max_wait_time=60, delay_time=10):
    """
    Decorator factory that retries an async callable while it raises
    ConnectionRefusedError (the execution environment grpc service may
    not be reachable yet while its pod is starting).

    :param max_wait_time: maximum accumulated waiting time (seconds)
        spent between retries before giving up
    :param delay_time: time (seconds) slept between consecutive retries
    :returns: decorator to apply to a coroutine function
    """
    def wrapper(func):
        # Exceptions considered transient and therefore retried
        retry_exceptions = (
            ConnectionRefusedError,
        )

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            wait_time = max_wait_time
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions:
                    wait_time = wait_time - delay_time
                    await asyncio.sleep(delay_time)
            # Retries exhausted: propagate the error to the caller.
            # (The original code returned the ConnectionRefusedError class
            # here, which callers would mistake for a successful result.)
            raise ConnectionRefusedError(
                "Maximum retry time exceeded ({} seconds)".format(max_wait_time)
            )
        return wrapped
    return wrapper
60
61
class LCMHelmConn(N2VCConnector, LcmBase):
    """
    N2VC connector for execution environments deployed as helm charts.

    Each execution environment (ee) is a helm chart installed on the OSM
    system cluster; this connector talks to the ee through the grpc
    frontend service exposed by the chart (GetSshKey / RunPrimitive).
    """

    # Namespace where all ee helm charts are installed
    _KUBECTL_OSM_NAMESPACE = "osm"
    # Name of the system cluster registered in the "k8sclusters" collection
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    # Port of the grpc frontend service exposed by the ee
    _EE_SERVICE_PORT = 50050

    # Time between retries
    _EE_RETRY_DELAY = 10
    # Initial max retry time (the ee may need time to install and start)
    _MAX_INITIAL_RETRY_TIME = 300
    # Max retry time for other operations
    _MAX_RETRY_TIME = 30

    def __init__(self,
                 db: object,
                 fs: object,
                 log: object = None,
                 loop: object = None,
                 url: str = None,
                 username: str = None,
                 vca_config: dict = None,
                 on_update_db=None, ):
        """
        Initialize EE helm connector.
        """

        # parent class constructor
        N2VCConnector.__init__(
            self,
            db=db,
            fs=fs,
            log=log,
            loop=loop,
            url=url,
            username=username,
            vca_config=vca_config,
            on_update_db=on_update_db,
        )

        self.log.debug("Initialize helm N2VC connector")

        # TODO - Obtain data from configuration
        self._ee_service_port = self._EE_SERVICE_PORT

        self._retry_delay = self._EE_RETRY_DELAY
        self._max_retry_time = self._MAX_RETRY_TIME
        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME

        # initialize helm connector for helmv2 and helmv3
        self._k8sclusterhelm2 = K8sHelmConnector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helmpath"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        self._k8sclusterhelm3 = K8sHelm3Connector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helm3path"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        # Cached system cluster id, resolved lazily by _get_system_cluster_id
        self._system_cluster_id = None
        self.log.info("Helm N2VC connector initialized")

    # TODO - ¿reuse_ee_id?
    async def create_execution_environment(self,
                                           namespace: str,
                                           db_dict: dict,
                                           reuse_ee_id: str = None,
                                           progress_timeout: float = None,
                                           total_timeout: float = None,
                                           config: dict = None,
                                           artifact_path: str = None,
                                           vca_type: str = None,
                                           *kargs, **kwargs) -> (str, dict):
        """
        Creates a new helm execution environment deploying the helm chart indicated in the
        artifact_path
        :param str namespace: This param is not used, all helm charts are deployed in the osm
        system namespace
        :param dict db_dict: where to write to database when the status changes.
            It contains a dictionary with {collection: str, filter: {}, path: str},
                e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
                "_admin.deployed.VCA.3"}
        :param str reuse_ee_id: ee id from an older execution. TODO - right now this param is not used
        :param float progress_timeout:
        :param float total_timeout:
        :param dict config: General variables to instantiate KDU
        :param str artifact_path: path of package content
        :param str vca_type: Type of vca, must be type helm or helm-v3
        :returns str, dict: id of the new execution environment including namespace.helm_id
        and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
        """

        self.log.info(
            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
            "reuse_ee_id: {}".format(
                namespace, artifact_path, db_dict, reuse_ee_id)
        )

        # Validate artifact-path is provided
        if artifact_path is None or len(artifact_path) == 0:
            raise N2VCBadArgumentsException(
                message="artifact_path is mandatory", bad_args=["artifact_path"]
            )

        # Validate artifact-path exists and sync path
        from_path = os.path.split(artifact_path)[0]
        self.fs.sync(from_path)

        # remove // in charm path
        while artifact_path.find("//") >= 0:
            artifact_path = artifact_path.replace("//", "/")

        # check charm path
        if self.fs.file_exists(artifact_path):
            helm_chart_path = artifact_path
        else:
            msg = "artifact path does not exist: {}".format(artifact_path)
            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])

        if artifact_path.startswith("/"):
            full_path = self.fs.path + helm_chart_path
        else:
            full_path = self.fs.path + "/" + helm_chart_path

        try:
            # Call helm conn install
            # Obtain system cluster id from database
            system_cluster_uuid = await self._get_system_cluster_id()
            # Add parameter osm if exists to global
            if config and config.get("osm"):
                if not config.get("global"):
                    config["global"] = {}
                config["global"]["osm"] = config.get("osm")

            self.log.debug("install helm chart: {}".format(full_path))
            if vca_type == "helm":
                helm_id = await self._k8sclusterhelm2.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)
            else:
                helm_id = await self._k8sclusterhelm3.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)

            # ee_id format: "<version>:<namespace>.<helm_id>", parsed back by _get_ee_id_parts
            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
            return ee_id, None
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
            raise N2VCException("Error deploying chart ee: {}".format(e))

    async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
                                             progress_timeout: float = None, total_timeout: float = None,
                                             *kargs, **kwargs) -> str:
        # nothing to do
        pass

    async def install_configuration_sw(self,
                                       ee_id: str,
                                       artifact_path: str,
                                       db_dict: dict,
                                       progress_timeout: float = None,
                                       total_timeout: float = None,
                                       config: dict = None,
                                       num_units: int = 1,
                                       vca_type: str = None
                                       ):
        # nothing to do
        pass

    async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
        # nothing to do
        pass

    async def remove_relation(self):
        # nothing to do
        pass

    async def get_status(self, namespace: str, yaml_format: bool = True):
        # not used for this connector
        pass

    async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
                                     total_timeout: float = None) -> str:
        """
        Obtains ssh-public key from ee executing GetSShKey method from the ee.

        :param str ee_id: the id of the execution environment returned by
            create_execution_environment or register_execution_environment
        :param dict db_dict:
        :param float progress_timeout:
        :param float total_timeout:
        :returns: public key of the execution environment
        """

        self.log.info(
            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
                ee_id, db_dict)
        )

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)

            # Obtain ssh_key from the ee, this method will implement retries to allow the ee
            # install libraries and start successfully
            ssh_key = await self._get_ssh_key(ip_addr)
            return ssh_key
        except Exception as e:
            self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
            # NOTE: message typo ("ssh_ke") fixed
            raise N2VCException("Error obtaining ee ssh_key: {}".format(e))

    async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
                             progress_timeout: float = None, total_timeout: float = None) -> str:
        """
        Execute a primitive in the execution environment

        :param str ee_id: the one returned by create_execution_environment or
            register_execution_environment with the format namespace.helm_id
        :param str primitive_name: must be one defined in the software. There is one
            called 'config', where, for the proxy case, the 'credentials' of VM are
            provided
        :param dict params_dict: parameters of the action
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nslcmops", filter:
                {_id: <nslcmop_id>, path: "_admin.VCA"}
            It will be used to store information about intermediate notifications
        :param float progress_timeout:
        :param float total_timeout:
        :returns str: primitive result, if ok. It raises exceptions in case of fail
        """

        self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
            ee_id, primitive_name, params_dict, db_dict
        ))

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )
        if primitive_name is None or len(primitive_name) == 0:
            raise N2VCBadArgumentsException(
                message="action_name is mandatory", bad_args=["action_name"]
            )
        if params_dict is None:
            params_dict = dict()

        try:
            # The ee hostname (helm_id) is resolved to an ip by kubernetes dns
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)
        except Exception as e:
            self.log.error("Error getting ee ip ee: {}".format(e))
            raise N2VCException("Error getting ee ip ee: {}".format(e))

        if primitive_name == "config":
            try:
                # Execute config primitive, higher timeout to check the case ee is starting
                status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
                self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
                    ee_id, status, detailed_message))
                if status != "OK":
                    self.log.error("Error configuring helm ee, status: {}, message: {}".format(
                        status, detailed_message))
                    raise N2VCExecutionException(
                        message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                            ee_id, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error("Error configuring helm ee: {}".format(e))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, {}".format(
                        ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return "CONFIG OK"
        else:
            try:
                # Execute primitive
                status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
                                                                         params_dict, db_dict=db_dict)
                self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
                    primitive_name, ee_id, status, detailed_message))
                if status != "OK" and status != "PROCESSING":
                    self.log.error(
                        "Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message)
                    )
                    raise N2VCExecutionException(
                        message="Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error(
                    "Error executing primitive {}: {}".format(primitive_name, e)
                )
                raise N2VCExecutionException(
                    message="Error executing primitive {} into ee={} : {}".format(
                        primitive_name, ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return detailed_message

    async def deregister_execution_environments(self):
        # nothing to be done
        pass

    async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
        """
        Delete an execution environment
        :param str ee_id: id of the execution environment to delete, included namespace.helm_id
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nsrs", filter:
                {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
        :param float total_timeout:
        """

        self.log.info("ee_id: {}".format(ee_id))

        # check arguments
        if ee_id is None:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:

            # Obtain cluster_uuid
            system_cluster_uuid = await self._get_system_cluster_id()

            # Get helm_id
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)

            # Uninstall chart, for backward compatibility we must assume that if there is no
            # version it is helm-v2
            if version == "helm-v3":
                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
            else:
                await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
            self.log.info("ee_id: {} deleted".format(ee_id))
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
            raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))

    async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
        # method not implemented for this connector, execution environments must be deleted individually
        pass

    async def install_k8s_proxy_charm(
        self,
        charm_name: str,
        namespace: str,
        artifact_path: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
        *kargs, **kwargs
    ) -> str:
        pass

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _get_ssh_key(self, ip_addr):
        """Call the ee GetSshKey grpc method; retried while the ee is starting."""
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
            return reply.message
        finally:
            channel.close()

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
        # config uses the longer initial retry window, the ee may still be starting
        return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)

    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
        return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)

    async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
        """
        Open a grpc stream to the ee and run the primitive, forwarding every
        intermediate status notification to the database when db_dict is provided.
        :returns: (status, detailed_message) of the last reply received
        """

        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            async with stub.RunPrimitive.open() as stream:
                primitive_id = str(uuid.uuid1())
                result = None
                self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
                               format(primitive_id, primitive_name, params))
                await stream.send_message(
                    PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
                async for reply in stream:
                    self.log.debug("Received reply: {}".format(reply))
                    result = reply
                    # If db_dict provided write notifs in database
                    if db_dict:
                        self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
                if result:
                    return result.status, result.detailed_message
                else:
                    return "ERROR", "No result received"
        finally:
            channel.close()

    def _write_op_detailed_status(self, db_dict, status, detailed_message):
        """Best-effort write of an intermediate status notification to the db."""

        # write ee_id to database: _admin.deployed.VCA.x
        try:
            the_table = db_dict["collection"]
            the_filter = db_dict["filter"]
            update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
            # self.log.debug('Writing ee_id to database: {}'.format(the_path))
            self.db.set_one(
                table=the_table,
                q_filter=the_filter,
                update_dict=update_dict,
                fail_on_empty=True,
            )
        except asyncio.CancelledError:
            raise
        except Exception as e:
            # best-effort: a failed notification must not abort the primitive
            self.log.error("Error writing detailedStatus to database: {}".format(e))

    async def _get_system_cluster_id(self):
        """
        Return (and cache) the helm-chart-v3 cluster id of the OSM system cluster,
        initializing the cluster for helm v3 if it was registered before v3 support.
        """
        if not self._system_cluster_id:
            db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
            k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
            if not k8s_hc_id:
                try:
                    # backward compatibility for existing clusters that have not been initialized for helm v3
                    cluster_id = db_k8cluster.get("_id")
                    k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(k8s_credentials,
                                                                                   reuse_cluster_uuid=cluster_id)
                    # NOTE: stray "}" removed from the ".created" key below (was
                    # "_admin.helm-chart-v3}.created", which wrote a wrong db field)
                    db_k8scluster_update = {"_admin.helm-chart-v3.error_msg": None,
                                            "_admin.helm-chart-v3.id": k8s_hc_id,
                                            "_admin.helm-chart-v3.created": uninstall_sw,
                                            "_admin.helm-chart-v3.operationalState": "ENABLED"}
                    self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
                except Exception as e:
                    self.log.error("error initializing helm-v3 cluster: {}".format(str(e)))
                    raise N2VCException("K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
                        cluster_id))
            self._system_cluster_id = k8s_hc_id
        return self._system_cluster_id

    def _get_ee_id_parts(self, ee_id):
        """
        Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
        namespace.helm_id for backward compatibility
        If exists helm version can be helm-v3 or helm (helm-v2 old version)
        """
        version, _, part_id = ee_id.rpartition(':')
        namespace, _, helm_id = part_id.rpartition('.')
        return version, namespace, helm_id