831190bb6499e1234286b8d6f1465fde586d11a8
[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
1 ##
2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 ##
18 import functools
19 import yaml
20 import asyncio
21 import socket
22 import uuid
23 import os
24
25 from grpclib.client import Channel
26
27 from osm_lcm.frontend_pb2 import PrimitiveRequest
28 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
29 from osm_lcm.frontend_grpc import FrontendExecutorStub
30 from osm_lcm.lcm_utils import LcmBase
31
32 from osm_lcm.data_utils.database.database import Database
33 from osm_lcm.data_utils.filesystem.filesystem import Filesystem
34
35 from n2vc.n2vc_conn import N2VCConnector
36 from n2vc.k8s_helm_conn import K8sHelmConnector
37 from n2vc.k8s_helm3_conn import K8sHelm3Connector
38 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
39
40 from osm_lcm.lcm_utils import deep_get
41
42
def retryer(max_wait_time=60, delay_time=10):
    """
    Decorator factory that retries an async callable on connection errors.

    The wrapped coroutine is invoked repeatedly, sleeping ``delay_time``
    seconds between attempts, until it succeeds or the total retry budget
    ``max_wait_time`` (seconds) is exhausted.

    :param max_wait_time: total time budget, in seconds, spent retrying
    :param delay_time: delay, in seconds, between consecutive retries
    :raises ConnectionRefusedError: if the retry budget is exhausted without
        a successful call
    """
    def wrapper(func):
        # Exceptions that trigger a retry instead of propagating immediately.
        # Note the trailing comma: this must be a tuple for "except".
        retry_exceptions = (
            ConnectionRefusedError,
        )

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            wait_time = max_wait_time
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions:
                    wait_time = wait_time - delay_time
                    await asyncio.sleep(delay_time)
            # Retry budget exhausted: raise so the caller sees a failure
            # (previously the exception *class* was returned as a value,
            # which callers would mistake for a successful result).
            raise ConnectionRefusedError("maximum retry time exceeded")
        return wrapped
    return wrapper
63
64
class LCMHelmConn(N2VCConnector, LcmBase):
    """
    N2VC connector that manages helm-chart based execution environments (EE).

    Execution environments are deployed as helm charts into the OSM system
    kubernetes cluster and reached through a gRPC frontend service resolved
    by kubernetes DNS from the helm release name.
    """

    _KUBECTL_OSM_NAMESPACE = "osm"
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    _EE_SERVICE_PORT = 50050

    # Time between retries
    _EE_RETRY_DELAY = 10
    # Initial max retry time
    _MAX_INITIAL_RETRY_TIME = 300
    # Other retry time
    _MAX_RETRY_TIME = 30

    def __init__(self,
                 log: object = None,
                 loop: object = None,
                 url: str = None,
                 username: str = None,
                 vca_config: dict = None,
                 on_update_db=None, ):
        """
        Initialize EE helm connector.
        """

        self.db = Database().instance.db
        self.fs = Filesystem().instance.fs

        # parent class constructor
        N2VCConnector.__init__(
            self,
            log=log,
            loop=loop,
            url=url,
            username=username,
            vca_config=vca_config,
            on_update_db=on_update_db,
            db=self.db,
            fs=self.fs
        )

        self.log.debug("Initialize helm N2VC connector")

        # TODO - Obtain data from configuration
        self._ee_service_port = self._EE_SERVICE_PORT

        self._retry_delay = self._EE_RETRY_DELAY
        self._max_retry_time = self._MAX_RETRY_TIME
        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME

        # initialize helm connector for helmv2 and helmv3
        self._k8sclusterhelm2 = K8sHelmConnector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helmpath"),
            fs=self.fs,
            db=self.db,
            log=self.log,
            on_update_db=None,
        )

        self._k8sclusterhelm3 = K8sHelm3Connector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helm3path"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        # Cached system cluster id, resolved lazily by _get_system_cluster_id
        self._system_cluster_id = None
        self.log.info("Helm N2VC connector initialized")

    # TODO - ¿reuse_ee_id?
    async def create_execution_environment(self,
                                           namespace: str,
                                           db_dict: dict,
                                           reuse_ee_id: str = None,
                                           progress_timeout: float = None,
                                           total_timeout: float = None,
                                           config: dict = None,
                                           artifact_path: str = None,
                                           vca_type: str = None,
                                           *kargs, **kwargs) -> (str, dict):
        """
        Creates a new helm execution environment deploying the helm-chart indicated in the
        artifact_path
        :param str namespace: This param is not used, all helm charts are deployed in the osm
        system namespace
        :param dict db_dict: where to write to database when the status changes.
            It contains a dictionary with {collection: str, filter: {}, path: str},
                e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
                "_admin.deployed.VCA.3"}
        :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
        :param float progress_timeout:
        :param float total_timeout:
        :param dict config: General variables to instantiate KDU
        :param str artifact_path: path of package content
        :param str vca_type: Type of vca, must be type helm or helm-v3
        :returns str, dict: id of the new execution environment including namespace.helm_id
        and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
        """

        self.log.info(
            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
            "reuse_ee_id: {}".format(
                namespace, artifact_path, db_dict, reuse_ee_id)
        )

        # Validate artifact-path is provided
        if artifact_path is None or len(artifact_path) == 0:
            raise N2VCBadArgumentsException(
                message="artifact_path is mandatory", bad_args=["artifact_path"]
            )

        # Validate artifact-path exists and sync path
        from_path = os.path.split(artifact_path)[0]
        self.fs.sync(from_path)

        # remove / in charm path
        while artifact_path.find("//") >= 0:
            artifact_path = artifact_path.replace("//", "/")

        # check charm path
        if self.fs.file_exists(artifact_path):
            helm_chart_path = artifact_path
        else:
            msg = "artifact path does not exist: {}".format(artifact_path)
            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])

        if artifact_path.startswith("/"):
            full_path = self.fs.path + helm_chart_path
        else:
            full_path = self.fs.path + "/" + helm_chart_path

        while full_path.find("//") >= 0:
            full_path = full_path.replace("//", "/")

        try:
            # Call helm conn install
            # Obtain system cluster id from database
            system_cluster_uuid = await self._get_system_cluster_id()
            # Add parameter osm if exist to global
            if config and config.get("osm"):
                if not config.get("global"):
                    config["global"] = {}
                config["global"]["osm"] = config.get("osm")

            self.log.debug("install helm chart: {}".format(full_path))
            if vca_type == "helm":
                helm_id = await self._k8sclusterhelm2.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)
            else:
                helm_id = await self._k8sclusterhelm3.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)

            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
            return ee_id, None
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
            raise N2VCException("Error deploying chart ee: {}".format(e))

    async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
                                             progress_timeout: float = None, total_timeout: float = None,
                                             *kargs, **kwargs) -> str:
        # nothing to do
        pass

    async def install_configuration_sw(self,
                                       ee_id: str,
                                       artifact_path: str,
                                       db_dict: dict,
                                       progress_timeout: float = None,
                                       total_timeout: float = None,
                                       config: dict = None,
                                       num_units: int = 1,
                                       vca_type: str = None
                                       ):
        # nothing to do
        pass

    async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
        # nothing to do
        pass

    async def remove_relation(self):
        # nothing to do
        pass

    async def get_status(self, namespace: str, yaml_format: bool = True):
        # not used for this connector
        pass

    async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
                                     total_timeout: float = None) -> str:
        """
        Obtains ssh-public key from ee executing GetSShKey method from the ee.

        :param str ee_id: the id of the execution environment returned by
            create_execution_environment or register_execution_environment
        :param dict db_dict:
        :param float progress_timeout:
        :param float total_timeout:
        :returns: public key of the execution environment
        """

        self.log.info(
            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
                ee_id, db_dict)
        )

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)

            # Obtain ssh_key from the ee, this method will implement retries to allow the ee
            # install libraries and start successfully
            ssh_key = await self._get_ssh_key(ip_addr)
            return ssh_key
        except Exception as e:
            self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
            raise N2VCException("Error obtaining ee ssh_key: {}".format(e))

    async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
                             progress_timeout: float = None, total_timeout: float = None) -> str:
        """
        Execute a primitive in the execution environment

        :param str ee_id: the one returned by create_execution_environment or
            register_execution_environment with the format namespace.helm_id
        :param str primitive_name: must be one defined in the software. There is one
            called 'config', where, for the proxy case, the 'credentials' of VM are
            provided
        :param dict params_dict: parameters of the action
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nslcmops", filter:
                    {_id: <nslcmop_id>, path: "_admin.VCA"}
            It will be used to store information about intermediate notifications
        :param float progress_timeout:
        :param float total_timeout:
        :returns str: primitive result, if ok. It raises exceptions in case of fail
        """

        self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
            ee_id, primitive_name, params_dict, db_dict
        ))

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )
        if primitive_name is None or len(primitive_name) == 0:
            raise N2VCBadArgumentsException(
                message="action_name is mandatory", bad_args=["action_name"]
            )
        if params_dict is None:
            params_dict = dict()

        try:
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)
        except Exception as e:
            self.log.error("Error getting ee ip ee: {}".format(e))
            raise N2VCException("Error getting ee ip ee: {}".format(e))

        if primitive_name == "config":
            try:
                # Execute config primitive, higher timeout to check the case ee is starting
                status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
                self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
                    ee_id, status, detailed_message))
                if status != "OK":
                    self.log.error("Error configuring helm ee, status: {}, message: {}".format(
                        status, detailed_message))
                    raise N2VCExecutionException(
                        message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                            ee_id, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error("Error configuring helm ee: {}".format(e))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, {}".format(
                        ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return "CONFIG OK"
        else:
            try:
                # Execute primitive
                status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
                                                                         params_dict, db_dict=db_dict)
                self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
                    primitive_name, ee_id, status, detailed_message))
                if status != "OK" and status != "PROCESSING":
                    self.log.error(
                        "Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message)
                    )
                    raise N2VCExecutionException(
                        message="Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error(
                    "Error executing primitive {}: {}".format(primitive_name, e)
                )
                raise N2VCExecutionException(
                    message="Error executing primitive {} into ee={} : {}".format(
                        primitive_name, ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return detailed_message

    async def deregister_execution_environments(self):
        # nothing to be done
        pass

    async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
        """
        Delete an execution environment
        :param str ee_id: id of the execution environment to delete, included namespace.helm_id
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nsrs", filter:
                    {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
        :param float total_timeout:
        """

        self.log.info("ee_id: {}".format(ee_id))

        # check arguments
        if ee_id is None:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:

            # Obtain cluster_uuid
            system_cluster_uuid = await self._get_system_cluster_id()

            # Get helm_id
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)

            # Uninstall chart, for backward compatibility we must assume that if there is no
            # version it is helm-v2
            if version == "helm-v3":
                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
            else:
                await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
            self.log.info("ee_id: {} deleted".format(ee_id))
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
            raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))

    async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
        # method not implemented for this connector, execution environments must be deleted individually
        pass

    async def install_k8s_proxy_charm(
        self,
        charm_name: str,
        namespace: str,
        artifact_path: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
        *kargs, **kwargs
    ) -> str:
        pass

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _get_ssh_key(self, ip_addr):
        # Ask the EE frontend gRPC service for its public ssh key; retried
        # while the EE is still starting up (decorator handles retries)
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
            return reply.message
        finally:
            channel.close()

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
        # Higher retry budget than regular primitives: the EE may still be starting
        return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)

    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
        return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)

    async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
        """
        Run a primitive through the EE gRPC frontend and stream back status
        replies, optionally persisting each intermediate status to the database.

        :returns: tuple (status, detailed_message) from the last reply received,
            or ("ERROR", "No result received") if the stream produced nothing
        """
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            async with stub.RunPrimitive.open() as stream:
                primitive_id = str(uuid.uuid1())
                result = None
                self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
                               format(primitive_id, primitive_name, params))
                await stream.send_message(
                    PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
                async for reply in stream:
                    self.log.debug("Received reply: {}".format(reply))
                    result = reply
                    # If db_dict provided write notifs in database
                    if db_dict:
                        self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
                if result:
                    return reply.status, reply.detailed_message
                else:
                    return "ERROR", "No result received"
        finally:
            channel.close()

    def _write_op_detailed_status(self, db_dict, status, detailed_message):
        """Best-effort write of an intermediate detailed-status into the database."""
        # write ee_id to database: _admin.deployed.VCA.x
        try:
            the_table = db_dict["collection"]
            the_filter = db_dict["filter"]
            update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
            # self.log.debug('Writing ee_id to database: {}'.format(the_path))
            self.db.set_one(
                table=the_table,
                q_filter=the_filter,
                update_dict=update_dict,
                fail_on_empty=True,
            )
        except asyncio.CancelledError:
            raise
        except Exception as e:
            # best-effort: a failed status notification must not abort the primitive
            self.log.error("Error writing detailedStatus to database: {}".format(e))

    async def _get_system_cluster_id(self):
        """
        Return (and cache) the helm-chart-v3 id of the OSM system k8s cluster,
        initializing the cluster for helm v3 on first use if needed.
        """
        if not self._system_cluster_id:
            db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
            k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
            if not k8s_hc_id:
                try:
                    # backward compatibility for existing clusters that have not been initialized for helm v3
                    cluster_id = db_k8cluster.get("_id")
                    k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(k8s_credentials,
                                                                                   reuse_cluster_uuid=cluster_id)
                    # NOTE: key fixed from "_admin.helm-chart-v3}.created" (stray
                    # brace) so the created flag lands on the intended path
                    db_k8scluster_update = {"_admin.helm-chart-v3.error_msg": None,
                                            "_admin.helm-chart-v3.id": k8s_hc_id,
                                            "_admin.helm-chart-v3.created": uninstall_sw,
                                            "_admin.helm-chart-v3.operationalState": "ENABLED"}
                    self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
                except Exception as e:
                    self.log.error("error initializing helm-v3 cluster: {}".format(str(e)))
                    raise N2VCException("K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
                        cluster_id))
            self._system_cluster_id = k8s_hc_id
        return self._system_cluster_id

    def _get_ee_id_parts(self, ee_id):
        """
        Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
        namespace.helm_id for backward compatibility
        If exists helm version can be helm-v3 or helm (helm-v2 old version)
        """
        version, _, part_id = ee_id.rpartition(':')
        namespace, _, helm_id = part_id.rpartition('.')
        return version, namespace, helm_id