osm_lcm/lcm_helm_conn.py

   1 ##
   2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #    http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  13 # implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17 ##
  18 import functools
  19 import yaml
  20 import asyncio
  21 import socket
  22 import uuid
  23
  24 from grpclib.client import Channel
  25
  26 from osm_lcm.frontend_pb2 import PrimitiveRequest
  27 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
  28 from osm_lcm.frontend_grpc import FrontendExecutorStub
  29
  30 from n2vc.n2vc_conn import N2VCConnector
  31 from n2vc.k8s_helm_conn import K8sHelmConnector
  32 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
  33
  34 from osm_lcm.lcm_utils import deep_get
  35
  36
  37 def retryer(max_wait_time=60, delay_time=10):
  38     def wrapper(func):
  39         retry_exceptions = (
  40             ConnectionRefusedError
  41         )
  42
  43         @functools.wraps(func)
  44         async def wrapped(*args, **kwargs):
  45             wait_time = max_wait_time
  46             while wait_time > 0:
  47                 try:
  48                     return await func(*args, **kwargs)
  49                 except retry_exceptions:
  50                     wait_time = wait_time - delay_time
  51                     await asyncio.sleep(delay_time)
  52                     continue
  53             else:
  54                 return ConnectionRefusedError
  55         return wrapped
  56     return wrapper
  57
  58
class LCMHelmConn(N2VCConnector):
    """
    N2VC connector whose execution environments (ee) are deployed as helm charts.

    Charts are installed and uninstalled through a K8sHelmConnector, and the
    deployed ee is reached through a grpc channel (FrontendExecutorStub) to obtain
    its ssh key and to run primitives.
    """
    # Namespace passed to helm when installing the ee chart, also used as ee_id prefix
    _KUBECTL_OSM_NAMESPACE = "osm"
    # Name used to look up the system cluster in the "k8sclusters" collection
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    # Port used to open the grpc channel towards the ee
    _EE_SERVICE_PORT = 50050

    # Time between retries (seconds, it is passed to asyncio.sleep by the retryer)
    _EE_RETRY_DELAY = 10
    # Initial max retry time, used for the ssh key request and the config primitive
    _MAX_INITIAL_RETRY_TIME = 300
    # Other retry time, used for the rest of primitives
    _MAX_RETRY_TIME = 30
  70
  71     def __init__(self,
  72                  db: object,
  73                  fs: object,
  74                  log: object = None,
  75                  loop: object = None,
  76                  url: str = None,
  77                  username: str = None,
  78                  vca_config: dict = None,
  79                  on_update_db=None, ):
  80         """
  81         Initialize EE helm connector.
  82         """
  83
  84         # parent class constructor
  85         N2VCConnector.__init__(
  86             self,
  87             db=db,
  88             fs=fs,
  89             log=log,
  90             loop=loop,
  91             url=url,
  92             username=username,
  93             vca_config=vca_config,
  94             on_update_db=on_update_db,
  95         )
  96
  97         self.log.debug("Initialize helm N2VC connector")
  98
  99         # TODO - Obtain data from configuration
 100         self._ee_service_port = self._EE_SERVICE_PORT
 101
 102         self._retry_delay = self._EE_RETRY_DELAY
 103         self._max_retry_time = self._MAX_RETRY_TIME
 104         self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
 105
 106         # initialize helm connector
 107         self._k8sclusterhelm = K8sHelmConnector(
 108             kubectl_command=self.vca_config.get("kubectlpath"),
 109             helm_command=self.vca_config.get("helmpath"),
 110             fs=self.fs,
 111             log=self.log,
 112             db=self.db,
 113             on_update_db=None,
 114         )
 115
 116         self._system_cluster_id = None
 117         self.log.info("Helm N2VC connector initialized")
 118
 119     # TODO - ¿reuse_ee_id?
 120     async def create_execution_environment(self,
 121                                            namespace: str,
 122                                            db_dict: dict,
 123                                            reuse_ee_id: str = None,
 124                                            progress_timeout: float = None,
 125                                            total_timeout: float = None,
 126                                            config: dict = None,
 127                                            artifact_path: str = None,
 128                                            vca_type: str = None) -> (str, dict):
 129         """
 130         Creates a new helm execution environment deploying the helm-chat indicated in the
 131         attifact_path
 132         :param str namespace: This param is not used, all helm charts are deployed in the osm
 133         system namespace
 134         :param dict db_dict: where to write to database when the status changes.
 135             It contains a dictionary with {collection: str, filter: {},  path: str},
 136                 e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
 137                 "_admin.deployed.VCA.3"}
 138         :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
 139         :param float progress_timeout:
 140         :param float total_timeout:
 141         :param dict config:  General variables to instantiate KDU
 142         :param str artifact_path:  path of package content
 143         :param str vca_type:  Type of vca, not used as assumed of type helm
 144         :returns str, dict: id of the new execution environment including namespace.helm_id
 145         and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
 146         """
 147
 148         self.log.info(
 149             "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
 150             "reuse_ee_id: {}".format(
 151                 namespace, artifact_path, db_dict, reuse_ee_id)
 152         )
 153
 154         # Validate artifact-path is provided
 155         if artifact_path is None or len(artifact_path) == 0:
 156             raise N2VCBadArgumentsException(
 157                 message="artifact_path is mandatory", bad_args=["artifact_path"]
 158             )
 159
 160         # Validate artifact-path exists
 161
 162         # remove / in charm path
 163         while artifact_path.find("//") >= 0:
 164             artifact_path = artifact_path.replace("//", "/")
 165
 166         # check charm path
 167         if self.fs.file_exists(artifact_path):
 168             helm_chart_path = artifact_path
 169         else:
 170             msg = "artifact path does not exist: {}".format(artifact_path)
 171             raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])
 172
 173         if artifact_path.startswith("/"):
 174             full_path = self.fs.path + helm_chart_path
 175         else:
 176             full_path = self.fs.path + "/" + helm_chart_path
 177
 178         try:
 179             # Call helm conn install
 180             # Obtain system cluster id from database
 181             system_cluster_uuid = self._get_system_cluster_id()
 182             # Add parameter osm if exist to global
 183             if config and config.get("osm"):
 184                 if not config.get("global"):
 185                     config["global"] = {}
 186                 config["global"]["osm"] = config.get("osm")
 187
 188             self.log.debug("install helm chart: {}".format(full_path))
 189             helm_id = await self._k8sclusterhelm.install(system_cluster_uuid, kdu_model=full_path,
 190                                                          namespace=self._KUBECTL_OSM_NAMESPACE,
 191                                                          params=config,
 192                                                          db_dict=db_dict,
 193                                                          timeout=progress_timeout)
 194
 195             ee_id = "{}.{}".format(self._KUBECTL_OSM_NAMESPACE, helm_id)
 196             return ee_id, None
 197         except Exception as e:
 198             self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
 199             raise N2VCException("Error deploying chart ee: {}".format(e))
 200
 201     async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
 202                                              progress_timeout: float = None, total_timeout: float = None) -> str:
 203         # nothing to do
 204         pass
 205
 206     async def install_configuration_sw(self,
 207                                        ee_id: str,
 208                                        artifact_path: str,
 209                                        db_dict: dict,
 210                                        progress_timeout: float = None,
 211                                        total_timeout: float = None,
 212                                        config: dict = None,
 213                                        num_units: int = 1,
 214                                        vca_type: str = None
 215                                        ):
 216         # nothing to do
 217         pass
 218
 219     async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
 220         # nothing to do
 221         pass
 222
 223     async def remove_relation(self):
 224         # nothing to to
 225         pass
 226
 227     async def get_status(self, namespace: str, yaml_format: bool = True):
 228         # not used for this connector
 229         pass
 230
 231     async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
 232                                      total_timeout: float = None) -> str:
 233         """
 234         Obtains ssh-public key from ee executing GetSShKey method from the ee.
 235
 236         :param str ee_id: the id of the execution environment returned by
 237             create_execution_environment or register_execution_environment
 238         :param dict db_dict:
 239         :param float progress_timeout:
 240         :param float total_timeout:
 241         :returns: public key of the execution environment
 242         """
 243
 244         self.log.info(
 245             "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
 246                 ee_id, db_dict)
 247         )
 248
 249         # check arguments
 250         if ee_id is None or len(ee_id) == 0:
 251             raise N2VCBadArgumentsException(
 252                 message="ee_id is mandatory", bad_args=["ee_id"]
 253             )
 254
 255         try:
 256             # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
 257             namespace, helm_id = self._get_ee_id_parts(ee_id)
 258             ip_addr = socket.gethostbyname(helm_id)
 259
 260             # Obtain ssh_key from the ee, this method will implement retries to allow the ee
 261             # install libraries and start successfully
 262             ssh_key = await self._get_ssh_key(ip_addr)
 263             return ssh_key
 264         except Exception as e:
 265             self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
 266             raise N2VCException("Error obtaining ee ssh_ke: {}".format(e))
 267
 268     async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
 269                              progress_timeout: float = None, total_timeout: float = None) -> str:
 270         """
 271         Execute a primitive in the execution environment
 272
 273         :param str ee_id: the one returned by create_execution_environment or
 274             register_execution_environment with the format namespace.helm_id
 275         :param str primitive_name: must be one defined in the software. There is one
 276             called 'config', where, for the proxy case, the 'credentials' of VM are
 277             provided
 278         :param dict params_dict: parameters of the action
 279         :param dict db_dict: where to write into database when the status changes.
 280                         It contains a dict with
 281                             {collection: <str>, filter: {},  path: <str>},
 282                             e.g. {collection: "nslcmops", filter:
 283                                 {_id: <nslcmop_id>, path: "_admin.VCA"}
 284                         It will be used to store information about intermediate notifications
 285         :param float progress_timeout:
 286         :param float total_timeout:
 287         :returns str: primitive result, if ok. It raises exceptions in case of fail
 288         """
 289
 290         self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
 291             ee_id, primitive_name, params_dict, db_dict
 292         ))
 293
 294         # check arguments
 295         if ee_id is None or len(ee_id) == 0:
 296             raise N2VCBadArgumentsException(
 297                 message="ee_id is mandatory", bad_args=["ee_id"]
 298             )
 299         if primitive_name is None or len(primitive_name) == 0:
 300             raise N2VCBadArgumentsException(
 301                 message="action_name is mandatory", bad_args=["action_name"]
 302             )
 303         if params_dict is None:
 304             params_dict = dict()
 305
 306         try:
 307             namespace, helm_id = self._get_ee_id_parts(ee_id)
 308             ip_addr = socket.gethostbyname(helm_id)
 309         except Exception as e:
 310             self.log.error("Error getting ee ip ee: {}".format(e))
 311             raise N2VCException("Error getting ee ip ee: {}".format(e))
 312
 313         if primitive_name == "config":
 314             try:
 315                 # Execute config primitive, higher timeout to check the case ee is starting
 316                 status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
 317                 self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
 318                     ee_id, status, detailed_message))
 319                 if status != "OK":
 320                     self.log.error("Error configuring helm ee, status: {}, message: {}".format(
 321                         status, detailed_message))
 322                     raise N2VCExecutionException(
 323                         message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
 324                             ee_id, status, detailed_message
 325                         ),
 326                         primitive_name=primitive_name,
 327                     )
 328             except Exception as e:
 329                 self.log.error("Error configuring helm ee: {}".format(e))
 330                 raise N2VCExecutionException(
 331                     message="Error configuring helm ee_id: {}, {}".format(
 332                         ee_id, e
 333                     ),
 334                     primitive_name=primitive_name,
 335                 )
 336             return "CONFIG OK"
 337         else:
 338             try:
 339                 # Execute primitive
 340                 status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
 341                                                                          params_dict, db_dict=db_dict)
 342                 self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
 343                     primitive_name, ee_id, status, detailed_message))
 344                 if status != "OK" and status != "PROCESSING":
 345                     self.log.error(
 346                         "Execute primitive {} returned not ok status: {}, message: {}".format(
 347                             primitive_name, status, detailed_message)
 348                     )
 349                     raise N2VCExecutionException(
 350                         message="Execute primitive {} returned not ok status: {}, message: {}".format(
 351                             primitive_name, status, detailed_message
 352                         ),
 353                         primitive_name=primitive_name,
 354                     )
 355             except Exception as e:
 356                 self.log.error(
 357                     "Error executing primitive {}: {}".format(primitive_name, e)
 358                 )
 359                 raise N2VCExecutionException(
 360                     message="Error executing primitive {} into ee={} : {}".format(
 361                         primitive_name, ee_id, e
 362                     ),
 363                     primitive_name=primitive_name,
 364                 )
 365             return detailed_message
 366
 367     async def deregister_execution_environments(self):
 368         # nothing to be done
 369         pass
 370
 371     async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
 372         """
 373         Delete an execution environment
 374         :param str ee_id: id of the execution environment to delete, included namespace.helm_id
 375         :param dict db_dict: where to write into database when the status changes.
 376                         It contains a dict with
 377                             {collection: <str>, filter: {},  path: <str>},
 378                             e.g. {collection: "nsrs", filter:
 379                                 {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
 380         :param float total_timeout:
 381         """
 382
 383         self.log.info("ee_id: {}".format(ee_id))
 384
 385         # check arguments
 386         if ee_id is None:
 387             raise N2VCBadArgumentsException(
 388                 message="ee_id is mandatory", bad_args=["ee_id"]
 389             )
 390
 391         try:
 392
 393             # Obtain cluster_uuid
 394             system_cluster_uuid = self._get_system_cluster_id()
 395
 396             # Get helm_id
 397             namespace, helm_id = self._get_ee_id_parts(ee_id)
 398
 399             # Uninstall chart
 400             await self._k8sclusterhelm.uninstall(system_cluster_uuid, helm_id)
 401             self.log.info("ee_id: {} deleted".format(ee_id))
 402         except Exception as e:
 403             self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
 404             raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))
 405
 406     async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
 407         # method not implemented for this connector, execution environments must be deleted individually
 408         pass
 409
 410     async def install_k8s_proxy_charm(
 411         self,
 412         charm_name: str,
 413         namespace: str,
 414         artifact_path: str,
 415         db_dict: dict,
 416         progress_timeout: float = None,
 417         total_timeout: float = None,
 418         config: dict = None,
 419     ) -> str:
 420         pass
 421
 422     @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 423     async def _get_ssh_key(self, ip_addr):
 424         channel = Channel(ip_addr, self._ee_service_port)
 425         try:
 426             stub = FrontendExecutorStub(channel)
 427             self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
 428             reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
 429             return reply.message
 430         finally:
 431             channel.close()
 432
 433     @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 434     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
 435         return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)
 436
 437     @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 438     async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
 439         return await  self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)
 440
 441     async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
 442
 443         channel = Channel(ip_addr, self._ee_service_port)
 444         try:
 445             stub = FrontendExecutorStub(channel)
 446             async with stub.RunPrimitive.open() as stream:
 447                 primitive_id = str(uuid.uuid1())
 448                 result = None
 449                 self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
 450                                format(primitive_id, primitive_name, params))
 451                 await stream.send_message(
 452                     PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
 453                 async for reply in stream:
 454                     self.log.debug("Received reply: {}".format(reply))
 455                     result = reply
 456                     # If db_dict provided write notifs in database
 457                     if db_dict:
 458                         self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
 459                 if result:
 460                     return reply.status, reply.detailed_message
 461                 else:
 462                     return "ERROR", "No result received"
 463         finally:
 464             channel.close()
 465
 466     def _write_op_detailed_status(self, db_dict, status, detailed_message):
 467
 468         # write ee_id to database: _admin.deployed.VCA.x
 469         try:
 470             the_table = db_dict["collection"]
 471             the_filter = db_dict["filter"]
 472             update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
 473             # self.log.debug('Writing ee_id to database: {}'.format(the_path))
 474             self.db.set_one(
 475                 table=the_table,
 476                 q_filter=the_filter,
 477                 update_dict=update_dict,
 478                 fail_on_empty=True,
 479             )
 480         except asyncio.CancelledError:
 481             raise
 482         except Exception as e:
 483             self.log.error("Error writing detailedStatus to database: {}".format(e))
 484
 485     def _get_system_cluster_id(self):
 486         if not self._system_cluster_id:
 487             db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
 488             k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart", "id"))
 489             self._system_cluster_id = k8s_hc_id
 490         return self._system_cluster_id
 491
 492     def _get_ee_id_parts(self, ee_id):
 493         namespace, _, helm_id = ee_id.partition('.')
 494         return namespace, helm_id