osm_lcm/lcm_helm_conn.py

   1 ##
   2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #    http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  13 # implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17 ##
  18 import functools
  19 import yaml
  20 import asyncio
  21 import socket
  22 import uuid
  23
  24 from grpclib.client import Channel
  25
  26 from osm_lcm.frontend_pb2 import PrimitiveRequest
  27 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
  28 from osm_lcm.frontend_grpc import FrontendExecutorStub
  29
  30 from n2vc.n2vc_conn import N2VCConnector
  31 from n2vc.k8s_helm_conn import K8sHelmConnector
  32 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
  33
  34 from osm_lcm.lcm_utils import deep_get
  35
  36
  37 def retryer(max_wait_time=60, delay_time=10):
  38     def wrapper(func):
  39         retry_exceptions = (
  40             ConnectionRefusedError
  41         )
  42
  43         @functools.wraps(func)
  44         async def wrapped(*args, **kwargs):
  45             wait_time = max_wait_time
  46             while wait_time > 0:
  47                 try:
  48                     return await func(*args, **kwargs)
  49                 except retry_exceptions:
  50                     wait_time = wait_time - delay_time
  51                     await asyncio.sleep(delay_time)
  52                     continue
  53             else:
  54                 return ConnectionRefusedError
  55         return wrapped
  56     return wrapper
  57
  58
class LCMHelmConn(N2VCConnector):
    """
    N2VC connector whose execution environments (EE) are helm charts installed through
    K8sHelmConnector and reached over gRPC (FrontendExecutorStub).
    """
    # Namespace passed to every helm install; also the first part of the returned ee_id
    _KUBECTL_OSM_NAMESPACE = "osm"
    # Value of "name" used to look up the system cluster in the "k8sclusters" collection
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    # Port used to open the gRPC channel towards an execution environment
    _EE_SERVICE_PORT = 50050

    # Time between retries, in seconds (delay_time of the retryer decorator)
    _EE_RETRY_DELAY = 10
    # Retry budget, in seconds, for _get_ssh_key and _execute_config_primitive
    _MAX_INITIAL_RETRY_TIME = 300
    # Retry budget, in seconds, for _execute_primitive
    _MAX_RETRY_TIME = 30
  70
  71     def __init__(self,
  72                  db: object,
  73                  fs: object,
  74                  log: object = None,
  75                  loop: object = None,
  76                  url: str = None,
  77                  username: str = None,
  78                  vca_config: dict = None,
  79                  on_update_db=None, ):
  80         """
  81         Initialize EE helm connector.
  82         """
  83
  84         # parent class constructor
  85         N2VCConnector.__init__(
  86             self,
  87             db=db,
  88             fs=fs,
  89             log=log,
  90             loop=loop,
  91             url=url,
  92             username=username,
  93             vca_config=vca_config,
  94             on_update_db=on_update_db,
  95         )
  96
  97         self.log.debug("Initialize helm N2VC connector")
  98
  99         # TODO - Obtain data from configuration
 100         self._ee_service_port = self._EE_SERVICE_PORT
 101
 102         self._retry_delay = self._EE_RETRY_DELAY
 103         self._max_retry_time = self._MAX_RETRY_TIME
 104         self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
 105
 106         # initialize helm connector
 107         self._k8sclusterhelm = K8sHelmConnector(
 108             kubectl_command=self.vca_config.get("kubectlpath"),
 109             helm_command=self.vca_config.get("helmpath"),
 110             fs=self.fs,
 111             log=self.log,
 112             db=self.db,
 113             on_update_db=None,
 114         )
 115
 116         self._system_cluster_id = None
 117         self.log.info("Helm N2VC connector initialized")
 118
 119     # TODO - ¿reuse_ee_id?
 120     async def create_execution_environment(self,
 121                                            namespace: str,
 122                                            db_dict: dict,
 123                                            reuse_ee_id: str = None,
 124                                            progress_timeout: float = None,
 125                                            total_timeout: float = None,
 126                                            config: dict = None,
 127                                            artifact_path: str = None,
 128                                            vca_type: str = None) -> (str, dict):
 129         """
 130         Creates a new helm execution environment deploying the helm-chat indicated in the
 131         attifact_path
 132         :param str namespace: This param is not used, all helm charts are deployed in the osm
 133         system namespace
 134         :param dict db_dict: where to write to database when the status changes.
 135             It contains a dictionary with {collection: str, filter: {},  path: str},
 136                 e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
 137                 "_admin.deployed.VCA.3"}
 138         :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
 139         :param float progress_timeout:
 140         :param float total_timeout:
 141         :param dict config:  General variables to instantiate KDU
 142         :param str artifact_path:  path of package content
 143         :param str vca_type:  Type of vca, not used as assumed of type helm
 144         :returns str, dict: id of the new execution environment including namespace.helm_id
 145         and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
 146         """
 147
 148         self.log.info(
 149             "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
 150             "reuse_ee_id: {}".format(
 151                 namespace, artifact_path, db_dict, reuse_ee_id)
 152         )
 153
 154         # Validate artifact-path is provided
 155         if artifact_path is None or len(artifact_path) == 0:
 156             raise N2VCBadArgumentsException(
 157                 message="artifact_path is mandatory", bad_args=["artifact_path"]
 158             )
 159
 160         # Validate artifact-path exists
 161
 162         # remove / in charm path
 163         while artifact_path.find("//") >= 0:
 164             artifact_path = artifact_path.replace("//", "/")
 165
 166         # check charm path
 167         if self.fs.file_exists(artifact_path):
 168             helm_chart_path = artifact_path
 169         else:
 170             msg = "artifact path does not exist: {}".format(artifact_path)
 171             raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])
 172
 173         if artifact_path.startswith("/"):
 174             full_path = self.fs.path + helm_chart_path
 175         else:
 176             full_path = self.fs.path + "/" + helm_chart_path
 177
 178         try:
 179             # Call helm conn install
 180             # Obtain system cluster id from database
 181             system_cluster_uuid = self._get_system_cluster_id()
 182             # Add parameter osm if exist to global
 183             if config and config.get("osm"):
 184                 if not config.get("global"):
 185                     config["global"] = {}
 186                 config["global"]["osm"] = config.get("osm")
 187
 188             self.log.debug("install helm chart: {}".format(full_path))
 189             helm_id = await self._k8sclusterhelm.install(system_cluster_uuid, kdu_model=full_path,
 190                                                          namespace=self._KUBECTL_OSM_NAMESPACE,
 191                                                          params=config,
 192                                                          db_dict=db_dict,
 193                                                          timeout=progress_timeout)
 194
 195             ee_id = "{}.{}".format(self._KUBECTL_OSM_NAMESPACE, helm_id)
 196             return ee_id, None
 197         except N2VCException:
 198             raise
 199         except Exception as e:
 200             self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
 201             raise N2VCException("Error deploying chart ee: {}".format(e))
 202
 203     async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
 204                                              progress_timeout: float = None, total_timeout: float = None) -> str:
 205         # nothing to do
 206         pass
 207
 208     async def install_configuration_sw(self,
 209                                        ee_id: str,
 210                                        artifact_path: str,
 211                                        db_dict: dict,
 212                                        progress_timeout: float = None,
 213                                        total_timeout: float = None,
 214                                        config: dict = None,
 215                                        num_units: int = 1,
 216                                        vca_type: str = None
 217                                        ):
 218         # nothing to do
 219         pass
 220
 221     async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
 222         # nothing to do
 223         pass
 224
 225     async def remove_relation(self):
 226         # nothing to to
 227         pass
 228
 229     async def get_status(self, namespace: str, yaml_format: bool = True):
 230         # not used for this connector
 231         pass
 232
 233     async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
 234                                      total_timeout: float = None) -> str:
 235         """
 236         Obtains ssh-public key from ee executing GetSShKey method from the ee.
 237
 238         :param str ee_id: the id of the execution environment returned by
 239             create_execution_environment or register_execution_environment
 240         :param dict db_dict:
 241         :param float progress_timeout:
 242         :param float total_timeout:
 243         :returns: public key of the execution environment
 244         """
 245
 246         self.log.info(
 247             "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
 248                 ee_id, db_dict)
 249         )
 250
 251         # check arguments
 252         if ee_id is None or len(ee_id) == 0:
 253             raise N2VCBadArgumentsException(
 254                 message="ee_id is mandatory", bad_args=["ee_id"]
 255             )
 256
 257         try:
 258             # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
 259             namespace, helm_id = self._get_ee_id_parts(ee_id)
 260             ip_addr = socket.gethostbyname(helm_id)
 261
 262             # Obtain ssh_key from the ee, this method will implement retries to allow the ee
 263             # install libraries and start successfully
 264             ssh_key = await self._get_ssh_key(ip_addr)
 265             return ssh_key
 266         except Exception as e:
 267             self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
 268             raise N2VCException("Error obtaining ee ssh_ke: {}".format(e))
 269
 270     async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
 271                              progress_timeout: float = None, total_timeout: float = None) -> str:
 272         """
 273         Execute a primitive in the execution environment
 274
 275         :param str ee_id: the one returned by create_execution_environment or
 276             register_execution_environment with the format namespace.helm_id
 277         :param str primitive_name: must be one defined in the software. There is one
 278             called 'config', where, for the proxy case, the 'credentials' of VM are
 279             provided
 280         :param dict params_dict: parameters of the action
 281         :param dict db_dict: where to write into database when the status changes.
 282                         It contains a dict with
 283                             {collection: <str>, filter: {},  path: <str>},
 284                             e.g. {collection: "nslcmops", filter:
 285                                 {_id: <nslcmop_id>, path: "_admin.VCA"}
 286                         It will be used to store information about intermediate notifications
 287         :param float progress_timeout:
 288         :param float total_timeout:
 289         :returns str: primitive result, if ok. It raises exceptions in case of fail
 290         """
 291
 292         self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
 293             ee_id, primitive_name, params_dict, db_dict
 294         ))
 295
 296         # check arguments
 297         if ee_id is None or len(ee_id) == 0:
 298             raise N2VCBadArgumentsException(
 299                 message="ee_id is mandatory", bad_args=["ee_id"]
 300             )
 301         if primitive_name is None or len(primitive_name) == 0:
 302             raise N2VCBadArgumentsException(
 303                 message="action_name is mandatory", bad_args=["action_name"]
 304             )
 305         if params_dict is None:
 306             params_dict = dict()
 307
 308         try:
 309             namespace, helm_id = self._get_ee_id_parts(ee_id)
 310             ip_addr = socket.gethostbyname(helm_id)
 311         except Exception as e:
 312             self.log.error("Error getting ee ip ee: {}".format(e))
 313             raise N2VCException("Error getting ee ip ee: {}".format(e))
 314
 315         if primitive_name == "config":
 316             try:
 317                 # Execute config primitive, higher timeout to check the case ee is starting
 318                 status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
 319                 self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
 320                     ee_id, status, detailed_message))
 321                 if status != "OK":
 322                     self.log.error("Error configuring helm ee, status: {}, message: {}".format(
 323                         status, detailed_message))
 324                     raise N2VCExecutionException(
 325                         message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
 326                             ee_id, status, detailed_message
 327                         ),
 328                         primitive_name=primitive_name,
 329                     )
 330             except Exception as e:
 331                 self.log.error("Error configuring helm ee: {}".format(e))
 332                 raise N2VCExecutionException(
 333                     message="Error configuring helm ee_id: {}, {}".format(
 334                         ee_id, e
 335                     ),
 336                     primitive_name=primitive_name,
 337                 )
 338             return "CONFIG OK"
 339         else:
 340             try:
 341                 # Execute primitive
 342                 status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
 343                                                                          params_dict, db_dict=db_dict)
 344                 self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
 345                     primitive_name, ee_id, status, detailed_message))
 346                 if status != "OK" and status != "PROCESSING":
 347                     self.log.error(
 348                         "Execute primitive {} returned not ok status: {}, message: {}".format(
 349                             primitive_name, status, detailed_message)
 350                     )
 351                     raise N2VCExecutionException(
 352                         message="Execute primitive {} returned not ok status: {}, message: {}".format(
 353                             primitive_name, status, detailed_message
 354                         ),
 355                         primitive_name=primitive_name,
 356                     )
 357             except Exception as e:
 358                 self.log.error(
 359                     "Error executing primitive {}: {}".format(primitive_name, e)
 360                 )
 361                 raise N2VCExecutionException(
 362                     message="Error executing primitive {} into ee={} : {}".format(
 363                         primitive_name, ee_id, e
 364                     ),
 365                     primitive_name=primitive_name,
 366                 )
 367             return detailed_message
 368
 369     async def deregister_execution_environments(self):
 370         # nothing to be done
 371         pass
 372
 373     async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
 374         """
 375         Delete an execution environment
 376         :param str ee_id: id of the execution environment to delete, included namespace.helm_id
 377         :param dict db_dict: where to write into database when the status changes.
 378                         It contains a dict with
 379                             {collection: <str>, filter: {},  path: <str>},
 380                             e.g. {collection: "nsrs", filter:
 381                                 {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
 382         :param float total_timeout:
 383         """
 384
 385         self.log.info("ee_id: {}".format(ee_id))
 386
 387         # check arguments
 388         if ee_id is None:
 389             raise N2VCBadArgumentsException(
 390                 message="ee_id is mandatory", bad_args=["ee_id"]
 391             )
 392
 393         try:
 394
 395             # Obtain cluster_uuid
 396             system_cluster_uuid = self._get_system_cluster_id()
 397
 398             # Get helm_id
 399             namespace, helm_id = self._get_ee_id_parts(ee_id)
 400
 401             # Uninstall chart
 402             await self._k8sclusterhelm.uninstall(system_cluster_uuid, helm_id)
 403             self.log.info("ee_id: {} deleted".format(ee_id))
 404         except N2VCException:
 405             raise
 406         except Exception as e:
 407             self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
 408             raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))
 409
 410     async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
 411         # method not implemented for this connector, execution environments must be deleted individually
 412         pass
 413
 414     async def install_k8s_proxy_charm(
 415         self,
 416         charm_name: str,
 417         namespace: str,
 418         artifact_path: str,
 419         db_dict: dict,
 420         progress_timeout: float = None,
 421         total_timeout: float = None,
 422         config: dict = None,
 423     ) -> str:
 424         pass
 425
 426     @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 427     async def _get_ssh_key(self, ip_addr):
 428         channel = Channel(ip_addr, self._ee_service_port)
 429         try:
 430             stub = FrontendExecutorStub(channel)
 431             self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
 432             reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
 433             return reply.message
 434         finally:
 435             channel.close()
 436
 437     @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 438     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
 439         return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)
 440
 441     @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
 442     async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
 443         return await  self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)
 444
 445     async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
 446
 447         channel = Channel(ip_addr, self._ee_service_port)
 448         try:
 449             stub = FrontendExecutorStub(channel)
 450             async with stub.RunPrimitive.open() as stream:
 451                 primitive_id = str(uuid.uuid1())
 452                 result = None
 453                 self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
 454                                format(primitive_id, primitive_name, params))
 455                 await stream.send_message(
 456                     PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
 457                 async for reply in stream:
 458                     self.log.debug("Received reply: {}".format(reply))
 459                     result = reply
 460                     # If db_dict provided write notifs in database
 461                     if db_dict:
 462                         self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
 463                 if result:
 464                     return reply.status, reply.detailed_message
 465                 else:
 466                     return "ERROR", "No result received"
 467         finally:
 468             channel.close()
 469
 470     def _write_op_detailed_status(self, db_dict, status, detailed_message):
 471
 472         # write ee_id to database: _admin.deployed.VCA.x
 473         try:
 474             the_table = db_dict["collection"]
 475             the_filter = db_dict["filter"]
 476             update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
 477             # self.log.debug('Writing ee_id to database: {}'.format(the_path))
 478             self.db.set_one(
 479                 table=the_table,
 480                 q_filter=the_filter,
 481                 update_dict=update_dict,
 482                 fail_on_empty=True,
 483             )
 484         except asyncio.CancelledError:
 485             raise
 486         except Exception as e:
 487             self.log.error("Error writing detailedStatus to database: {}".format(e))
 488
 489     def _get_system_cluster_id(self):
 490         if not self._system_cluster_id:
 491             db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
 492             k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart", "id"))
 493             if not k8s_hc_id:
 494                 self.log.error("osm system cluster has not been properly initialized for helm connector, "
 495                                "helm-chart id is not defined")
 496                 raise N2VCException("osm system cluster has not been properly initialized for helm connector")
 497             self._system_cluster_id = k8s_hc_id
 498         return self._system_cluster_id
 499
 500     def _get_ee_id_parts(self, ee_id):
 501         namespace, _, helm_id = ee_id.partition('.')
 502         return namespace, helm_id