Fix Bug 2028 Multi-site VNF creation fails
[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
index 904b7d9..c8ba44d 100644 (file)
@@ -29,22 +29,38 @@ from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
 from osm_lcm.frontend_grpc import FrontendExecutorStub
 from osm_lcm.lcm_utils import LcmBase
 
+from osm_lcm.data_utils.database.database import Database
+from osm_lcm.data_utils.filesystem.filesystem import Filesystem
+
 from n2vc.n2vc_conn import N2VCConnector
 from n2vc.k8s_helm_conn import K8sHelmConnector
 from n2vc.k8s_helm3_conn import K8sHelm3Connector
-from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
+from n2vc.exceptions import (
+    N2VCBadArgumentsException,
+    N2VCException,
+    N2VCExecutionException,
+)
 
 from osm_lcm.lcm_utils import deep_get
 
 
-def retryer(max_wait_time=60, delay_time=10):
+def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
     def wrapper(func):
-        retry_exceptions = (
-            ConnectionRefusedError
-        )
+        retry_exceptions = ConnectionRefusedError
 
         @functools.wraps(func)
         async def wrapped(*args, **kwargs):
+            # default values for wait time and delay_time
+            delay_time = 10
+            max_wait_time = 300
+
+            # obtain arguments from variable names
+            self = args[0]
+            if self.__dict__.get(max_wait_time_var):
+                max_wait_time = self.__dict__.get(max_wait_time_var)
+            if self.__dict__.get(delay_time_var):
+                delay_time = self.__dict__.get(delay_time_var)
+
             wait_time = max_wait_time
             while wait_time > 0:
                 try:
@@ -55,7 +71,9 @@ def retryer(max_wait_time=60, delay_time=10):
                     continue
             else:
                 return ConnectionRefusedError
+
         return wrapped
+
     return wrapper
 
 
@@ -64,55 +82,66 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
     _EE_SERVICE_PORT = 50050
 
-    # Time beetween retries
-    _EE_RETRY_DELAY = 10
     # Initial max retry time
-    _MAX_INITIAL_RETRY_TIME = 300
-    # Other retry time
+    _MAX_INITIAL_RETRY_TIME = 600
+    # Max retry time for normal operations
     _MAX_RETRY_TIME = 30
+    # Time between retries, retry time after a connection error is raised
+    _EE_RETRY_DELAY = 10
 
-    def __init__(self,
-                 db: object,
-                 fs: object,
-                 log: object = None,
-                 loop: object = None,
-                 url: str = None,
-                 username: str = None,
-                 vca_config: dict = None,
-                 on_update_db=None, ):
+    def __init__(
+        self,
+        log: object = None,
+        loop: object = None,
+        vca_config: dict = None,
+        on_update_db=None,
+    ):
         """
         Initialize EE helm connector.
         """
 
+        self.db = Database().instance.db
+        self.fs = Filesystem().instance.fs
+
         # parent class constructor
         N2VCConnector.__init__(
-            self,
-            db=db,
-            fs=fs,
-            log=log,
-            loop=loop,
-            url=url,
-            username=username,
-            vca_config=vca_config,
-            on_update_db=on_update_db,
+            self, log=log, loop=loop, on_update_db=on_update_db, db=self.db, fs=self.fs
         )
 
+        self.vca_config = vca_config
         self.log.debug("Initialize helm N2VC connector")
+        self.log.debug("initial vca_config: {}".format(vca_config))
 
         # TODO - Obtain data from configuration
         self._ee_service_port = self._EE_SERVICE_PORT
 
         self._retry_delay = self._EE_RETRY_DELAY
-        self._max_retry_time = self._MAX_RETRY_TIME
-        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
+
+        if self.vca_config and self.vca_config.get("eegrpcinittimeout"):
+            self._initial_retry_time = self.vca_config.get("eegrpcinittimeout")
+            self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
+        else:
+            self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
+            self.log.debug(
+                "Applied default retry time: {}".format(self._initial_retry_time)
+            )
+
+        if self.vca_config and self.vca_config.get("eegrpctimeout"):
+            self._max_retry_time = self.vca_config.get("eegrpctimeout")
+            self.log.debug("Retry time: {}".format(self._max_retry_time))
+        else:
+            self._max_retry_time = self._MAX_RETRY_TIME
+            self.log.debug(
+                "Applied default retry time: {}".format(self._max_retry_time)
+            )
 
         # initialize helm connector for helmv2 and helmv3
         self._k8sclusterhelm2 = K8sHelmConnector(
             kubectl_command=self.vca_config.get("kubectlpath"),
             helm_command=self.vca_config.get("helmpath"),
             fs=self.fs,
-            log=self.log,
             db=self.db,
+            log=self.log,
             on_update_db=None,
         )
 
@@ -129,16 +158,19 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         self.log.info("Helm N2VC connector initialized")
 
     # TODO - ¿reuse_ee_id?
-    async def create_execution_environment(self,
-                                           namespace: str,
-                                           db_dict: dict,
-                                           reuse_ee_id: str = None,
-                                           progress_timeout: float = None,
-                                           total_timeout: float = None,
-                                           config: dict = None,
-                                           artifact_path: str = None,
-                                           vca_type: str = None,
-                                           *kargs, **kwargs) -> (str, dict):
+    async def create_execution_environment(
+        self,
+        namespace: str,
+        db_dict: dict,
+        reuse_ee_id: str = None,
+        progress_timeout: float = None,
+        total_timeout: float = None,
+        config: dict = None,
+        artifact_path: str = None,
+        vca_type: str = None,
+        *kargs,
+        **kwargs,
+    ) -> (str, dict):
         """
         Creates a new helm execution environment deploying the helm-chat indicated in the
         attifact_path
@@ -160,8 +192,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
         self.log.info(
             "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
-            "reuse_ee_id: {}".format(
-                namespace, artifact_path, db_dict, reuse_ee_id)
+            "reuse_ee_id: {}".format(namespace, artifact_path, db_dict, reuse_ee_id)
         )
 
         # Validate artifact-path is provided
@@ -190,6 +221,9 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         else:
             full_path = self.fs.path + "/" + helm_chart_path
 
+        while full_path.find("//") >= 0:
+            full_path = full_path.replace("//", "/")
+
         try:
             # Call helm conn install
             # Obtain system cluster id from database
@@ -202,17 +236,33 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
             self.log.debug("install helm chart: {}".format(full_path))
             if vca_type == "helm":
-                helm_id = await self._k8sclusterhelm2.install(system_cluster_uuid, kdu_model=full_path,
-                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
-                                                              params=config,
-                                                              db_dict=db_dict,
-                                                              timeout=progress_timeout)
+                helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
+                    db_dict=db_dict,
+                    kdu_model=full_path,
+                )
+                await self._k8sclusterhelm2.install(
+                    system_cluster_uuid,
+                    kdu_model=full_path,
+                    kdu_instance=helm_id,
+                    namespace=self._KUBECTL_OSM_NAMESPACE,
+                    params=config,
+                    db_dict=db_dict,
+                    timeout=progress_timeout,
+                )
             else:
-                helm_id = await self._k8sclusterhelm3.install(system_cluster_uuid, kdu_model=full_path,
-                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
-                                                              params=config,
-                                                              db_dict=db_dict,
-                                                              timeout=progress_timeout)
+                helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
+                    db_dict=db_dict,
+                    kdu_model=full_path,
+                )
+                await self._k8sclusterhelm3.install(
+                    system_cluster_uuid,
+                    kdu_model=full_path,
+                    kdu_instance=helm_id,
+                    namespace=self._KUBECTL_OSM_NAMESPACE,
+                    params=config,
+                    db_dict=db_dict,
+                    timeout=progress_timeout,
+                )
 
             ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
             return ee_id, None
@@ -222,26 +272,24 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
             raise N2VCException("Error deploying chart ee: {}".format(e))
 
-    async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
-                                             progress_timeout: float = None, total_timeout: float = None,
-                                             *kargs, **kwargs) -> str:
+    async def register_execution_environment(
+        self,
+        namespace: str,
+        credentials: dict,
+        db_dict: dict,
+        progress_timeout: float = None,
+        total_timeout: float = None,
+        *kargs,
+        **kwargs,
+    ) -> str:
         # nothing to do
         pass
 
-    async def install_configuration_sw(self,
-                                       ee_id: str,
-                                       artifact_path: str,
-                                       db_dict: dict,
-                                       progress_timeout: float = None,
-                                       total_timeout: float = None,
-                                       config: dict = None,
-                                       num_units: int = 1,
-                                       vca_type: str = None
-                                       ):
+    async def install_configuration_sw(self, *args, **kwargs):
         # nothing to do
         pass
 
-    async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
+    async def add_relation(self, *args, **kwargs):
         # nothing to do
         pass
 
@@ -249,12 +297,22 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         # nothing to to
         pass
 
-    async def get_status(self, namespace: str, yaml_format: bool = True):
+    async def get_status(self, *args, **kwargs):
+        # not used for this connector
+        pass
+
+    async def upgrade_charm(self, *args, **kwargs):
         # not used for this connector
         pass
 
-    async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
-                                     total_timeout: float = None) -> str:
+    async def get_ee_ssh_public__key(
+        self,
+        ee_id: str,
+        db_dict: dict,
+        progress_timeout: float = None,
+        total_timeout: float = None,
+        **kwargs,
+    ) -> str:
         """
         Obtains ssh-public key from ee executing GetSShKey method from the ee.
 
@@ -267,8 +325,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         """
 
         self.log.info(
-            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
-                ee_id, db_dict)
+            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(ee_id, db_dict)
         )
 
         # check arguments
@@ -290,8 +347,41 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
             raise N2VCException("Error obtaining ee ssh_ke: {}".format(e))
 
-    async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
-                             progress_timeout: float = None, total_timeout: float = None) -> str:
+    async def upgrade_charm(
+        self,
+        ee_id: str = None,
+        path: str = None,
+        charm_id: str = None,
+        charm_type: str = None,
+        timeout: float = None,
+    ) -> str:
+        """This method upgrades charms in VNFs
+
+        This method does not support KDUs deployed with Helm.
+
+        Args:
+            ee_id:  Execution environment id
+            path:   Local path to the charm
+            charm_id:   charm-id
+            charm_type: Charm type can be lxc-proxy-charm, native-charm or k8s-proxy-charm
+            timeout: (Float)    Timeout for the ns update operation
+
+        Raises:
+            N2VCException: always, KDUs deployed with Helm do not support charm upgrade
+
+        """
+        raise N2VCException("KDUs deployed with Helm do not support charm upgrade")
+
+    async def exec_primitive(
+        self,
+        ee_id: str,
+        primitive_name: str,
+        params_dict: dict,
+        db_dict: dict = None,
+        progress_timeout: float = None,
+        total_timeout: float = None,
+        **kwargs,
+    ) -> str:
         """
         Execute a primitive in the execution environment
 
@@ -312,9 +402,11 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         :returns str: primitive result, if ok. It raises exceptions in case of fail
         """
 
-        self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
-            ee_id, primitive_name, params_dict, db_dict
-        ))
+        self.log.info(
+            "exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
+                ee_id, primitive_name, params_dict, db_dict
+            )
+        )
 
         # check arguments
         if ee_id is None or len(ee_id) == 0:
@@ -338,12 +430,20 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         if primitive_name == "config":
             try:
                 # Execute config primitive, higher timeout to check the case ee is starting
-                status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
-                self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
-                    ee_id, status, detailed_message))
+                status, detailed_message = await self._execute_config_primitive(
+                    ip_addr, params_dict, db_dict=db_dict
+                )
+                self.log.debug(
+                    "Executed config primitive ee_id_ {}, status: {}, message: {}".format(
+                        ee_id, status, detailed_message
+                    )
+                )
                 if status != "OK":
-                    self.log.error("Error configuring helm ee, status: {}, message: {}".format(
-                        status, detailed_message))
+                    self.log.error(
+                        "Error configuring helm ee, status: {}, message: {}".format(
+                            status, detailed_message
+                        )
+                    )
                     raise N2VCExecutionException(
                         message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                             ee_id, status, detailed_message
@@ -353,23 +453,26 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             except Exception as e:
                 self.log.error("Error configuring helm ee: {}".format(e))
                 raise N2VCExecutionException(
-                    message="Error configuring helm ee_id: {}, {}".format(
-                        ee_id, e
-                    ),
+                    message="Error configuring helm ee_id: {}, {}".format(ee_id, e),
                     primitive_name=primitive_name,
                 )
             return "CONFIG OK"
         else:
             try:
                 # Execute primitive
-                status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
-                                                                         params_dict, db_dict=db_dict)
-                self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
-                    primitive_name, ee_id, status, detailed_message))
+                status, detailed_message = await self._execute_primitive(
+                    ip_addr, primitive_name, params_dict, db_dict=db_dict
+                )
+                self.log.debug(
+                    "Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
+                        primitive_name, ee_id, status, detailed_message
+                    )
+                )
                 if status != "OK" and status != "PROCESSING":
                     self.log.error(
                         "Execute primitive {} returned not ok status: {}, message: {}".format(
-                            primitive_name, status, detailed_message)
+                            primitive_name, status, detailed_message
+                        )
                     )
                     raise N2VCExecutionException(
                         message="Execute primitive {} returned not ok status: {}, message: {}".format(
@@ -393,7 +496,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         # nothing to be done
         pass
 
-    async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
+    async def delete_execution_environment(
+        self,
+        ee_id: str,
+        db_dict: dict = None,
+        total_timeout: float = None,
+        **kwargs,
+    ):
         """
         Delete an execution environment
         :param str ee_id: id of the execution environment to delete, included namespace.helm_id
@@ -424,17 +533,21 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             # Uninstall chart, for backward compatibility we must assume that if there is no
             # version it is helm-v2
             if version == "helm-v3":
-                await  self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
+                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
             else:
                 await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
             self.log.info("ee_id: {} deleted".format(ee_id))
         except N2VCException:
             raise
         except Exception as e:
-            self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
+            self.log.error(
+                "Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True
+            )
             raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))
 
-    async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
+    async def delete_namespace(
+        self, namespace: str, db_dict: dict = None, total_timeout: float = None
+    ):
         # method not implemented for this connector, execution environments must be deleted individually
         pass
 
@@ -447,11 +560,12 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         progress_timeout: float = None,
         total_timeout: float = None,
         config: dict = None,
-        *kargs, **kwargs
+        *kargs,
+        **kwargs,
     ) -> str:
         pass
 
-    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
+    @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _get_ssh_key(self, ip_addr):
         channel = Channel(ip_addr, self._ee_service_port)
         try:
@@ -462,15 +576,21 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         finally:
             channel.close()
 
-    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
+    @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
-        return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)
+        return await self._execute_primitive_internal(
+            ip_addr, "config", params, db_dict=db_dict
+        )
 
-    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
+    @retryer(max_wait_time_var="_max_retry_time", delay_time_var="_retry_delay")
     async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
-        return await  self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)
+        return await self._execute_primitive_internal(
+            ip_addr, primitive_name, params, db_dict=db_dict
+        )
 
-    async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
+    async def _execute_primitive_internal(
+        self, ip_addr, primitive_name, params, db_dict=None
+    ):
 
         channel = Channel(ip_addr, self._ee_service_port)
         try:
@@ -478,16 +598,25 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             async with stub.RunPrimitive.open() as stream:
                 primitive_id = str(uuid.uuid1())
                 result = None
-                self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
-                               format(primitive_id, primitive_name, params))
+                self.log.debug(
+                    "Execute primitive internal: id:{}, name:{}, params: {}".format(
+                        primitive_id, primitive_name, params
+                    )
+                )
                 await stream.send_message(
-                    PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
+                    PrimitiveRequest(
+                        id=primitive_id, name=primitive_name, params=yaml.dump(params)
+                    ),
+                    end=True,
+                )
                 async for reply in stream:
                     self.log.debug("Received reply: {}".format(reply))
                     result = reply
                     # If db_dict provided write notifs in database
                     if db_dict:
-                        self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
+                        self._write_op_detailed_status(
+                            db_dict, reply.status, reply.detailed_message
+                        )
                 if result:
                     return reply.status, reply.detailed_message
                 else:
@@ -516,24 +645,34 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
     async def _get_system_cluster_id(self):
         if not self._system_cluster_id:
-            db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
+            db_k8cluster = self.db.get_one(
+                "k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME}
+            )
             k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
             if not k8s_hc_id:
                 try:
                     # backward compatibility for existing clusters that have not been initialized for helm v3
                     cluster_id = db_k8cluster.get("_id")
                     k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
-                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(k8s_credentials,
-                                                                                   reuse_cluster_uuid=cluster_id)
-                    db_k8scluster_update = {"_admin.helm-chart-v3.error_msg": None,
-                                            "_admin.helm-chart-v3.id": k8s_hc_id,
-                                            "_admin.helm-chart-v3}.created": uninstall_sw,
-                                            "_admin.helm-chart-v3.operationalState": "ENABLED"}
+                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(
+                        k8s_credentials, reuse_cluster_uuid=cluster_id
+                    )
+                    db_k8scluster_update = {  # fields stored on the k8scluster record
+                        "_admin.helm-chart-v3.error_msg": None,
+                        "_admin.helm-chart-v3.id": k8s_hc_id,
+                        "_admin.helm-chart-v3.created": uninstall_sw,
+                        "_admin.helm-chart-v3.operationalState": "ENABLED",
+                    }
                     self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
                 except Exception as e:
-                    self.log.error("error initializing helm-v3 cluster: {}".format(str(e)))
-                    raise N2VCException("K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
-                        cluster_id))
+                    self.log.error(
+                        "error initializing helm-v3 cluster: {}".format(str(e))
+                    )
+                    raise N2VCException(
+                        "K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
+                            cluster_id
+                        )
+                    )
             self._system_cluster_id = k8s_hc_id
         return self._system_cluster_id
 
@@ -543,6 +682,6 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         namespace.helm_id for backward compatibility
         If exists helm version can be helm-v3 or helm (helm-v2 old version)
         """
-        version, _, part_id = ee_id.rpartition(':')
-        namespace, _, helm_id = part_id.rpartition('.')
+        version, _, part_id = ee_id.rpartition(":")
+        namespace, _, helm_id = part_id.rpartition(".")
         return version, namespace, helm_id