Feature 10929: LCM saga, Milestone 1.

[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
diff --git a/osm_lcm/lcm_helm_conn.py b/osm_lcm/lcm_helm_conn.py

index c8ba44d..b8817a5 100644 (file)
--- a/osm_lcm/lcm_helm_conn.py
+++ b/osm_lcm/lcm_helm_conn.py
@@ -18,16 +18,17 @@
  import functools
  import yaml
  import asyncio
-import socket
  import uuid
  import os
+import ssl
  
  from grpclib.client import Channel
  
+from osm_lcm.data_utils.lcm_config import VcaConfig
  from osm_lcm.frontend_pb2 import PrimitiveRequest
  from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
  from osm_lcm.frontend_grpc import FrontendExecutorStub
-from osm_lcm.lcm_utils import LcmBase
+from osm_lcm.lcm_utils import LcmBase, get_ee_id_parts
  
  from osm_lcm.data_utils.database.database import Database
  from osm_lcm.data_utils.filesystem.filesystem import Filesystem
@@ -77,23 +78,31 @@ def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_dela
      return wrapper
  
  
-class LCMHelmConn(N2VCConnector, LcmBase):
-    _KUBECTL_OSM_NAMESPACE = "osm"
-    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
-    _EE_SERVICE_PORT = 50050
+def create_secure_context(
+    trusted: str,  # CA material location, handed to load_verify_locations() below
+) -> ssl.SSLContext:
+    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)  # client-side TLS context
+    ctx.verify_mode = ssl.CERT_REQUIRED  # the server must present a certificate
+    ctx.check_hostname = True  # ...and it must match the host name we connect to
+    ctx.minimum_version = ssl.TLSVersion.TLSv1_2  # refuse anything older than TLS 1.2
+    # TODO: client TLS (no client certificate is loaded yet, only the server is verified)
+    # ctx.load_cert_chain(str(client_cert), str(client_key))
+    ctx.load_verify_locations(trusted)  # CAs used to verify the server certificate
+    ctx.set_ciphers("ECDHE+AESGCM:ECDHE+CHACHA20:DHE+AESGCM:DHE+CHACHA20")
+    ctx.set_alpn_protocols(["h2"])  # offer "h2" (HTTP/2) through ALPN
+    try:
+        ctx.set_npn_protocols(["h2"])  # same offer through NPN, when the ssl module has it
+    except NotImplementedError:
+        pass  # best effort: NPN is not available in this build, keep the ALPN offer only
+    return ctx
  
-    # Initial max retry time
-    _MAX_INITIAL_RETRY_TIME = 600
-    # Max retry time for normal operations
-    _MAX_RETRY_TIME = 30
-    # Time beetween retries, retry time after a connection error is raised
-    _EE_RETRY_DELAY = 10
  
+class LCMHelmConn(N2VCConnector, LcmBase):
      def __init__(
          self,
          log: object = None,
          loop: object = None,
-        vca_config: dict = None,
+        vca_config: VcaConfig = None,
          on_update_db=None,
      ):
          """
@@ -110,35 +119,20 @@ class LCMHelmConn(N2VCConnector, LcmBase):
  
          self.vca_config = vca_config
          self.log.debug("Initialize helm N2VC connector")
-        self.log.debug("initial vca_config: {}".format(vca_config))
+        self.log.debug("initial vca_config: {}".format(vca_config.to_dict()))
  
-        # TODO - Obtain data from configuration
-        self._ee_service_port = self._EE_SERVICE_PORT
+        self._retry_delay = self.vca_config.helm_ee_retry_delay
  
-        self._retry_delay = self._EE_RETRY_DELAY
+        self._initial_retry_time = self.vca_config.helm_max_initial_retry_time
+        self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
  
-        if self.vca_config and self.vca_config.get("eegrpcinittimeout"):
-            self._initial_retry_time = self.vca_config.get("eegrpcinittimeout")
-            self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
-        else:
-            self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
-            self.log.debug(
-                "Applied default retry time: {}".format(self._initial_retry_time)
-            )
-
-        if self.vca_config and self.vca_config.get("eegrpctimeout"):
-            self._max_retry_time = self.vca_config.get("eegrpctimeout")
-            self.log.debug("Retry time: {}".format(self._max_retry_time))
-        else:
-            self._max_retry_time = self._MAX_RETRY_TIME
-            self.log.debug(
-                "Applied default retry time: {}".format(self._max_retry_time)
-            )
+        self._max_retry_time = self.vca_config.helm_max_retry_time
+        self.log.debug("Retry time: {}".format(self._max_retry_time))
  
          # initialize helm connector for helmv2 and helmv3
          self._k8sclusterhelm2 = K8sHelmConnector(
-            kubectl_command=self.vca_config.get("kubectlpath"),
-            helm_command=self.vca_config.get("helmpath"),
+            kubectl_command=self.vca_config.kubectlpath,
+            helm_command=self.vca_config.helmpath,
              fs=self.fs,
              db=self.db,
              log=self.log,
@@ -146,8 +140,8 @@ class LCMHelmConn(N2VCConnector, LcmBase):
          )
  
          self._k8sclusterhelm3 = K8sHelm3Connector(
-            kubectl_command=self.vca_config.get("kubectlpath"),
-            helm_command=self.vca_config.get("helm3path"),
+            kubectl_command=self.vca_config.kubectlpath,
+            helm_command=self.vca_config.helm3path,
              fs=self.fs,
              log=self.log,
              db=self.db,
@@ -167,32 +161,41 @@ class LCMHelmConn(N2VCConnector, LcmBase):
          total_timeout: float = None,
          config: dict = None,
          artifact_path: str = None,
+        chart_model: str = None,
          vca_type: str = None,
          *kargs,
          **kwargs,
      ) -> (str, dict):
          """
          Creates a new helm execution environment deploying the helm-chat indicated in the
-        attifact_path
+        artifact_path
          :param str namespace: This param is not used, all helm charts are deployed in the osm
          system namespace
          :param dict db_dict: where to write to database when the status changes.
              It contains a dictionary with {collection: str, filter: {},  path: str},
                  e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
                  "_admin.deployed.VCA.3"}
-        :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
+        :param str reuse_ee_id: ee id from an older execution. TODO - right now this param is not used
          :param float progress_timeout:
          :param float total_timeout:
          :param dict config:  General variables to instantiate KDU
-        :param str artifact_path:  path of package content
+        :param str artifact_path: path of package content
+        :param str chart_model: helm chart/reference (string), which can be either
+            of these options:
+            - a name of chart available via the repos known by OSM
+              (e.g. stable/openldap, stable/openldap:1.2.4)
+            - a path to a packaged chart (e.g. mychart.tgz)
+            - a path to an unpacked chart directory or a URL (e.g. mychart)
          :param str vca_type:  Type of vca, must be type helm or helm-v3
          :returns str, dict: id of the new execution environment including namespace.helm_id
          and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
          """
  
          self.log.info(
-            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
-            "reuse_ee_id: {}".format(namespace, artifact_path, db_dict, reuse_ee_id)
+            "create_execution_environment: namespace: {}, artifact_path: {}, "
+            "chart_model: {}, db_dict: {}, reuse_ee_id: {}".format(
+                namespace, artifact_path, db_dict, chart_model, reuse_ee_id
+            )
          )
  
          # Validate artifact-path is provided
@@ -224,6 +227,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
          while full_path.find("//") >= 0:
              full_path = full_path.replace("//", "/")
  
+        # By default, the KDU is expected to be a file
+        kdu_model = full_path
+        # chart_model is optional (None by default); if it includes a "/", it is a reference:
+        #    e.g. (stable/openldap; stable/openldap:1.2.4)
+        if chart_model and "/" in chart_model:
+            kdu_model = chart_model
+
          try:
              # Call helm conn install
              # Obtain system cluster id from database
@@ -238,13 +248,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
              if vca_type == "helm":
                  helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
                      db_dict=db_dict,
-                    kdu_model=full_path,
+                    kdu_model=kdu_model,
                  )
                  await self._k8sclusterhelm2.install(
                      system_cluster_uuid,
-                    kdu_model=full_path,
+                    kdu_model=kdu_model,
                      kdu_instance=helm_id,
-                    namespace=self._KUBECTL_OSM_NAMESPACE,
+                    namespace=self.vca_config.kubectl_osm_namespace,
                      params=config,
                      db_dict=db_dict,
                      timeout=progress_timeout,
@@ -252,19 +262,21 @@ class LCMHelmConn(N2VCConnector, LcmBase):
              else:
                  helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
                      db_dict=db_dict,
-                    kdu_model=full_path,
+                    kdu_model=kdu_model,
                  )
                  await self._k8sclusterhelm3.install(
                      system_cluster_uuid,
-                    kdu_model=full_path,
+                    kdu_model=kdu_model,
                      kdu_instance=helm_id,
-                    namespace=self._KUBECTL_OSM_NAMESPACE,
+                    namespace=self.vca_config.kubectl_osm_namespace,
                      params=config,
                      db_dict=db_dict,
                      timeout=progress_timeout,
                  )
  
-            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
+            ee_id = "{}:{}.{}".format(
+                vca_type, self.vca_config.kubectl_osm_namespace, helm_id
+            )
              return ee_id, None
          except N2VCException:
              raise
@@ -272,6 +284,150 @@ class LCMHelmConn(N2VCConnector, LcmBase):
              self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
              raise N2VCException("Error deploying chart ee: {}".format(e))
  
+    async def upgrade_execution_environment(
+        self,
+        namespace: str,
+        db_dict: dict,
+        helm_id: str,
+        progress_timeout: float = None,
+        total_timeout: float = None,
+        config: dict = None,
+        artifact_path: str = None,
+        vca_type: str = None,
+        *kargs,
+        **kwargs,
+    ) -> (str, dict):
+        """
+        Upgrades an existing helm execution environment with the chart at artifact_path
+        :param str namespace: namespace of the release, passed as-is to the helm connector
+        :param dict db_dict: where to write to database when the status changes.
+            It contains a dictionary with {collection: str, filter: {},  path: str},
+                e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
+                "_admin.deployed.VCA.3"}
+        :param str helm_id: unique name of the Helm release to upgrade
+        :param float progress_timeout: timeout handed to the helm connector upgrade call
+        :param float total_timeout: not used by this method
+        :param dict config: chart values; a truthy "osm" entry is copied to global.osm
+        :param str artifact_path: chart path inside the shared filesystem (self.fs)
+        :param str vca_type: "helm" upgrades through helm v2, any other value through helm v3
+        :raises N2VCBadArgumentsException: helm_id/artifact_path missing or path not found
+        :raises N2VCException: any other failure while upgrading the chart
+        :returns: nothing is returned, despite the (str, dict) annotation
+        """
+
+        self.log.info(
+            "upgrade_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
+            "helm_id: {}".format(namespace, artifact_path, db_dict, helm_id)
+        )
+
+        # Validate helm_id is provided
+        if helm_id is None or len(helm_id) == 0:
+            raise N2VCBadArgumentsException(
+                message="helm_id is mandatory", bad_args=["helm_id"]
+            )
+
+        # Validate artifact-path is provided
+        if artifact_path is None or len(artifact_path) == 0:
+            raise N2VCBadArgumentsException(
+                message="artifact_path is mandatory", bad_args=["artifact_path"]
+            )
+
+        # Validate artifact-path exists and sync path
+        from_path = os.path.split(artifact_path)[0]
+        self.fs.sync(from_path)
+
+        # collapse duplicated "/" in the chart path
+        while artifact_path.find("//") >= 0:
+            artifact_path = artifact_path.replace("//", "/")
+
+        # check the chart path exists in the filesystem
+        if self.fs.file_exists(artifact_path):
+            helm_chart_path = artifact_path
+        else:
+            msg = "artifact path does not exist: {}".format(artifact_path)
+            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])
+
+        if artifact_path.startswith("/"):
+            full_path = self.fs.path + helm_chart_path
+        else:
+            full_path = self.fs.path + "/" + helm_chart_path
+
+        while full_path.find("//") >= 0:
+            full_path = full_path.replace("//", "/")
+
+        try:
+            # Call helm conn upgrade
+            # Obtain system cluster id from database
+            system_cluster_uuid = await self._get_system_cluster_id()
+            # Add parameter osm if exist to global
+            if config and config.get("osm"):
+                if not config.get("global"):
+                    config["global"] = {}
+                config["global"]["osm"] = config.get("osm")
+
+            self.log.debug("Ugrade helm chart: {}".format(full_path))
+            if vca_type == "helm":
+                await self._k8sclusterhelm2.upgrade(
+                    system_cluster_uuid,
+                    kdu_model=full_path,
+                    kdu_instance=helm_id,
+                    namespace=namespace,
+                    params=config,
+                    db_dict=db_dict,
+                    timeout=progress_timeout,
+                    force=True,
+                )
+            else:
+                await self._k8sclusterhelm3.upgrade(
+                    system_cluster_uuid,
+                    kdu_model=full_path,
+                    kdu_instance=helm_id,
+                    namespace=namespace,
+                    params=config,
+                    db_dict=db_dict,
+                    timeout=progress_timeout,
+                    force=True,
+                )
+
+        except N2VCException:
+            raise
+        except Exception as e:
+            self.log.error("Error upgrading chart ee: {}".format(e), exc_info=True)
+            raise N2VCException("Error upgrading chart ee: {}".format(e))
+
+    async def create_tls_certificate(
+        self,
+        nsr_id: str,
+        secret_name: str,
+        usage: str,
+        dns_prefix: str,
+        namespace: str = None,  # when empty, vca_config.kubectl_osm_namespace is used
+    ):
+        # Obtain system cluster id from database; the certificate is created in that cluster
+        system_cluster_uuid = await self._get_system_cluster_id()
+        # use helm-v3 as certificates don't depend on helm version
+        await self._k8sclusterhelm3.create_certificate(
+            cluster_uuid=system_cluster_uuid,
+            namespace=namespace or self.vca_config.kubectl_osm_namespace,
+            dns_prefix=dns_prefix,
+            name=nsr_id,  # the certificate is named after nsr_id
+            secret_name=secret_name,
+            usage=usage,
+        )
+
+    async def delete_tls_certificate(
+        self,
+        certificate_name: str = None,
+        namespace: str = None,  # when empty, vca_config.kubectl_osm_namespace is used
+    ):
+        # Obtain system cluster id from database; the certificate is deleted from that cluster
+        system_cluster_uuid = await self._get_system_cluster_id()
+        await self._k8sclusterhelm3.delete_certificate(
+            cluster_uuid=system_cluster_uuid,
+            namespace=namespace or self.vca_config.kubectl_osm_namespace,
+            certificate_name=certificate_name,
+        )
+
      async def register_execution_environment(
          self,
          namespace: str,
@@ -301,10 +457,6 @@ class LCMHelmConn(N2VCConnector, LcmBase):
          # not used for this connector
          pass
  
-    async def upgrade_charm(self, *args, **kwargs):
-        # not used for this connector
-        pass
-
      async def get_ee_ssh_public__key(
          self,
          ee_id: str,
@@ -336,9 +488,8 @@ class LCMHelmConn(N2VCConnector, LcmBase):
  
          try:
              # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
-            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
-            ip_addr = socket.gethostbyname(helm_id)
-
+            version, namespace, helm_id = get_ee_id_parts(ee_id)
+            ip_addr = "{}.{}.svc".format(helm_id, namespace)
              # Obtain ssh_key from the ee, this method will implement retries to allow the ee
              # install libraries and start successfully
              ssh_key = await self._get_ssh_key(ip_addr)
@@ -421,8 +572,8 @@ class LCMHelmConn(N2VCConnector, LcmBase):
              params_dict = dict()
  
          try:
-            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
-            ip_addr = socket.gethostbyname(helm_id)
+            version, namespace, helm_id = get_ee_id_parts(ee_id)
+            ip_addr = "{}.{}.svc".format(helm_id, namespace)
          except Exception as e:
              self.log.error("Error getting ee ip ee: {}".format(e))
              raise N2VCException("Error getting ee ip ee: {}".format(e))
@@ -528,7 +679,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
              system_cluster_uuid = await self._get_system_cluster_id()
  
              # Get helm_id
-            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
+            version, namespace, helm_id = get_ee_id_parts(ee_id)
  
              # Uninstall chart, for backward compatibility we must assume that if there is no
              # version it is helm-v2
@@ -567,14 +718,11 @@ class LCMHelmConn(N2VCConnector, LcmBase):
  
      @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
      async def _get_ssh_key(self, ip_addr):
-        channel = Channel(ip_addr, self._ee_service_port)
-        try:
-            stub = FrontendExecutorStub(channel)
-            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
-            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
-            return reply.message
-        finally:
-            channel.close()
+        return await self._execute_primitive_internal(
+            ip_addr,
+            "_get_ssh_key",
+            None,
+        )
  
      @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
      async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
@@ -591,10 +739,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
      async def _execute_primitive_internal(
          self, ip_addr, primitive_name, params, db_dict=None
      ):
-
-        channel = Channel(ip_addr, self._ee_service_port)
-        try:
+        async def execute():
              stub = FrontendExecutorStub(channel)
+            if primitive_name == "_get_ssh_key":
+                self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
+                reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
+                return reply.message
+            # For any other primitives
              async with stub.RunPrimitive.open() as stream:
                  primitive_id = str(uuid.uuid1())
                  result = None
@@ -621,6 +772,29 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                      return reply.status, reply.detailed_message
                  else:
                      return "ERROR", "No result received"
+
+        ssl_context = create_secure_context(self.vca_config.ca_store)
+        channel = Channel(
+            ip_addr, self.vca_config.helm_ee_service_port, ssl=ssl_context
+        )
+        try:
+            return await execute()
+        except ssl.SSLError as ssl_error:  # fallback to insecure gRPC
+            if (
+                ssl_error.reason == "WRONG_VERSION_NUMBER"
+                and not self.vca_config.eegrpc_tls_enforce
+            ):
+                self.log.debug(
+                    "Execution environment doesn't support TLS, falling back to unsecure gRPC"
+                )
+                channel = Channel(ip_addr, self.vca_config.helm_ee_service_port)
+                return await execute()
+            elif ssl_error.reason == "WRONG_VERSION_NUMBER":
+                raise N2VCException(
+                    "Execution environment doesn't support TLS, primitives cannot be executed"
+                )
+            else:
+                raise
          finally:
              channel.close()
  
@@ -646,7 +820,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
      async def _get_system_cluster_id(self):
          if not self._system_cluster_id:
              db_k8cluster = self.db.get_one(
-                "k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME}
+                "k8sclusters", {"name": self.vca_config.kubectl_osm_cluster_name}
              )
              k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
              if not k8s_hc_id:
@@ -675,13 +849,3 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                      )
              self._system_cluster_id = k8s_hc_id
          return self._system_cluster_id
-
-    def _get_ee_id_parts(self, ee_id):
-        """
-        Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
-        namespace.helm_id for backward compatibility
-        If exists helm version can be helm-v3 or helm (helm-v2 old version)
-        """
-        version, _, part_id = ee_id.rpartition(":")
-        namespace, _, helm_id = part_id.rpartition(".")
-        return version, namespace, helm_id