Feature 11002: Deorecate helmv2
[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
index 6fd3374..930ec60 100644 (file)
 import functools
 import yaml
 import asyncio
-import socket
 import uuid
 import os
+import ssl
 
 from grpclib.client import Channel
 
+from osm_lcm.data_utils.lcm_config import VcaConfig
 from osm_lcm.frontend_pb2 import PrimitiveRequest
 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
 from osm_lcm.frontend_grpc import FrontendExecutorStub
-from osm_lcm.lcm_utils import LcmBase
+from osm_lcm.lcm_utils import LcmBase, get_ee_id_parts
 
 from osm_lcm.data_utils.database.database import Database
 from osm_lcm.data_utils.filesystem.filesystem import Filesystem
 
 from n2vc.n2vc_conn import N2VCConnector
-from n2vc.k8s_helm_conn import K8sHelmConnector
 from n2vc.k8s_helm3_conn import K8sHelm3Connector
 from n2vc.exceptions import (
     N2VCBadArgumentsException,
@@ -46,7 +46,7 @@ from osm_lcm.lcm_utils import deep_get
 
 def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
     def wrapper(func):
-        retry_exceptions = ConnectionRefusedError
+        retry_exceptions = (ConnectionRefusedError, TimeoutError)
 
         @functools.wraps(func)
         async def wrapped(*args, **kwargs):
@@ -77,23 +77,25 @@ def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_dela
     return wrapper
 
 
-class LCMHelmConn(N2VCConnector, LcmBase):
-    _KUBECTL_OSM_NAMESPACE = "osm"
-    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
-    _EE_SERVICE_PORT = 50050
+def create_secure_context(
+    trusted: str, client_cert_path: str, client_key_path: str
+) -> ssl.SSLContext:
+    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+    ctx.verify_mode = ssl.CERT_REQUIRED
+    ctx.check_hostname = True
+    ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+    ctx.load_cert_chain(client_cert_path, client_key_path)
+    ctx.load_verify_locations(trusted)
+    ctx.set_ciphers("ECDHE+AESGCM:ECDHE+CHACHA20:DHE+AESGCM:DHE+CHACHA20")
+    ctx.set_alpn_protocols(["h2"])
+    return ctx
 
-    # Initial max retry time
-    _MAX_INITIAL_RETRY_TIME = 600
-    # Max retry time for normal operations
-    _MAX_RETRY_TIME = 30
-    # Time beetween retries, retry time after a connection error is raised
-    _EE_RETRY_DELAY = 10
 
+class LCMHelmConn(N2VCConnector, LcmBase):
     def __init__(
         self,
         log: object = None,
-        loop: object = None,
-        vca_config: dict = None,
+        vca_config: VcaConfig = None,
         on_update_db=None,
     ):
         """
@@ -105,49 +107,25 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
         # parent class constructor
         N2VCConnector.__init__(
-            self, log=log, loop=loop, on_update_db=on_update_db, db=self.db, fs=self.fs
+            self, log=log, on_update_db=on_update_db, db=self.db, fs=self.fs
         )
 
         self.vca_config = vca_config
         self.log.debug("Initialize helm N2VC connector")
-        self.log.debug("initial vca_config: {}".format(vca_config))
-
-        # TODO - Obtain data from configuration
-        self._ee_service_port = self._EE_SERVICE_PORT
+        self.log.debug("initial vca_config: {}".format(vca_config.to_dict()))
 
-        self._retry_delay = self._EE_RETRY_DELAY
+        self._retry_delay = self.vca_config.helm_ee_retry_delay
 
-        if self.vca_config and self.vca_config.get("eegrpcinittimeout"):
-            self._initial_retry_time = self.vca_config.get("eegrpcinittimeout")
-            self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
-        else:
-            self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
-            self.log.debug(
-                "Applied default retry time: {}".format(self._initial_retry_time)
-            )
+        self._initial_retry_time = self.vca_config.helm_max_initial_retry_time
+        self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
 
-        if self.vca_config and self.vca_config.get("eegrpctimeout"):
-            self._max_retry_time = self.vca_config.get("eegrpctimeout")
-            self.log.debug("Retry time: {}".format(self._max_retry_time))
-        else:
-            self._max_retry_time = self._MAX_RETRY_TIME
-            self.log.debug(
-                "Applied default retry time: {}".format(self._max_retry_time)
-            )
-
-        # initialize helm connector for helmv2 and helmv3
-        self._k8sclusterhelm2 = K8sHelmConnector(
-            kubectl_command=self.vca_config.get("kubectlpath"),
-            helm_command=self.vca_config.get("helmpath"),
-            fs=self.fs,
-            db=self.db,
-            log=self.log,
-            on_update_db=None,
-        )
+        self._max_retry_time = self.vca_config.helm_max_retry_time
+        self.log.debug("Retry time: {}".format(self._max_retry_time))
 
+        # initialize helm connector for helmv3
         self._k8sclusterhelm3 = K8sHelm3Connector(
-            kubectl_command=self.vca_config.get("kubectlpath"),
-            helm_command=self.vca_config.get("helm3path"),
+            kubectl_command=self.vca_config.kubectlpath,
+            helm_command=self.vca_config.helm3path,
             fs=self.fs,
             log=self.log,
             db=self.db,
@@ -167,32 +145,44 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         total_timeout: float = None,
         config: dict = None,
         artifact_path: str = None,
+        chart_model: str = None,
         vca_type: str = None,
         *kargs,
         **kwargs,
     ) -> (str, dict):
         """
         Creates a new helm execution environment deploying the helm-chat indicated in the
-        attifact_path
+        artifact_path
         :param str namespace: This param is not used, all helm charts are deployed in the osm
         system namespace
         :param dict db_dict: where to write to database when the status changes.
             It contains a dictionary with {collection: str, filter: {},  path: str},
                 e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
                 "_admin.deployed.VCA.3"}
-        :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
+        :param str reuse_ee_id: ee id from an older execution. TODO - right now this param is not used
         :param float progress_timeout:
         :param float total_timeout:
         :param dict config:  General variables to instantiate KDU
-        :param str artifact_path:  path of package content
-        :param str vca_type:  Type of vca, must be type helm or helm-v3
+        :param str artifact_path: path of package content
+        :param str chart_model: helm chart/reference (string), which can be either
+            of these options:
+            - a name of chart available via the repos known by OSM
+              (e.g. stable/openldap, stable/openldap:1.2.4)
+            - a path to a packaged chart (e.g. mychart.tgz)
+            - a path to an unpacked chart directory or a URL (e.g. mychart)
+        :param str vca_type:  Type of vca, must be type helm-v3
         :returns str, dict: id of the new execution environment including namespace.helm_id
         and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
         """
 
+        if not namespace:
+            namespace = self.vca_config.kubectl_osm_namespace
+
         self.log.info(
-            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
-            "reuse_ee_id: {}".format(namespace, artifact_path, db_dict, reuse_ee_id)
+            "create_execution_environment: namespace: {}, artifact_path: {}, "
+            "chart_model: {}, db_dict: {}, reuse_ee_id: {}".format(
+                namespace, artifact_path, db_dict, chart_model, reuse_ee_id
+            )
         )
 
         # Validate artifact-path is provided
@@ -224,6 +214,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         while full_path.find("//") >= 0:
             full_path = full_path.replace("//", "/")
 
+        # By default, the KDU is expected to be a file
+        kdu_model = full_path
+        # If the chart_model includes a "/", then it is a reference:
+        #    e.g. (stable/openldap; stable/openldap:1.2.4)
+        if chart_model.find("/") >= 0:
+            kdu_model = chart_model
+
         try:
             # Call helm conn install
             # Obtain system cluster id from database
@@ -235,36 +232,21 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                 config["global"]["osm"] = config.get("osm")
 
             self.log.debug("install helm chart: {}".format(full_path))
-            if vca_type == "helm":
-                helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
-                    db_dict=db_dict,
-                    kdu_model=full_path,
-                )
-                await self._k8sclusterhelm2.install(
-                    system_cluster_uuid,
-                    kdu_model=full_path,
-                    kdu_instance=helm_id,
-                    namespace=self._KUBECTL_OSM_NAMESPACE,
-                    params=config,
-                    db_dict=db_dict,
-                    timeout=progress_timeout,
-                )
-            else:
-                helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
-                    db_dict=db_dict,
-                    kdu_model=full_path,
-                )
-                await self._k8sclusterhelm3.install(
-                    system_cluster_uuid,
-                    kdu_model=full_path,
-                    kdu_instance=helm_id,
-                    namespace=self._KUBECTL_OSM_NAMESPACE,
-                    params=config,
-                    db_dict=db_dict,
-                    timeout=progress_timeout,
-                )
+            helm_id = self._k8sclusterhelm3.generate_kdu_instance_name(
+                db_dict=db_dict,
+                kdu_model=kdu_model,
+            )
+            await self._k8sclusterhelm3.install(
+                system_cluster_uuid,
+                kdu_model=kdu_model,
+                kdu_instance=helm_id,
+                namespace=namespace,
+                params=config,
+                db_dict=db_dict,
+                timeout=progress_timeout,
+            )
 
-            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
+            ee_id = "{}:{}.{}".format(vca_type, namespace, helm_id)
             return ee_id, None
         except N2VCException:
             raise
@@ -272,6 +254,169 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
             raise N2VCException("Error deploying chart ee: {}".format(e))
 
+    async def upgrade_execution_environment(
+        self,
+        namespace: str,
+        db_dict: dict,
+        helm_id: str,
+        progress_timeout: float = None,
+        total_timeout: float = None,
+        config: dict = None,
+        artifact_path: str = None,
+        vca_type: str = None,
+        *kargs,
+        **kwargs,
+    ) -> (str, dict):
+        """
+        Creates a new helm execution environment deploying the helm-chat indicated in the
+        attifact_path
+        :param str namespace: This param is not used, all helm charts are deployed in the osm
+        system namespace
+        :param dict db_dict: where to write to database when the status changes.
+            It contains a dictionary with {collection: str, filter: {},  path: str},
+                e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
+                "_admin.deployed.VCA.3"}
+        :param helm_id: unique name of the Helm release to upgrade
+        :param float progress_timeout:
+        :param float total_timeout:
+        :param dict config:  General variables to instantiate KDU
+        :param str artifact_path:  path of package content
+        :param str vca_type:  Type of vca, must be type helm-v3
+        :returns str, dict: id of the new execution environment including namespace.helm_id
+        and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
+        """
+
+        self.log.info(
+            "upgrade_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
+        )
+
+        # Validate helm_id is provided
+        if helm_id is None or len(helm_id) == 0:
+            raise N2VCBadArgumentsException(
+                message="helm_id is mandatory", bad_args=["helm_id"]
+            )
+
+        # Validate artifact-path is provided
+        if artifact_path is None or len(artifact_path) == 0:
+            raise N2VCBadArgumentsException(
+                message="artifact_path is mandatory", bad_args=["artifact_path"]
+            )
+
+        # Validate artifact-path exists and sync path
+        from_path = os.path.split(artifact_path)[0]
+        self.fs.sync(from_path)
+
+        # remove / in charm path
+        while artifact_path.find("//") >= 0:
+            artifact_path = artifact_path.replace("//", "/")
+
+        # check charm path
+        if self.fs.file_exists(artifact_path):
+            helm_chart_path = artifact_path
+        else:
+            msg = "artifact path does not exist: {}".format(artifact_path)
+            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])
+
+        if artifact_path.startswith("/"):
+            full_path = self.fs.path + helm_chart_path
+        else:
+            full_path = self.fs.path + "/" + helm_chart_path
+
+        while full_path.find("//") >= 0:
+            full_path = full_path.replace("//", "/")
+
+        try:
+            # Call helm conn upgrade
+            # Obtain system cluster id from database
+            system_cluster_uuid = await self._get_system_cluster_id()
+            # Add parameter osm if exist to global
+            if config and config.get("osm"):
+                if not config.get("global"):
+                    config["global"] = {}
+                config["global"]["osm"] = config.get("osm")
+
+            self.log.debug("Ugrade helm chart: {}".format(full_path))
+            await self._k8sclusterhelm3.upgrade(
+                system_cluster_uuid,
+                kdu_model=full_path,
+                kdu_instance=helm_id,
+                namespace=namespace,
+                params=config,
+                db_dict=db_dict,
+                timeout=progress_timeout,
+                force=True,
+            )
+
+        except N2VCException:
+            raise
+        except Exception as e:
+            self.log.error("Error upgrading chart ee: {}".format(e), exc_info=True)
+            raise N2VCException("Error upgrading chart ee: {}".format(e))
+
+    async def create_tls_certificate(
+        self,
+        nsr_id: str,
+        secret_name: str,
+        usage: str,
+        dns_prefix: str,
+        namespace: str = None,
+    ):
+        # Obtain system cluster id from database
+        system_cluster_uuid = await self._get_system_cluster_id()
+        # use helm-v3 as certificates don't depend on helm version
+        await self._k8sclusterhelm3.create_certificate(
+            cluster_uuid=system_cluster_uuid,
+            namespace=namespace or self.vca_config.kubectl_osm_namespace,
+            dns_prefix=dns_prefix,
+            name=nsr_id,
+            secret_name=secret_name,
+            usage=usage,
+        )
+
+    async def delete_tls_certificate(
+        self,
+        certificate_name: str = None,
+        namespace: str = None,
+    ):
+        # Obtain system cluster id from database
+        system_cluster_uuid = await self._get_system_cluster_id()
+        await self._k8sclusterhelm3.delete_certificate(
+            cluster_uuid=system_cluster_uuid,
+            namespace=namespace or self.vca_config.kubectl_osm_namespace,
+            certificate_name=certificate_name,
+        )
+
+    async def setup_ns_namespace(
+        self,
+        name: str,
+    ):
+        # Obtain system cluster id from database
+        system_cluster_uuid = await self._get_system_cluster_id()
+        await self._k8sclusterhelm3.create_namespace(
+            namespace=name,
+            cluster_uuid=system_cluster_uuid,
+            labels={
+                "pod-security.kubernetes.io/enforce": self.vca_config.eegrpc_pod_admission_policy
+            },
+        )
+        await self._k8sclusterhelm3.setup_default_rbac(
+            name="ee-role",
+            namespace=name,
+            api_groups=[""],
+            resources=["secrets"],
+            verbs=["get"],
+            service_account="default",
+            cluster_uuid=system_cluster_uuid,
+        )
+        await self._k8sclusterhelm3.copy_secret_data(
+            src_secret="osm-ca",
+            dst_secret="osm-ca",
+            src_namespace=self.vca_config.kubectl_osm_namespace,
+            dst_namespace=name,
+            cluster_uuid=system_cluster_uuid,
+            data_key="ca.crt",
+        )
+
     async def register_execution_environment(
         self,
         namespace: str,
@@ -332,9 +477,8 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
         try:
             # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
-            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
-            ip_addr = socket.gethostbyname(helm_id)
-
+            version, namespace, helm_id = get_ee_id_parts(ee_id)
+            ip_addr = "{}.{}.svc".format(helm_id, namespace)
             # Obtain ssh_key from the ee, this method will implement retries to allow the ee
             # install libraries and start successfully
             ssh_key = await self._get_ssh_key(ip_addr)
@@ -417,8 +561,8 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             params_dict = dict()
 
         try:
-            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
-            ip_addr = socket.gethostbyname(helm_id)
+            version, namespace, helm_id = get_ee_id_parts(ee_id)
+            ip_addr = "{}.{}.svc".format(helm_id, namespace)
         except Exception as e:
             self.log.error("Error getting ee ip ee: {}".format(e))
             raise N2VCException("Error getting ee ip ee: {}".format(e))
@@ -519,19 +663,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             )
 
         try:
-
             # Obtain cluster_uuid
             system_cluster_uuid = await self._get_system_cluster_id()
 
             # Get helm_id
-            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
+            version, namespace, helm_id = get_ee_id_parts(ee_id)
 
-            # Uninstall chart, for backward compatibility we must assume that if there is no
-            # version it is helm-v2
-            if version == "helm-v3":
-                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
-            else:
-                await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
+            await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
             self.log.info("ee_id: {} deleted".format(ee_id))
         except N2VCException:
             raise
@@ -544,8 +682,12 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     async def delete_namespace(
         self, namespace: str, db_dict: dict = None, total_timeout: float = None
     ):
-        # method not implemented for this connector, execution environments must be deleted individually
-        pass
+        # Obtain system cluster id from database
+        system_cluster_uuid = await self._get_system_cluster_id()
+        await self._k8sclusterhelm3.delete_namespace(
+            namespace=namespace,
+            cluster_uuid=system_cluster_uuid,
+        )
 
     async def install_k8s_proxy_charm(
         self,
@@ -563,14 +705,11 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
     @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _get_ssh_key(self, ip_addr):
-        channel = Channel(ip_addr, self._ee_service_port)
-        try:
-            stub = FrontendExecutorStub(channel)
-            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
-            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
-            return reply.message
-        finally:
-            channel.close()
+        return await self._execute_primitive_internal(
+            ip_addr,
+            "_get_ssh_key",
+            None,
+        )
 
     @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
@@ -587,10 +726,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     async def _execute_primitive_internal(
         self, ip_addr, primitive_name, params, db_dict=None
     ):
-
-        channel = Channel(ip_addr, self._ee_service_port)
-        try:
+        async def execute():
             stub = FrontendExecutorStub(channel)
+            if primitive_name == "_get_ssh_key":
+                self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
+                reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
+                return reply.message
+            # For any other primitives
             async with stub.RunPrimitive.open() as stream:
                 primitive_id = str(uuid.uuid1())
                 result = None
@@ -617,11 +759,37 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                     return reply.status, reply.detailed_message
                 else:
                     return "ERROR", "No result received"
+
+        ssl_context = create_secure_context(
+            self.vca_config.ca_store,
+            self.vca_config.client_cert_path,
+            self.vca_config.client_key_path,
+        )
+        channel = Channel(
+            ip_addr, self.vca_config.helm_ee_service_port, ssl=ssl_context
+        )
+        try:
+            return await execute()
+        except ssl.SSLError as ssl_error:  # fallback to insecure gRPC
+            if (
+                ssl_error.reason == "WRONG_VERSION_NUMBER"
+                and not self.vca_config.eegrpc_tls_enforce
+            ):
+                self.log.debug(
+                    "Execution environment doesn't support TLS, falling back to unsecure gRPC"
+                )
+                channel = Channel(ip_addr, self.vca_config.helm_ee_service_port)
+                return await execute()
+            elif ssl_error.reason == "WRONG_VERSION_NUMBER":
+                raise N2VCException(
+                    "Execution environment doesn't support TLS, primitives cannot be executed"
+                )
+            else:
+                raise
         finally:
             channel.close()
 
     def _write_op_detailed_status(self, db_dict, status, detailed_message):
-
         # write ee_id to database: _admin.deployed.VCA.x
         try:
             the_table = db_dict["collection"]
@@ -642,7 +810,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     async def _get_system_cluster_id(self):
         if not self._system_cluster_id:
             db_k8cluster = self.db.get_one(
-                "k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME}
+                "k8sclusters", {"name": self.vca_config.kubectl_osm_cluster_name}
             )
             k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
             if not k8s_hc_id:
@@ -671,13 +839,3 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                     )
             self._system_cluster_id = k8s_hc_id
         return self._system_cluster_id
-
-    def _get_ee_id_parts(self, ee_id):
-        """
-        Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
-        namespace.helm_id for backward compatibility
-        If exists helm version can be helm-v3 or helm (helm-v2 old version)
-        """
-        version, _, part_id = ee_id.rpartition(":")
-        namespace, _, helm_id = part_id.rpartition(".")
-        return version, namespace, helm_id