Feature 10947: Create certificates for helm EE: 33/12633/8
author     Gabriel Cuba <gcuba@whitestack.com>
           Fri, 7 Oct 2022 16:38:23 +0000 (11:38 -0500)
committer  Gabriel Cuba <gcuba@whitestack.com>
           Tue, 22 Nov 2022 16:40:52 +0000 (11:40 -0500)
- Add methods to create certificates
- Create and delete certs when NS is created and deleted.
- Add SSL context to gRPC channels (see the sketch below)
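
A minimal sketch of how the new SSL context is meant to be used when opening
the gRPC channel towards a helm-based execution environment. The EE host name
and port below are illustrative placeholders, not values taken from the patch:

    import ssl

    from grpclib.client import Channel

    CA_STORE = "/etc/ssl/certs/osm-ca.crt"  # CA store introduced by this change

    def create_secure_context(trusted: str) -> ssl.SSLContext:
        # TLS 1.2+ client context that verifies the EE server certificate
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.verify_mode = ssl.CERT_REQUIRED
        ctx.check_hostname = True
        ctx.minimum_version = ssl.TLSVersion.TLSv1_2
        ctx.load_verify_locations(trusted)
        ctx.set_alpn_protocols(["h2"])  # gRPC runs over HTTP/2
        return ctx

    # The EE address is now built as "<helm_id>.<namespace>.svc" instead of
    # being resolved with socket.gethostbyname()
    channel = Channel(
        "sample-ee.sample-ns.svc", 50050, ssl=create_secure_context(CA_STORE)
    )

When eegrpc_tls_enforce is disabled, _execute_primitive_internal falls back to
a plain Channel(ip_addr, port) if the EE does not speak TLS; with enforcement
enabled the primitive fails instead.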

Change-Id: Ib1d86a61b5c5be098218d6892ba935e9b8bfdf9f
Signed-off-by: Gabriel Cuba <gcuba@whitestack.com>
osm_lcm/data_utils/vnfd.py
osm_lcm/lcm.cfg
osm_lcm/lcm_helm_conn.py
osm_lcm/ns.py
osm_lcm/tests/test_lcm_helm_conn.py

diff --git a/osm_lcm/data_utils/vnfd.py b/osm_lcm/data_utils/vnfd.py
index ffcb582..9f8104a 100644
@@ -191,3 +191,15 @@ def find_software_version(vnfd: dict) -> str:
 
     else:
         return default_sw_version
+
+
+def check_helm_ee_in_ns(db_vnfds: list) -> bool:
+    for vnfd in db_vnfds:
+        descriptor_config = get_configuration(vnfd, vnfd["id"])
+        if not (
+            descriptor_config and "execution-environment-list" in descriptor_config
+        ):
+            continue
+        ee_list = descriptor_config.get("execution-environment-list", [])
+        if list_utils.find_in_list(ee_list, lambda ee_item: "helm-chart" in ee_item):
+            return True
diff --git a/osm_lcm/lcm.cfg b/osm_lcm/lcm.cfg
index 96fb373..2c04743 100644
@@ -47,6 +47,7 @@ VCA:
     helm3path:    /usr/local/bin/helm3
     kubectlpath: /usr/bin/kubectl
     jujupath:    /usr/local/bin/juju
+    eegrpc_tls_enforce: False
     # pubkey: pubkey
     # cacert: cacert
     # apiproxy: apiproxy
diff --git a/osm_lcm/lcm_helm_conn.py b/osm_lcm/lcm_helm_conn.py
index 0bd5c0f..887c212 100644
@@ -18,9 +18,9 @@
 import functools
 import yaml
 import asyncio
-import socket
 import uuid
 import os
+import ssl
 
 from grpclib.client import Channel
 
@@ -43,6 +43,8 @@ from n2vc.exceptions import (
 
 from osm_lcm.lcm_utils import deep_get
 
+CA_STORE = "/etc/ssl/certs/osm-ca.crt"
+
 
 def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
     def wrapper(func):
@@ -77,6 +79,25 @@ def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
     return wrapper
 
 
+def create_secure_context(
+    trusted: str,
+) -> ssl.SSLContext:
+    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+    ctx.verify_mode = ssl.CERT_REQUIRED
+    ctx.check_hostname = True
+    ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+    # TODO: client TLS
+    # ctx.load_cert_chain(str(client_cert), str(client_key))
+    ctx.load_verify_locations(trusted)
+    ctx.set_ciphers("ECDHE+AESGCM:ECDHE+CHACHA20:DHE+AESGCM:DHE+CHACHA20")
+    ctx.set_alpn_protocols(["h2"])
+    try:
+        ctx.set_npn_protocols(["h2"])
+    except NotImplementedError:
+        pass
+    return ctx
+
+
 class LCMHelmConn(N2VCConnector, LcmBase):
     _KUBECTL_OSM_NAMESPACE = "osm"
     _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
@@ -135,6 +156,14 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                 "Applied default retry time: {}".format(self._max_retry_time)
             )
 
+        if self.vca_config and self.vca_config.get("eegrpc_tls_enforce"):
+            self._tls_enforce = str(
+                self.vca_config.get("eegrpc_tls_enforce")
+            ).lower() in ("true", "1", "yes")
+        else:
+            self._tls_enforce = False
+        self.log.debug("TLS enforce enabled: {}".format(self._tls_enforce))
+
         # initialize helm connector for helmv2 and helmv3
         self._k8sclusterhelm2 = K8sHelmConnector(
             kubectl_command=self.vca_config.get("kubectlpath"),
@@ -399,6 +428,39 @@ class LCMHelmConn(N2VCConnector, LcmBase):
             self.log.error("Error upgrading chart ee: {}".format(e), exc_info=True)
             raise N2VCException("Error upgrading chart ee: {}".format(e))
 
+    async def create_tls_certificate(
+        self,
+        nsr_id: str,
+        secret_name: str,
+        usage: str,
+        dns_prefix: str,
+        namespace: str = _KUBECTL_OSM_NAMESPACE,
+    ):
+        # Obtain system cluster id from database
+        system_cluster_uuid = await self._get_system_cluster_id()
+        # use helm-v3 as certificates don't depend on helm version
+        await self._k8sclusterhelm3.create_certificate(
+            cluster_uuid=system_cluster_uuid,
+            namespace=namespace,
+            dns_prefix=dns_prefix,
+            name=nsr_id,
+            secret_name=secret_name,
+            usage=usage,
+        )
+
+    async def delete_tls_certificate(
+        self,
+        certificate_name: str = None,
+        namespace: str = _KUBECTL_OSM_NAMESPACE,
+    ):
+        # Obtain system cluster id from database
+        system_cluster_uuid = await self._get_system_cluster_id()
+        await self._k8sclusterhelm3.delete_certificate(
+            cluster_uuid=system_cluster_uuid,
+            namespace=namespace,
+            certificate_name=certificate_name,
+        )
+
     async def register_execution_environment(
         self,
         namespace: str,
@@ -460,8 +522,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         try:
             # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
             version, namespace, helm_id = get_ee_id_parts(ee_id)
-            ip_addr = socket.gethostbyname(helm_id)
-
+            ip_addr = "{}.{}.svc".format(helm_id, namespace)
             # Obtain ssh_key from the ee, this method will implement retries to allow the ee
             # install libraries and start successfully
             ssh_key = await self._get_ssh_key(ip_addr)
@@ -545,7 +606,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
         try:
             version, namespace, helm_id = get_ee_id_parts(ee_id)
-            ip_addr = socket.gethostbyname(helm_id)
+            ip_addr = "{}.{}.svc".format(helm_id, namespace)
         except Exception as e:
             self.log.error("Error getting ee ip ee: {}".format(e))
             raise N2VCException("Error getting ee ip ee: {}".format(e))
@@ -690,14 +751,11 @@ class LCMHelmConn(N2VCConnector, LcmBase):
 
     @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _get_ssh_key(self, ip_addr):
-        channel = Channel(ip_addr, self._ee_service_port)
-        try:
-            stub = FrontendExecutorStub(channel)
-            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
-            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
-            return reply.message
-        finally:
-            channel.close()
+        return await self._execute_primitive_internal(
+            ip_addr,
+            "_get_ssh_key",
+            None,
+        )
 
     @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
@@ -714,10 +772,13 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     async def _execute_primitive_internal(
         self, ip_addr, primitive_name, params, db_dict=None
     ):
-
-        channel = Channel(ip_addr, self._ee_service_port)
-        try:
+        async def execute():
             stub = FrontendExecutorStub(channel)
+            if primitive_name == "_get_ssh_key":
+                self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
+                reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
+                return reply.message
+            # For any other primitives
             async with stub.RunPrimitive.open() as stream:
                 primitive_id = str(uuid.uuid1())
                 result = None
@@ -744,6 +805,24 @@ class LCMHelmConn(N2VCConnector, LcmBase):
                     return reply.status, reply.detailed_message
                 else:
                     return "ERROR", "No result received"
+
+        ssl_context = create_secure_context(CA_STORE)
+        channel = Channel(ip_addr, self._ee_service_port, ssl=ssl_context)
+        try:
+            return await execute()
+        except ssl.SSLError as ssl_error:  # fallback to insecure gRPC
+            if ssl_error.reason == "WRONG_VERSION_NUMBER" and not self._tls_enforce:
+                self.log.debug(
+                    "Execution environment doesn't support TLS, falling back to unsecure gRPC"
+                )
+                channel = Channel(ip_addr, self._ee_service_port)
+                return await execute()
+            elif ssl_error.reason == "WRONG_VERSION_NUMBER":
+                raise N2VCException(
+                    "Execution environment doesn't support TLS, primitives cannot be executed"
+                )
+            else:
+                raise
         finally:
             channel.close()
 
diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py
index 4fb8d5d..9885f40 100644
@@ -84,6 +84,7 @@ from osm_lcm.data_utils.vnfd import (
     get_juju_ee_ref,
     get_kdu_resource_profile,
     find_software_version,
+    check_helm_ee_in_ns,
 )
 from osm_lcm.data_utils.list_utils import find_in_list
 from osm_lcm.data_utils.vnfr import (
@@ -2638,6 +2639,17 @@ class NsLcm(LcmBase):
             stage[1] = "Deploying Execution Environments."
             self.logger.debug(logging_text + stage[1])
 
+            # create namespace and certificate if any helm based EE is present in the NS
+            if check_helm_ee_in_ns(db_vnfds):
+                # TODO: create EE namespace
+                # create TLS certificates
+                await self.vca_map["helm-v3"].create_tls_certificate(
+                    secret_name="ee-tls-{}".format(nsr_id),
+                    dns_prefix="*",
+                    nsr_id=nsr_id,
+                    usage="server auth",
+                )
+
             nsi_id = None  # TODO put nsi_id when this nsr belongs to a NSI
             for vnf_profile in get_vnf_profiles(nsd):
                 vnfd_id = vnf_profile["vnfd-id"]
@@ -4628,6 +4640,13 @@ class NsLcm(LcmBase):
                 # task_delete_ee = asyncio.ensure_future(self.n2vc.delete_namespace(namespace="." + nsr_id))
                 tasks_dict_info[task_delete_ee] = "Terminating all VCA"
 
+            # Delete Namespace and Certificates if necessary
+            if check_helm_ee_in_ns(list(db_vnfds_from_member_index.values())):
+                await self.vca_map["helm-v3"].delete_tls_certificate(
+                    certificate_name=db_nslcmop["nsInstanceId"],
+                )
+                # TODO: Delete namespace
+
             # Delete from k8scluster
             stage[1] = "Deleting KDUs."
             self.logger.debug(logging_text + stage[1])
diff --git a/osm_lcm/tests/test_lcm_helm_conn.py b/osm_lcm/tests/test_lcm_helm_conn.py
index 3730162..724b5e2 100644
@@ -104,7 +104,6 @@ class TestLcmHelmConn(asynctest.TestCase):
     async def test_get_ee_ssh_public__key(self):
         ee_id = "osm.helm_sample_charm_0001"
         db_dict = {}
-        lcm_helm_conn.socket.gethostbyname = asynctest.Mock()
         mock_pub_key = "ssh-rsapubkey"
         self.db.get_one.return_value = {"_admin": {"helm-chart": {"id": "myk8s_id"}}}
         self.helm_conn._get_ssh_key = asynctest.CoroutineMock(return_value=mock_pub_key)
@@ -115,7 +114,6 @@ class TestLcmHelmConn(asynctest.TestCase):
 
     @asynctest.fail_on(active_handles=True)
     async def test_execute_primitive(self):
-        lcm_helm_conn.socket.gethostbyname = asynctest.Mock()
         ee_id = "osm.helm_sample_charm_0001"
         primitive_name = "sleep"
         params = {}
@@ -129,7 +127,6 @@ class TestLcmHelmConn(asynctest.TestCase):
     @asynctest.fail_on(active_handles=True)
     async def test_execute_config_primitive(self):
         self.logger.debug("Execute config primitive")
-        lcm_helm_conn.socket.gethostbyname = asynctest.Mock()
         ee_id = "osm.helm_sample_charm_0001"
         primitive_name = "config"
         params = {"ssh-host-name": "host1"}