fix issues with k8scluster and repos
[osm/LCM.git] / osm_lcm / ns.py
index 0e6ca87..8705823 100644 (file)
@@ -615,7 +615,8 @@ class NsLcm(LcmBase):
                 if vnf_RO.get("ip_address"):
                     db_vnfr["ip-address"] = vnfr_update["ip-address"] = vnf_RO["ip_address"].split(";")[0]
                 elif not db_vnfr.get("ip-address"):
-                    raise LcmExceptionNoMgmtIP("ns member_vnf_index '{}' has no IP address".format(vnf_index))
+                    if db_vnfr.get("vdur"):   # if not VDUs, there is not ip_address
+                        raise LcmExceptionNoMgmtIP("ns member_vnf_index '{}' has no IP address".format(vnf_index))
 
                 for vdu_index, vdur in enumerate(get_iterable(db_vnfr, "vdur")):
                     vdur_RO_count_index = 0
@@ -731,13 +732,15 @@ class NsLcm(LcmBase):
         RO_descriptor_number = 0   # number of descriptors created at RO
         vnf_index_2_RO_id = {}    # map between vnfd/nsd id to the id used at RO
         start_deploy = time()
-        vdu_flag = False  # If any of the VNFDs has VDUs
         ns_params = db_nslcmop.get("operationParams")
         if ns_params and ns_params.get("timeout_ns_deploy"):
             timeout_ns_deploy = ns_params["timeout_ns_deploy"]
         else:
             timeout_ns_deploy = self.timeout.get("ns_deploy", self.timeout_ns_deploy)
 
+        # Check for and optionally request placement optimization. Database will be updated if placement activated
+        await self.do_placement(logging_text, db_nslcmop, db_vnfrs)
+
         # deploy RO
 
         # get vnfds, instantiate at RO
@@ -745,8 +748,6 @@ class NsLcm(LcmBase):
         for c_vnf in nsd.get("constituent-vnfd", ()):
             member_vnf_index = c_vnf["member-vnf-index"]
             vnfd = db_vnfds_ref[c_vnf['vnfd-id-ref']]
-            if vnfd.get("vdu"):
-                vdu_flag = True
             vnfd_ref = vnfd["id"]
             step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Creating vnfd='{}' member_vnf_index='{}' at" \
                                                                          " RO".format(vnfd_ref, member_vnf_index)
@@ -891,8 +892,7 @@ class NsLcm(LcmBase):
                 elif ns_status == "ACTIVE":
                     step = detailed_status = "Waiting for management IP address reported by the VIM. Updating VNFRs"
                     try:
-                        if vdu_flag:
-                            self.ns_update_vnfr(db_vnfrs, desc)
+                        self.ns_update_vnfr(db_vnfrs, desc)
                         break
                     except LcmExceptionNoMgmtIP:
                         pass
@@ -964,12 +964,13 @@ class NsLcm(LcmBase):
                     vdur = next((x for x in get_iterable(db_vnfr, "vdur")
                                  if x.get("vdu-id-ref") == vdu_id and x.get("count-index") == vdu_index), None)
 
+                if not vdur and len(db_vnfr.get("vdur", ())) == 1:  # If only one, this should be the target vdu
+                    vdur = db_vnfr["vdur"][0]
                 if not vdur:
-                    raise LcmException("Not found vnfr_id={}, vdu_index={}, vdu_index={}".format(
-                        vnfr_id, vdu_id, vdu_index
-                    ))
+                    raise LcmException("Not found vnfr_id={}, vdu_id={}, vdu_index={}".format(vnfr_id, vdu_id,
+                                                                                              vdu_index))
 
-                if vdur.get("status") == "ACTIVE":
+                if vdur.get("pdu-type") or vdur.get("status") == "ACTIVE":
                     ip_address = vdur.get("ip-address")
                     if not ip_address:
                         continue
@@ -980,11 +981,12 @@ class NsLcm(LcmBase):
             if not target_vdu_id:
                 continue
 
-            # self.logger.debug(logging_text + "IP address={}".format(ip_address))
-
             # inject public key into machine
             if pub_key and user:
                 # self.logger.debug(logging_text + "Inserting RO key")
+                if vdur.get("pdu-type"):
+                    self.logger.error(logging_text + "Cannot inject ssh-ky to a PDU")
+                    return ip_address
                 try:
                     ro_vm_id = "{}-{}".format(db_vnfr["member-vnf-index-ref"], target_vdu_id)  # TODO add vdu_index
                     result_dict = await self.RO.create_action(
@@ -1176,6 +1178,12 @@ class NsLcm(LcmBase):
             self.logger.debug(logging_text + step)
             await self.n2vc.install_configuration_sw(ee_id=ee_id, artifact_path=artifact_path, db_dict=db_dict)
 
+            # write in db flag of configuration_sw already installed
+            self.update_db_2("nsrs", nsr_id, {db_update_entry + "config_sw_installed": True})
+
+            # add relations for this VCA (wait for other peers related with this VCA)
+            await self._add_vca_relations(logging_text=logging_text, nsr_id=nsr_id, vca_index=vca_index)
+
             # if SSH access is required, then get execution environment SSH public
             if is_proxy_charm:  # if native charm we have waited already to VM be UP
                 pub_key = None
@@ -1209,10 +1217,13 @@ class NsLcm(LcmBase):
             initial_config_primitive_list = config_descriptor.get('initial-config-primitive')
 
             # sort initial config primitives by 'seq'
-            try:
-                initial_config_primitive_list.sort(key=lambda val: int(val['seq']))
-            except Exception as e:
-                self.logger.error(logging_text + step + ": " + str(e))
+            if initial_config_primitive_list:
+                try:
+                    initial_config_primitive_list.sort(key=lambda val: int(val['seq']))
+                except Exception as e:
+                    self.logger.error(logging_text + step + ": " + str(e))
+            else:
+                self.logger.debug(logging_text + step + ": No initial-config-primitive")
 
             # add config if not present for NS charm
             initial_config_primitive_list = self._get_initial_config_primitive_list(initial_config_primitive_list,
@@ -1324,7 +1335,7 @@ class NsLcm(LcmBase):
         except Exception as e:
             self.logger.warn('Error writing all configuration status, ns={}: {}'.format(nsr_id, e))
 
-    def _write_configuration_status(self, nsr_id: str, vca_index: int, status: str,
+    def _write_configuration_status(self, nsr_id: str, vca_index: int, status: str = None,
                                     element_under_configuration: str = None, element_type: str = None):
 
         # self.logger.debug('_write_configuration_status(): vca_index={}, status={}'
@@ -1333,7 +1344,8 @@ class NsLcm(LcmBase):
         try:
             db_path = 'configurationStatus.{}.'.format(vca_index)
             db_dict = dict()
-            db_dict[db_path + 'status'] = status
+            if status:
+                db_dict[db_path + 'status'] = status
             if element_under_configuration:
                 db_dict[db_path + 'elementUnderConfiguration'] = element_under_configuration
             if element_type:
@@ -1343,6 +1355,37 @@ class NsLcm(LcmBase):
             self.logger.warn('Error writing configuration status={}, ns={}, vca_index={}: {}'
                              .format(status, nsr_id, vca_index, e))
 
+    async def do_placement(self, logging_text, db_nslcmop, db_vnfrs):
+        placement_engine = deep_get(db_nslcmop, ('operationParams', 'placement-engine'))
+        if placement_engine == "PLA":
+            self.logger.debug(logging_text + "Invoke placement optimization for nslcmopId={}".format(db_nslcmop['id']))
+            await self.msg.aiowrite("pla", "get_placement", {'nslcmopId': db_nslcmop['_id']}, loop=self.loop)
+            db_poll_interval = 5
+            wait = db_poll_interval * 4
+            pla_result = None
+            while not pla_result and wait >= 0:
+                await asyncio.sleep(db_poll_interval)
+                wait -= db_poll_interval
+                db_nslcmop = self.db.get_one("nslcmops", {"_id": db_nslcmop["_id"]})
+                pla_result = deep_get(db_nslcmop, ('_admin', 'pla'))
+
+            if not pla_result:
+                raise LcmException("Placement timeout for nslcmopId={}".format(db_nslcmop['id']))
+
+            for pla_vnf in pla_result['vnf']:
+                vnfr = db_vnfrs.get(pla_vnf['member-vnf-index'])
+                if not pla_vnf.get('vimAccountId') or not vnfr:
+                    continue
+                self.db.set_one("vnfrs", {"_id": vnfr["_id"]}, {"vim-account-id": pla_vnf['vimAccountId']})
+        return
+
+    def update_nsrs_with_pla_result(self, params):
+        try:
+            nslcmop_id = deep_get(params, ('placement', 'nslcmopId'))
+            self.update_db_2("nslcmops", nslcmop_id, {"_admin.pla": params.get('placement')})
+        except Exception as e:
+            self.logger.warn('Update failed for nslcmop_id={}:{}'.format(nslcmop_id, e))
+
     async def instantiate(self, nsr_id, nslcmop_id):
         """
 
@@ -1437,7 +1480,7 @@ class NsLcm(LcmBase):
                 vnfd_ref = vnfr["vnfd-ref"]                     # vnfd name for this vnf
                 # if we haven't this vnfd, read it from db
                 if vnfd_id not in db_vnfds:
-                    # read from cb
+                    # read from db
                     step = "Getting vnfd={} id='{}' from db".format(vnfd_id, vnfd_ref)
                     self.logger.debug(logging_text + step)
                     vnfd = self.db.get_one("vnfds", {"_id": vnfd_id})
@@ -1490,6 +1533,7 @@ class NsLcm(LcmBase):
                     nsr_id=nsr_id,
                     db_nsr=db_nsr,
                     db_vnfrs=db_vnfrs,
+                    db_vnfds=db_vnfds
                 )
             )
             self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDUs", task_kdu)
@@ -1760,6 +1804,7 @@ class NsLcm(LcmBase):
                                  exc_info=True)
         finally:
             if exc:
+                instantiated_ok = False
                 if db_nsr:
                     db_nsr_update["detailed-status"] = "ERROR {}: {}".format(step, exc)
                     db_nsr_update["operational-status"] = "failed"
@@ -1816,7 +1861,167 @@ class NsLcm(LcmBase):
             self.logger.debug(logging_text + "Exit")
             self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_instantiate")
 
-    async def deploy_kdus(self, logging_text, nsr_id, db_nsr, db_vnfrs):
+    async def _add_vca_relations(self, logging_text, nsr_id, vca_index: int, timeout: int = 3600) -> bool:
+
+        # steps:
+        # 1. find all relations for this VCA
+        # 2. wait for other peers related
+        # 3. add relations
+
+        try:
+
+            # STEP 1: find all relations for this VCA
+
+            # read nsr record
+            db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
+
+            # this VCA data
+            my_vca = deep_get(db_nsr, ('_admin', 'deployed', 'VCA'))[vca_index]
+
+            # read all ns-configuration relations
+            ns_relations = list()
+            db_ns_relations = deep_get(db_nsr, ('nsd', 'ns-configuration', 'relation'))
+            if db_ns_relations:
+                for r in db_ns_relations:
+                    # check if this VCA is in the relation
+                    if my_vca.get('member-vnf-index') in\
+                            (r.get('entities')[0].get('id'), r.get('entities')[1].get('id')):
+                        ns_relations.append(r)
+
+            # read all vnf-configuration relations
+            vnf_relations = list()
+            db_vnfd_list = db_nsr.get('vnfd-id')
+            if db_vnfd_list:
+                for vnfd in db_vnfd_list:
+                    db_vnfd = self.db.get_one("vnfds", {"_id": vnfd})
+                    db_vnf_relations = deep_get(db_vnfd, ('vnf-configuration', 'relation'))
+                    if db_vnf_relations:
+                        for r in db_vnf_relations:
+                            # check if this VCA is in the relation
+                            if my_vca.get('vdu_id') in (r.get('entities')[0].get('id'), r.get('entities')[1].get('id')):
+                                vnf_relations.append(r)
+
+            # if no relations, terminate
+            if not ns_relations and not vnf_relations:
+                self.logger.debug(logging_text + ' No relations')
+                return True
+
+            self.logger.debug(logging_text + ' adding relations\n    {}\n    {}'.format(ns_relations, vnf_relations))
+
+            # add all relations
+            start = time()
+            while True:
+                # check timeout
+                now = time()
+                if now - start >= timeout:
+                    self.logger.error(logging_text + ' : timeout adding relations')
+                    return False
+
+                # reload nsr from database (we need to update record: _admin.deloyed.VCA)
+                db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
+
+                # for each defined NS relation, find the VCA's related
+                for r in ns_relations:
+                    from_vca_ee_id = None
+                    to_vca_ee_id = None
+                    from_vca_endpoint = None
+                    to_vca_endpoint = None
+                    vca_list = deep_get(db_nsr, ('_admin', 'deployed', 'VCA'))
+                    for vca in vca_list:
+                        if vca.get('member-vnf-index') == r.get('entities')[0].get('id') \
+                                and vca.get('config_sw_installed'):
+                            from_vca_ee_id = vca.get('ee_id')
+                            from_vca_endpoint = r.get('entities')[0].get('endpoint')
+                        if vca.get('member-vnf-index') == r.get('entities')[1].get('id') \
+                                and vca.get('config_sw_installed'):
+                            to_vca_ee_id = vca.get('ee_id')
+                            to_vca_endpoint = r.get('entities')[1].get('endpoint')
+                    if from_vca_ee_id and to_vca_ee_id:
+                        # add relation
+                        await self.n2vc.add_relation(
+                            ee_id_1=from_vca_ee_id,
+                            ee_id_2=to_vca_ee_id,
+                            endpoint_1=from_vca_endpoint,
+                            endpoint_2=to_vca_endpoint)
+                        # remove entry from relations list
+                        ns_relations.remove(r)
+                    else:
+                        # check failed peers
+                        try:
+                            vca_status_list = db_nsr.get('configurationStatus')
+                            if vca_status_list:
+                                for i in range(len(vca_list)):
+                                    vca = vca_list[i]
+                                    vca_status = vca_status_list[i]
+                                    if vca.get('member-vnf-index') == r.get('entities')[0].get('id'):
+                                        if vca_status.get('status') == 'BROKEN':
+                                            # peer broken: remove relation from list
+                                            ns_relations.remove(r)
+                                    if vca.get('member-vnf-index') == r.get('entities')[1].get('id'):
+                                        if vca_status.get('status') == 'BROKEN':
+                                            # peer broken: remove relation from list
+                                            ns_relations.remove(r)
+                        except Exception:
+                            # ignore
+                            pass
+
+                # for each defined VNF relation, find the VCA's related
+                for r in vnf_relations:
+                    from_vca_ee_id = None
+                    to_vca_ee_id = None
+                    from_vca_endpoint = None
+                    to_vca_endpoint = None
+                    vca_list = deep_get(db_nsr, ('_admin', 'deployed', 'VCA'))
+                    for vca in vca_list:
+                        if vca.get('vdu_id') == r.get('entities')[0].get('id') and vca.get('config_sw_installed'):
+                            from_vca_ee_id = vca.get('ee_id')
+                            from_vca_endpoint = r.get('entities')[0].get('endpoint')
+                        if vca.get('vdu_id') == r.get('entities')[1].get('id') and vca.get('config_sw_installed'):
+                            to_vca_ee_id = vca.get('ee_id')
+                            to_vca_endpoint = r.get('entities')[1].get('endpoint')
+                    if from_vca_ee_id and to_vca_ee_id:
+                        # add relation
+                        await self.n2vc.add_relation(
+                            ee_id_1=from_vca_ee_id,
+                            ee_id_2=to_vca_ee_id,
+                            endpoint_1=from_vca_endpoint,
+                            endpoint_2=to_vca_endpoint)
+                        # remove entry from relations list
+                        vnf_relations.remove(r)
+                    else:
+                        # check failed peers
+                        try:
+                            vca_status_list = db_nsr.get('configurationStatus')
+                            if vca_status_list:
+                                for i in range(len(vca_list)):
+                                    vca = vca_list[i]
+                                    vca_status = vca_status_list[i]
+                                    if vca.get('vdu_id') == r.get('entities')[0].get('id'):
+                                        if vca_status.get('status') == 'BROKEN':
+                                            # peer broken: remove relation from list
+                                            ns_relations.remove(r)
+                                    if vca.get('vdu_id') == r.get('entities')[1].get('id'):
+                                        if vca_status.get('status') == 'BROKEN':
+                                            # peer broken: remove relation from list
+                                            ns_relations.remove(r)
+                        except Exception:
+                            # ignore
+                            pass
+
+                # wait for next try
+                await asyncio.sleep(5.0)
+
+                if not ns_relations and not vnf_relations:
+                    self.logger.debug('Relations added')
+                    break
+
+            return True
+
+        except Exception as e:
+            self.logger.warn(logging_text + ' ERROR adding relations: {}'.format(e))
+            return False
+
+    async def deploy_kdus(self, logging_text, nsr_id, db_nsr, db_vnfrs, db_vnfds):
         # Launch kdus if present in the descriptor
 
         deployed_ok = True
@@ -1852,6 +2057,8 @@ class NsLcm(LcmBase):
                     k8sclustertype = None
                     error_text = None
                     cluster_uuid = None
+                    vnfd_id = vnfr_data.get('vnfd-id')
+                    pkgdir = deep_get(db_vnfds.get(vnfd_id), ('_admin', 'storage', 'pkg-dir'))
                     if kdur.get("helm-chart"):
                         kdumodel = kdur["helm-chart"]
                         k8sclustertype = "chart"
@@ -1863,9 +2070,37 @@ class NsLcm(LcmBase):
                     else:
                         error_text = "kdu type is neither helm-chart nor juju-bundle. Maybe an old NBI version is" \
                                      " running"
+                    # check if kdumodel is a file and exists
+                    try:
+                        # path format: /vnfdid/pkkdir/kdumodel
+                        filename = '{}/{}/{}s/{}'.format(vnfd_id, pkgdir, k8sclustertype_full, kdumodel)
+                        if self.fs.file_exists(filename, mode='file') or self.fs.file_exists(filename, mode='dir'):
+                            kdumodel = self.fs.path + filename
+                    except Exception:
+                        # it is not a file
+                        pass
+
+                    step = "Prepare instantiate KDU {} in k8s cluster {}".format(
+                        kdur["kdu-name"], kdur["k8s-cluster"]["id"])
+
                     try:
                         if not error_text:
                             cluster_uuid = _get_cluster_id(kdur["k8s-cluster"]["id"], k8sclustertype_full)
+
+                            updated_cluster_list = []
+                            if k8sclustertype == "chart" and cluster_uuid not in updated_cluster_list:
+                                del_repo_list, added_repo_dict = await asyncio.ensure_future(
+                                    self.k8sclusterhelm.synchronize_repos(cluster_uuid=cluster_uuid))
+                                if del_repo_list or added_repo_dict:
+                                    unset = {'_admin.helm_charts_added.' + item: None for item in del_repo_list}
+                                    updated = {'_admin.helm_charts_added.' +
+                                               item: name for item, name in added_repo_dict.items()}
+                                    self.logger.debug(logging_text + "repos synchronized, to_delete: {}, to_add: {}".
+                                                      format(del_repo_list, added_repo_dict))
+                                    self.db.set_one("k8sclusters", {"_id": kdur["k8s-cluster"]["id"]},
+                                                    updated, unset=unset)
+                                updated_cluster_list.append(cluster_uuid)
+
                     except LcmException as e:
                         error_text = str(e)
                         deployed_ok = False
@@ -1884,6 +2119,7 @@ class NsLcm(LcmBase):
 
                     db_dict = {"collection": "nsrs", "filter": {"_id": nsr_id}, "path": "_admin.deployed.K8s."
                                                                                         "{}".format(index)}
+
                     if k8sclustertype == "chart":
                         task = asyncio.ensure_future(
                             self.k8sclusterhelm.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, atomic=True,
@@ -1893,37 +2129,39 @@ class NsLcm(LcmBase):
                         task = asyncio.ensure_future(
                             self.k8sclusterjuju.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel,
                                                         atomic=True, params=desc_params,
-                                                        db_dict=db_dict, timeout=600)
+                                                        db_dict=db_dict, timeout=600,
+                                                        kdu_name=kdur["kdu-name"])
                         )
 
                     pending_tasks[task] = "_admin.deployed.K8s.{}.".format(index)
                     index += 1
-            if not pending_tasks:
-                return
-            self.logger.debug(logging_text + 'Waiting for terminate pending tasks...')
-            pending_list = list(pending_tasks.keys())
-            while pending_list:
-                done_list, pending_list = await asyncio.wait(pending_list, timeout=30*60,
-                                                             return_when=asyncio.FIRST_COMPLETED)
-                if not done_list:   # timeout
-                    for task in pending_list:
-                        db_nsr_update[pending_tasks(task) + "detailed-status"] = "Timeout"
-                        deployed_ok = False
-                    break
-                for task in done_list:
-                    exc = task.exception()
-                    if exc:
-                        db_nsr_update[pending_tasks[task] + "detailed-status"] = "{}".format(exc)
-                        deployed_ok = False
-                    else:
-                        db_nsr_update[pending_tasks[task] + "kdu-instance"] = task.result()
+
+            if pending_tasks:
+                self.logger.debug(logging_text + 'Waiting for terminate pending tasks...')
+                pending_list = list(pending_tasks.keys())
+                while pending_list:
+                    done_list, pending_list = await asyncio.wait(pending_list, timeout=30*60,
+                                                                 return_when=asyncio.FIRST_COMPLETED)
+                    if not done_list:   # timeout
+                        for task in pending_list:
+                            db_nsr_update[pending_tasks[task] + "detailed-status"] = "Timeout"
+                            deployed_ok = False
+                        break
+                    for task in done_list:
+                        exc = task.exception()
+                        if exc:
+                            db_nsr_update[pending_tasks[task] + "detailed-status"] = "{}".format(exc)
+                            deployed_ok = False
+                        else:
+                            db_nsr_update[pending_tasks[task] + "kdu-instance"] = task.result()
 
             if not deployed_ok:
                 raise LcmException('Cannot deploy KDUs')
 
         except Exception as e:
-            self.logger.critical(logging_text + "Exit Exception {} while '{}': {}".format(type(e).__name__, step, e))
-            raise LcmException("{} Exit Exception {} while '{}': {}".format(logging_text, type(e).__name__, step, e))
+            msg = "{} Exit Exception {} while '{}': {}".format(logging_text, type(e).__name__, step, e)
+            self.logger.error(msg)
+            raise LcmException(msg)
         finally:
             if db_nsr_update:
                 self.update_db_2("nsrs", nsr_id, db_nsr_update)