fix 931: No need to wait for VM up on ns configuration
[osm/LCM.git] / osm_lcm / ns.py
index a76c121..6ecb04d 100644 (file)
@@ -455,8 +455,7 @@ class NsLcm(LcmBase):
                 if isinstance(vld_params["ns-net"], dict):
                     for vld_id, instance_scenario_id in vld_params["ns-net"].items():
                         RO_vld_ns_net = {"instance_scenario_id": instance_scenario_id, "osm_id": vld_id}
-                if RO_vld_ns_net:
-                    populate_dict(RO_ns_params, ("networks", vld_params["name"], "use-network"), RO_vld_ns_net)            
+                        populate_dict(RO_ns_params, ("networks", vld_params["name"], "use-network"), RO_vld_ns_net)
             if "vnfd-connection-point-ref" in vld_params:
                 for cp_params in vld_params["vnfd-connection-point-ref"]:
                     # look for interface
@@ -629,15 +628,16 @@ class NsLcm(LcmBase):
             else:
                 raise LcmException("ns_update_vnfr: Not found member_vnf_index={} from VIM info".format(vnf_index))
 
-    @staticmethod
-    def _get_ns_config_info(vca_deployed_list):
+    def _get_ns_config_info(self, nsr_id):
         """
         Generates a mapping between vnf,vdu elements and the N2VC id
-        :param vca_deployed_list: List of database _admin.deploy.VCA that contains this list
+        :param nsr_id: id of nsr to get last  database _admin.deployed.VCA that contains this list
         :return: a dictionary with {osm-config-mapping: {}} where its element contains:
             "<member-vnf-index>": <N2VC-id>  for a vnf configuration, or
             "<member-vnf-index>.<vdu.id>.<vdu replica(0, 1,..)>": <N2VC-id>  for a vdu configuration
         """
+        db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
+        vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"]
         mapping = {}
         ns_config_info = {"osm-config-mapping": mapping}
         for vca in vca_deployed_list:
@@ -961,6 +961,33 @@ class NsLcm(LcmBase):
 
         return ip_address
 
+    async def _wait_dependent_n2vc(self, nsr_id, vca_deployed_list, vca_index):
+        """
+        Wait until dependent VCA deployments have been finished. NS wait for VNFs and VDUs. VNFs for VDUs
+
+        The entry at vca_index is the VCA about to be configured. If it has a "vdu_id" or a "kdu_name" it
+        depends on nothing and this returns at once. Otherwise its dependencies are every other entry when it
+        has no "member-vnf-index" (NS level), or only the entries with the same "member-vnf-index" (VNF level).
+        :param nsr_id: _id of the nsrs database record, used to re-read _admin.deployed.VCA between polls
+        :param vca_deployed_list: initial list of VCA deployment dicts; it is checked before any database read
+        :param vca_index: position in vca_deployed_list of the VCA that is going to be configured
+        :return: None, once a full pass finds a truthy "instantiation" different from "FAILED" on each dependency
+        :raises LcmException: if a checked dependency has "instantiation" == "FAILED", or when polling is exhausted
+        """
+        my_vca = vca_deployed_list[vca_index]
+        # VDU and KDU level VCAs are the leaves of the dependency chain: nothing to wait for
+        if my_vca.get("vdu_id") or my_vca.get("kdu_name"):
+            return
+        # NOTE(review): this counter is decremented by 1 per 10 s sleep, so the loop allows up to 301 sleeps
+        # (about 50 minutes), not 300 seconds - confirm which one was intended
+        timeout = 300
+        while timeout >= 0:
+            for index, vca_deployed in enumerate(vca_deployed_list):
+                # never wait for ourselves
+                # NOTE(review): assumes a re-read list keeps this VCA at the same position - confirm
+                if index == vca_index:
+                    continue
+                # dependency filter: everything for a NS level VCA, same member-vnf-index for a VNF level VCA
+                if not my_vca.get("member-vnf-index") or \
+                        (vca_deployed.get("member-vnf-index") == my_vca.get("member-vnf-index")):
+                    if not vca_deployed.get("instantiation"):
+                        # not finished yet: leave the for (skipping its else), sleep and poll again.
+                        # Entries after this one are not inspected in this pass, not even for "FAILED"
+                        break   # wait
+                    if vca_deployed["instantiation"] == "FAILED":
+                        raise LcmException("Configuration aborted because dependent charm/s has failed")
+            else:
+                # the for ended without break: every dependency reported a non failed "instantiation"
+                return
+            await asyncio.sleep(10)
+            timeout -= 1
+            # refresh the list from database to see the "instantiation" values written meanwhile
+            db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
+            vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"]
+
+        raise LcmException("Configuration aborted because dependent charm/s timeout")
+
     async def instantiate_N2VC(self, logging_text, vca_index, nsi_id, db_nsr, db_vnfr, vdu_id,
                                kdu_name, vdu_index, config_descriptor, deploy_params, base_folder):
         nsr_id = db_nsr["_id"]
@@ -972,9 +999,6 @@ class NsLcm(LcmBase):
             'filter': {'_id': nsr_id},
             'path': db_update_entry
         }
-        logging_text += "member_vnf_index={} vdu_id={}, vdu_index={} ".format(db_vnfr["member-vnf-index-ref"],
-                                                                              vdu_id, vdu_index)
-
         step = ""
         try:
             vnfr_id = None
@@ -1072,8 +1096,11 @@ class NsLcm(LcmBase):
 
                 # n2vc_redesign STEP 5.1
                 # wait for RO (ip-address) Insert pub_key into VM
-                rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, vdu_index,
-                                                                 user=user, pub_key=pub_key)
+                if vnfr_id:
+                    rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, vdu_index,
+                                                                     user=user, pub_key=pub_key)
+                else:
+                    rw_mgmt_ip = None   # This is for a NS configuration
 
                 self.logger.debug(logging_text + ' VM_ip_address={}'.format(rw_mgmt_ip))
 
@@ -1093,11 +1120,12 @@ class NsLcm(LcmBase):
             # add config if not present for NS charm
             initial_config_primitive_list = self._get_initial_config_primitive_list(initial_config_primitive_list,
                                                                                     vca_deployed)
-
+            if initial_config_primitive_list:
+                await self._wait_dependent_n2vc(nsr_id, vca_deployed_list, vca_index)
             for initial_config_primitive in initial_config_primitive_list:
                 # adding information on the vca_deployed if it is a NS execution environment
                 if not vca_deployed["member-vnf-index"]:
-                    deploy_params["ns_config_info"] = self._get_ns_config_info(vca_deployed_list)
+                    deploy_params["ns_config_info"] = self._get_ns_config_info(nsr_id)
                 # TODO check if already done
                 primitive_params_ = self._map_primitive_params(initial_config_primitive, {}, deploy_params)
 
@@ -1112,12 +1140,28 @@ class NsLcm(LcmBase):
                 # TODO register in database that primitive is done
 
             step = "instantiated at VCA"
+            self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "COMPLETED"})
             self.logger.debug(logging_text + step)
 
         except Exception as e:  # TODO not use Exception but N2VC exception
+            self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "FAILED"})
             raise Exception("{} {}".format(step, e)) from e
             # TODO raise N2VC exception with 'step' extra information
 
+    def _write_ns_status(self, nsr_id: str, ns_state: str, current_operation: str, current_operation_id: str,
+                         error_description: str = None, error_detail: str = None):
+        """
+        Write the NS state and current-operation fields of a nsrs database record. Best effort: never raises
+        :param nsr_id: _id of the nsrs record to update
+        :param ns_state: value for "nsState". When falsy (e.g. None) the key is left out of the update
+        :param current_operation: value for "currentOperation"
+        :param current_operation_id: value for "currentOperationID"
+        :param error_description: value for "errorDescription". Always written, None unless provided
+        :param error_detail: value for "errorDetail". Always written, None unless provided
+        :return: None. Any exception raised by the database update is logged as a warning and swallowed
+        """
+        db_dict = {}
+        if ns_state:
+            db_dict["nsState"] = ns_state
+        db_dict["currentOperation"] = current_operation
+        db_dict["currentOperationID"] = current_operation_id
+        db_dict["errorDescription"] = error_description
+        db_dict["errorDetail"] = error_detail
+        # only the database call can fail; keep the try body down to it
+        try:
+            self.update_db_2("nsrs", nsr_id, db_dict)
+        except Exception as e:
+            # deliberate catch-all: a failure writing this status must not abort the operation in progress.
+            # Logger.warn is a deprecated alias (removed in python 3.13); use Logger.warning
+            self.logger.warning('Error writing NS status: {}'.format(e))
+
     async def instantiate(self, nsr_id, nslcmop_id):
         """
 
@@ -1165,6 +1209,14 @@ class NsLcm(LcmBase):
 
             # STEP 0: Reading database (nslcmops, nsrs, nsds, vnfrs, vnfds)
 
+            # nsState="BUILDING", currentOperation="INSTANTIATING", currentOperationID=nslcmop_id
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state="BUILDING",
+                current_operation="INSTANTIATING",
+                current_operation_id=nslcmop_id
+            )
+
             # read from db: operation
             step = "Getting nslcmop={} from db".format(nslcmop_id)
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
@@ -1290,7 +1342,7 @@ class NsLcm(LcmBase):
                 descriptor_config = vnfd.get("vnf-configuration")
                 if descriptor_config and descriptor_config.get("juju"):
                     self._deploy_n2vc(
-                        logging_text=logging_text,
+                        logging_text=logging_text + "member_vnf_index={} ".format(member_vnf_index),
                         db_nsr=db_nsr,
                         db_vnfr=db_vnfr,
                         nslcmop_id=nslcmop_id,
@@ -1332,7 +1384,8 @@ class NsLcm(LcmBase):
                         for vdu_index in range(int(vdud.get("count", 1))):
                             # TODO vnfr_params["rw_mgmt_ip"] = vdur["ip-address"]
                             self._deploy_n2vc(
-                                logging_text=logging_text,
+                                logging_text=logging_text + "member_vnf_index={}, vdu_id={}, vdu_index={} ".format(
+                                    member_vnf_index, vdu_id, vdu_index),
                                 db_nsr=db_nsr,
                                 db_vnfr=db_vnfr,
                                 nslcmop_id=nslcmop_id,
@@ -1427,27 +1480,56 @@ class NsLcm(LcmBase):
 
             # while time() <= start_deploy + self.total_deploy_timeout:
             error_text_list = []
-            timeout = 3600  # time() - start_deploy
+            timeout = 3600
+
+            # let's begin with all OK
+            instantiated_ok = True
+            # let's begin with RO 'running' status (later we can change it)
+            db_nsr_update["operational-status"] = "running"
+            # let's begin with VCA 'configured' status (later we can change it)
+            db_nsr_update["config-status"] = "configured"
+
             if task_instantiation_list:
+                # wait for all tasks completion
                 done, pending = await asyncio.wait(task_instantiation_list, timeout=timeout)
-                if pending:
-                    for task in pending:
-                        error_text_list.append(task_instantiation_info[task] + ": Timeout")
+
+                for task in pending:
+                    instantiated_ok = False
+                    if task == task_ro:
+                        db_nsr_update["operational-status"] = "failed"
+                    else:
+                        db_nsr_update["config-status"] = "failed"
+                    self.logger.error(logging_text + task_instantiation_info[task] + ": Timeout")
+                    error_text_list.append(task_instantiation_info[task] + ": Timeout")
                 for task in done:
                     if task.cancelled():
+                        instantiated_ok = False
+                        if task == task_ro:
+                            db_nsr_update["operational-status"] = "failed"
+                        else:
+                            db_nsr_update["config-status"] = "failed"
+                        self.logger.warn(logging_text + task_instantiation_info[task] + ": Cancelled")
                         error_text_list.append(task_instantiation_info[task] + ": Cancelled")
-                    elif task.done():
+                    else:
                         exc = task.exception()
                         if exc:
+                            instantiated_ok = False
+                            if task == task_ro:
+                                db_nsr_update["operational-status"] = "failed"
+                            else:
+                                db_nsr_update["config-status"] = "failed"
+                            self.logger.error(logging_text + task_instantiation_info[task] + ": Failed")
                             if isinstance(exc, (N2VCException, ROclient.ROClientException)):
                                 error_text_list.append(task_instantiation_info[task] + ": {}".format(exc))
                             else:
-                                error_text_list.append(task_instantiation_info[task] + ": " + "".
-                                                       join(traceback.format_exception(None, exc, exc.__traceback__)))
+                                exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__))
+                                self.logger.error(logging_text + task_instantiation_info[task] + exc_traceback)
+                                error_text_list.append(task_instantiation_info[task] + ": " + exc_traceback)
+                        else:
+                            self.logger.debug(logging_text + task_instantiation_info[task] + ": Done")
 
             if error_text_list:
                 error_text = "\n".join(error_text_list)
-                db_nsr_update["config-status"] = "failed"
                 db_nsr_update["detailed-status"] = error_text
                 db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED_TEMP"
                 db_nslcmop_update["detailed-status"] = error_text
@@ -1457,7 +1539,6 @@ class NsLcm(LcmBase):
                 db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED"
                 db_nslcmop_update["statusEnteredTime"] = time()
                 db_nslcmop_update["detailed-status"] = "done"
-                db_nsr_update["config-status"] = "configured"
                 db_nsr_update["detailed-status"] = "done"
 
         except (ROclient.ROClientException, DbException, LcmException) as e:
@@ -1475,6 +1556,7 @@ class NsLcm(LcmBase):
                 if db_nsr:
                     db_nsr_update["detailed-status"] = "ERROR {}: {}".format(step, exc)
                     db_nsr_update["operational-status"] = "failed"
+                    db_nsr_update["config-status"] = "failed"
                 if db_nslcmop:
                     db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc)
                     db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED"
@@ -1485,6 +1567,26 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.current-operation"] = None
                     db_nsr_update["_admin.operation-type"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+
+                    # nsState="READY/BROKEN", currentOperation="IDLE", currentOperationID=None
+                    ns_state = None
+                    error_description = None
+                    error_detail = None
+                    if instantiated_ok:
+                        ns_state = "READY"
+                    else:
+                        ns_state = "BROKEN"
+                        error_description = 'Operation: INSTANTIATING.{}, step: {}'.format(nslcmop_id, step)
+                        error_detail = error_text
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=ns_state,
+                        current_operation="IDLE",
+                        current_operation_id=None,
+                        error_description=error_description,
+                        error_detail=error_detail
+                    )
+
                 if db_nslcmop_update:
                     self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update)
             except DbException as e:
@@ -1972,6 +2074,13 @@ class NsLcm(LcmBase):
             step = "Waiting for previous operations to terminate"
             await self.lcm_tasks.waitfor_related_HA("ns", 'nslcmops', nslcmop_id)
 
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state="TERMINATING",
+                current_operation="TERMINATING",
+                current_operation_id=nslcmop_id
+            )
+
             step = "Getting nslcmop={} from db".format(nslcmop_id)
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             step = "Getting nsr={} from db".format(nsr_id)
@@ -2146,6 +2255,7 @@ class NsLcm(LcmBase):
                             self.logger.error(logging_text + failed_detail[-1])
 
             if failed_detail:
+                terminate_ok = False
                 self.logger.error(logging_text + " ;".join(failed_detail))
                 db_nsr_update["operational-status"] = "failed"
                 db_nsr_update["detailed-status"] = "Deletion errors " + "; ".join(failed_detail)
@@ -2153,6 +2263,7 @@ class NsLcm(LcmBase):
                 db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED"
                 db_nslcmop_update["statusEnteredTime"] = time()
             else:
+                terminate_ok = True
                 db_nsr_update["operational-status"] = "terminated"
                 db_nsr_update["detailed-status"] = "Done"
                 db_nsr_update["_admin.nsState"] = "NOT_INSTANTIATED"
@@ -2184,6 +2295,25 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.current-operation"] = None
                     db_nsr_update["_admin.operation-type"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+
+                    if terminate_ok:
+                        ns_state = "IDLE"
+                        error_description = None
+                        error_detail = None
+                    else:
+                        ns_state = "BROKEN"
+                        error_description = 'Operation: TERMINATING.{}, step: {}'.format(nslcmop_id, step)
+                        error_detail = "; ".join(failed_detail)
+
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=ns_state,
+                        current_operation="IDLE",
+                        current_operation_id=None,
+                        error_description=error_description,
+                        error_detail=error_detail
+                    )
+
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             if nslcmop_operation_state:
@@ -2330,6 +2460,13 @@ class NsLcm(LcmBase):
             step = "Waiting for previous operations to terminate"
             await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
 
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state=None,
+                current_operation="RUNNING ACTION",
+                current_operation_id=nslcmop_id
+            )
+
             step = "Getting information from database"
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
@@ -2522,6 +2659,12 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.nslcmop"] = None
                     db_nsr_update["_admin.current-operation"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=None,
+                        current_operation="IDLE",
+                        current_operation_id=None
+                    )
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             self.logger.debug(logging_text + "Exit")
@@ -2565,6 +2708,13 @@ class NsLcm(LcmBase):
             step = "Waiting for previous operations to terminate"
             await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
 
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state=None,
+                current_operation="SCALING",
+                current_operation_id=nslcmop_id
+            )
+
             step = "Getting nslcmop from database"
             self.logger.debug(step + " after having waited for previous tasks to be completed")
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
@@ -2899,7 +3049,7 @@ class NsLcm(LcmBase):
                         # Post-scale reintent check: Check if this sub-operation has been executed before
                         op_index = self._check_or_add_scale_suboperation(
                             db_nslcmop, nslcmop_id, vnf_index, vnf_config_primitive, primitive_params, 'POST-SCALE')
-                        if (op_index == self.SUBOPERATION_STATUS_SKIP):
+                        if op_index == self.SUBOPERATION_STATUS_SKIP:
                             # Skip sub-operation
                             result = 'COMPLETED'
                             result_detail = 'Done'
@@ -2907,7 +3057,7 @@ class NsLcm(LcmBase):
                                               "vnf_config_primitive={} Skipped sub-operation, result {} {}".
                                               format(vnf_config_primitive, result, result_detail))
                         else:
-                            if (op_index == self.SUBOPERATION_STATUS_NEW):
+                            if op_index == self.SUBOPERATION_STATUS_NEW:
                                 # New sub-operation: Get index of this sub-operation
                                 op_index = len(db_nslcmop.get('_admin', {}).get('operations')) - 1
                                 self.logger.debug(logging_text + "vnf_config_primitive={} New sub-operation".
@@ -2978,6 +3128,14 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.operation-type"] = None
                     db_nsr_update["_admin.nslcmop"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=None,
+                        current_operation="IDLE",
+                        current_operation_id=None
+                    )
+
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             if nslcmop_operation_state: