fix 931: issue at logging text
[osm/LCM.git] / osm_lcm / ns.py
index 67926c4..47d25be 100644 (file)
@@ -32,6 +32,7 @@ from osm_common.dbbase import DbException
 from osm_common.fsbase import FsException
 
 from n2vc.n2vc_juju_conn import N2VCJujuConnector
+from n2vc.exceptions import N2VCException
 
 from copy import copy, deepcopy
 from http import HTTPStatus
@@ -454,8 +455,7 @@ class NsLcm(LcmBase):
                 if isinstance(vld_params["ns-net"], dict):
                     for vld_id, instance_scenario_id in vld_params["ns-net"].items():
                         RO_vld_ns_net = {"instance_scenario_id": instance_scenario_id, "osm_id": vld_id}
-                if RO_vld_ns_net:
-                    populate_dict(RO_ns_params, ("networks", vld_params["name"], "use-network"), RO_vld_ns_net)            
+                        populate_dict(RO_ns_params, ("networks", vld_params["name"], "use-network"), RO_vld_ns_net)
             if "vnfd-connection-point-ref" in vld_params:
                 for cp_params in vld_params["vnfd-connection-point-ref"]:
                     # look for interface
@@ -881,9 +881,14 @@ class NsLcm(LcmBase):
         ip_address = None
         nb_tries = 0
         target_vdu_id = None
+        ro_retries = 0
 
         while True:
 
+            ro_retries += 1
+            if ro_retries >= 360:  # 1 hour
+                raise LcmException("Not found _admin.deployed.RO.nsr_id for nsr_id: {}".format(nsr_id))
+
             await asyncio.sleep(10, loop=self.loop)
             # wait until NS is deployed at RO
             if not ro_nsr_id:
@@ -895,23 +900,29 @@ class NsLcm(LcmBase):
             # get ip address
             if not target_vdu_id:
                 db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id})
-                if not vdu_id:
+
+                if not vdu_id:  # for the VNF case
                     ip_address = db_vnfr.get("ip-address")
                     if not ip_address:
                         continue
-                for vdur in get_iterable(db_vnfr, "vdur"):
-                    if (vdur["vdu-id-ref"] == vdu_id and vdur["count-index"] == vdu_index) or \
-                            (ip_address and vdur.get("ip-address") == ip_address):
-                        if vdur.get("status") == "ACTIVE":
-                            target_vdu_id = vdur["vdu-id-ref"]
-                        elif vdur.get("status") == "ERROR":
-                            raise LcmException("Cannot inject ssh-key because target VM is in error state")
-                        break
-                else:
+                    vdur = next((x for x in get_iterable(db_vnfr, "vdur") if x.get("ip-address") == ip_address), None)
+                else:  # VDU case
+                    vdur = next((x for x in get_iterable(db_vnfr, "vdur")
+                                 if x.get("vdu-id-ref") == vdu_id and x.get("count-index") == vdu_index), None)
+
+                if not vdur:
                     raise LcmException("Not found vnfr_id={}, vdu_index={}, vdu_index={}".format(
                         vnfr_id, vdu_id, vdu_index
                     ))
 
+                if vdur.get("status") == "ACTIVE":
+                    ip_address = vdur.get("ip-address")
+                    if not ip_address:
+                        continue
+                    target_vdu_id = vdur["vdu-id-ref"]
+                elif vdur.get("status") == "ERROR":
+                    raise LcmException("Cannot inject ssh-key because target VM is in error state")
+
             if not target_vdu_id:
                 continue
 
@@ -960,9 +971,6 @@ class NsLcm(LcmBase):
             'filter': {'_id': nsr_id},
             'path': db_update_entry
         }
-        logging_text += "member_vnf_index={} vdu_id={}, vdu_index={} ".format(db_vnfr["member-vnf-index-ref"],
-                                                                              vdu_id, vdu_index)
-
         step = ""
         try:
             vnfr_id = None
@@ -978,7 +986,7 @@ class NsLcm(LcmBase):
                     namespace += ".{}-{}".format(vdu_id, vdu_index or 0)
 
             # Get artifact path
-            artifact_path = "/{}/{}/charms/{}".format(
+            artifact_path = "{}/{}/charms/{}".format(
                 base_folder["folder"],
                 base_folder["pkg-dir"],
                 config_descriptor["juju"]["charm"]
@@ -1106,6 +1114,20 @@ class NsLcm(LcmBase):
             raise Exception("{} {}".format(step, e)) from e
             # TODO raise N2VC exception with 'step' extra information
 
+    def _write_ns_status(self, nsr_id: str, ns_state: str, current_operation: str, current_operation_id: str,
+                         error_description: str = None, error_detail: str = None):
+        try:
+            db_dict = dict()
+            if ns_state:
+                db_dict["nsState"] = ns_state
+            db_dict["currentOperation"] = current_operation
+            db_dict["currentOperationID"] = current_operation_id
+            db_dict["errorDescription"] = error_description
+            db_dict["errorDetail"] = error_detail
+            self.update_db_2("nsrs", nsr_id, db_dict)
+        except Exception as e:
+            self.logger.warn('Error writing NS status: {}'.format(e))
+
     async def instantiate(self, nsr_id, nslcmop_id):
         """
 
@@ -1144,6 +1166,7 @@ class NsLcm(LcmBase):
         db_vnfrs = {}     # vnf's info indexed by member-index
         # n2vc_info = {}
         task_instantiation_list = []
+        task_instantiation_info = {}  # from task to info text
         exc = None
         try:
             # wait for any previous tasks in process
@@ -1152,6 +1175,14 @@ class NsLcm(LcmBase):
 
             # STEP 0: Reading database (nslcmops, nsrs, nsds, vnfrs, vnfds)
 
+            # nsState="BUILDING", currentOperation="INSTANTIATING", currentOperationID=nslcmop_id
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state="BUILDING",
+                current_operation="INSTANTIATING",
+                current_operation_id=nslcmop_id
+            )
+
             # read from db: operation
             step = "Getting nslcmop={} from db".format(nslcmop_id)
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
@@ -1226,6 +1257,7 @@ class NsLcm(LcmBase):
                 )
             )
             self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDUs", task_kdu)
+            task_instantiation_info[task_kdu] = "Deploy KDUs"
             task_instantiation_list.append(task_kdu)
             # n2vc_redesign STEP 1 Get VCA public ssh-key
             # feature 1429. Add n2vc public key to needed VMs
@@ -1248,6 +1280,7 @@ class NsLcm(LcmBase):
                 )
             )
             self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_RO", task_ro)
+            task_instantiation_info[task_ro] = "Deploy at VIM"
             task_instantiation_list.append(task_ro)
 
             # n2vc_redesign STEP 3 to 6 Deploy N2VC
@@ -1275,7 +1308,7 @@ class NsLcm(LcmBase):
                 descriptor_config = vnfd.get("vnf-configuration")
                 if descriptor_config and descriptor_config.get("juju"):
                     self._deploy_n2vc(
-                        logging_text=logging_text,
+                        logging_text=logging_text + "member_vnf_index={} ".format(member_vnf_index),
                         db_nsr=db_nsr,
                         db_vnfr=db_vnfr,
                         nslcmop_id=nslcmop_id,
@@ -1290,7 +1323,8 @@ class NsLcm(LcmBase):
                         deploy_params=deploy_params,
                         descriptor_config=descriptor_config,
                         base_folder=base_folder,
-                        task_instantiation_list=task_instantiation_list
+                        task_instantiation_list=task_instantiation_list,
+                        task_instantiation_info=task_instantiation_info
                     )
 
                 # Deploy charms for each VDU that supports one.
@@ -1316,7 +1350,8 @@ class NsLcm(LcmBase):
                         for vdu_index in range(int(vdud.get("count", 1))):
                             # TODO vnfr_params["rw_mgmt_ip"] = vdur["ip-address"]
                             self._deploy_n2vc(
-                                logging_text=logging_text,
+                                logging_text=logging_text + "member_vnf_index={}, vdu_id={}, vdu_index={} ".format(
+                                    member_vnf_index, vdu_id, vdu_index),
                                 db_nsr=db_nsr,
                                 db_vnfr=db_vnfr,
                                 nslcmop_id=nslcmop_id,
@@ -1331,7 +1366,8 @@ class NsLcm(LcmBase):
                                 deploy_params=deploy_params_vdu,
                                 descriptor_config=descriptor_config,
                                 base_folder=base_folder,
-                                task_instantiation_list=task_instantiation_list
+                                task_instantiation_list=task_instantiation_list,
+                                task_instantiation_info=task_instantiation_info
                             )
                 for kdud in get_iterable(vnfd, 'kdu'):
                     kdu_name = kdud["name"]
@@ -1366,7 +1402,8 @@ class NsLcm(LcmBase):
                             deploy_params=deploy_params,
                             descriptor_config=descriptor_config,
                             base_folder=base_folder,
-                            task_instantiation_list=task_instantiation_list
+                            task_instantiation_list=task_instantiation_list,
+                            task_instantiation_info=task_instantiation_info
                         )
 
             # Check if this NS has a charm configuration
@@ -1401,33 +1438,64 @@ class NsLcm(LcmBase):
                     deploy_params=deploy_params,
                     descriptor_config=descriptor_config,
                     base_folder=base_folder,
-                    task_instantiation_list=task_instantiation_list
+                    task_instantiation_list=task_instantiation_list,
+                    task_instantiation_info=task_instantiation_info
                 )
 
             # Wait until all tasks of "task_instantiation_list" have been finished
 
             # while time() <= start_deploy + self.total_deploy_timeout:
-            error_text = None
-            timeout = 3600  # time() - start_deploy
-            task_instantiation_set = set(task_instantiation_list)    # build a set with tasks
-            done = None
-            pending = None
-            if len(task_instantiation_set) > 0:
-                done, pending = await asyncio.wait(task_instantiation_set, timeout=timeout)
-            if pending:
-                error_text = "timeout"
-            for task in done:
-                if task.cancelled():
-                    if not error_text:
-                        error_text = "cancelled"
-                elif task.done():
-                    exc = task.exception()
-                    if exc:
-                        error_text = str(exc)
+            error_text_list = []
+            timeout = 3600
+
+            # let's begin with all OK
+            instantiated_ok = True
+            # let's begin with RO 'running' status (later we can change it)
+            db_nsr_update["operational-status"] = "running"
+            # let's begin with VCA 'configured' status (later we can change it)
+            db_nsr_update["config-status"] = "configured"
+
+            if task_instantiation_list:
+                # wait for all tasks completion
+                done, pending = await asyncio.wait(task_instantiation_list, timeout=timeout)
+
+                for task in pending:
+                    instantiated_ok = False
+                    if task == task_ro:
+                        db_nsr_update["operational-status"] = "failed"
+                    else:
+                        db_nsr_update["config-status"] = "failed"
+                    self.logger.error(logging_text + task_instantiation_info[task] + ": Timeout")
+                    error_text_list.append(task_instantiation_info[task] + ": Timeout")
+                for task in done:
+                    if task.cancelled():
+                        instantiated_ok = False
+                        if task == task_ro:
+                            db_nsr_update["operational-status"] = "failed"
+                        else:
+                            db_nsr_update["config-status"] = "failed"
+                        self.logger.warn(logging_text + task_instantiation_info[task] + ": Cancelled")
+                        error_text_list.append(task_instantiation_info[task] + ": Cancelled")
+                    else:
+                        exc = task.exception()
+                        if exc:
+                            instantiated_ok = False
+                            if task == task_ro:
+                                db_nsr_update["operational-status"] = "failed"
+                            else:
+                                db_nsr_update["config-status"] = "failed"
+                            self.logger.error(logging_text + task_instantiation_info[task] + ": Failed")
+                            if isinstance(exc, (N2VCException, ROclient.ROClientException)):
+                                error_text_list.append(task_instantiation_info[task] + ": {}".format(exc))
+                            else:
+                                exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__))
+                                self.logger.error(logging_text + task_instantiation_info[task] + exc_traceback)
+                                error_text_list.append(task_instantiation_info[task] + ": " + exc_traceback)
+                        else:
+                            self.logger.debug(logging_text + task_instantiation_info[task] + ": Done")
 
-            if error_text:
-                db_nsr_update["config-status"] = "failed"
-                error_text = "fail configuring " + error_text
+            if error_text_list:
+                error_text = "\n".join(error_text_list)
                 db_nsr_update["detailed-status"] = error_text
                 db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED_TEMP"
                 db_nslcmop_update["detailed-status"] = error_text
@@ -1437,7 +1505,6 @@ class NsLcm(LcmBase):
                 db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED"
                 db_nslcmop_update["statusEnteredTime"] = time()
                 db_nslcmop_update["detailed-status"] = "done"
-                db_nsr_update["config-status"] = "configured"
                 db_nsr_update["detailed-status"] = "done"
 
         except (ROclient.ROClientException, DbException, LcmException) as e:
@@ -1455,6 +1522,7 @@ class NsLcm(LcmBase):
                 if db_nsr:
                     db_nsr_update["detailed-status"] = "ERROR {}: {}".format(step, exc)
                     db_nsr_update["operational-status"] = "failed"
+                    db_nsr_update["config-status"] = "failed"
                 if db_nslcmop:
                     db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc)
                     db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED"
@@ -1465,6 +1533,26 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.current-operation"] = None
                     db_nsr_update["_admin.operation-type"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+
+                    # nsState="READY/BROKEN", currentOperation="IDLE", currentOperationID=None
+                    ns_state = None
+                    error_description = None
+                    error_detail = None
+                    if instantiated_ok:
+                        ns_state = "READY"
+                    else:
+                        ns_state = "BROKEN"
+                        error_description = 'Operation: INSTANTIATING.{}, step: {}'.format(nslcmop_id, step)
+                        error_detail = error_text
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=ns_state,
+                        current_operation="IDLE",
+                        current_operation_id=None,
+                        error_description=error_description,
+                        error_detail=error_detail
+                    )
+
                 if db_nslcmop_update:
                     self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update)
             except DbException as e:
@@ -1584,7 +1672,7 @@ class NsLcm(LcmBase):
 
     def _deploy_n2vc(self, logging_text, db_nsr, db_vnfr, nslcmop_id, nsr_id, nsi_id, vnfd_id, vdu_id,
                      kdu_name, member_vnf_index, vdu_index, vdu_name, deploy_params, descriptor_config,
-                     base_folder, task_instantiation_list):
+                     base_folder, task_instantiation_list, task_instantiation_info):
         # launch instantiate_N2VC in a asyncio task and register task object
         # Look where information of this charm is at database <nsrs>._admin.deployed.VCA
         # if not found, create one entry and update database
@@ -1633,6 +1721,7 @@ class NsLcm(LcmBase):
             )
         )
         self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_N2VC-{}".format(vca_index), task_n2vc)
+        task_instantiation_info[task_n2vc] = "Deploy VCA {}.{}".format(member_vnf_index or "", vdu_id or "")
         task_instantiation_list.append(task_n2vc)
 
     # Check if this VNFD has a configured terminate action
@@ -1951,6 +2040,13 @@ class NsLcm(LcmBase):
             step = "Waiting for previous operations to terminate"
             await self.lcm_tasks.waitfor_related_HA("ns", 'nslcmops', nslcmop_id)
 
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state="TERMINATING",
+                current_operation="TERMINATING",
+                current_operation_id=nslcmop_id
+            )
+
             step = "Getting nslcmop={} from db".format(nslcmop_id)
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             step = "Getting nsr={} from db".format(nsr_id)
@@ -2125,6 +2221,7 @@ class NsLcm(LcmBase):
                             self.logger.error(logging_text + failed_detail[-1])
 
             if failed_detail:
+                terminate_ok = False
                 self.logger.error(logging_text + " ;".join(failed_detail))
                 db_nsr_update["operational-status"] = "failed"
                 db_nsr_update["detailed-status"] = "Deletion errors " + "; ".join(failed_detail)
@@ -2132,6 +2229,7 @@ class NsLcm(LcmBase):
                 db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED"
                 db_nslcmop_update["statusEnteredTime"] = time()
             else:
+                terminate_ok = True
                 db_nsr_update["operational-status"] = "terminated"
                 db_nsr_update["detailed-status"] = "Done"
                 db_nsr_update["_admin.nsState"] = "NOT_INSTANTIATED"
@@ -2163,6 +2261,25 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.current-operation"] = None
                     db_nsr_update["_admin.operation-type"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+
+                    if terminate_ok:
+                        ns_state = "IDLE"
+                        error_description = None
+                        error_detail = None
+                    else:
+                        ns_state = "BROKEN"
+                        error_description = 'Operation: TERMINATING.{}, step: {}'.format(nslcmop_id, step)
+                        error_detail = "; ".join(failed_detail)
+
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=ns_state,
+                        current_operation="IDLE",
+                        current_operation_id=None,
+                        error_description=error_description,
+                        error_detail=error_detail
+                    )
+
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             if nslcmop_operation_state:
@@ -2309,6 +2426,13 @@ class NsLcm(LcmBase):
             step = "Waiting for previous operations to terminate"
             await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
 
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state=None,
+                current_operation="RUNNING ACTION",
+                current_operation_id=nslcmop_id
+            )
+
             step = "Getting information from database"
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
@@ -2352,7 +2476,7 @@ class NsLcm(LcmBase):
                                 break
             elif kdu_name:
                 self.logger.debug(logging_text + "Checking actions in KDUs")
-                kdur = next((x for x in db_vnfr["kdur"] if x["kdu_name"] == kdu_name), None)
+                kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None)
                 desc_params = self._format_additional_params(kdur.get("additionalParams")) or {}
                 if primitive_params:
                     desc_params.update(primitive_params)
@@ -2501,6 +2625,12 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.nslcmop"] = None
                     db_nsr_update["_admin.current-operation"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=None,
+                        current_operation="IDLE",
+                        current_operation_id=None
+                    )
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             self.logger.debug(logging_text + "Exit")
@@ -2544,6 +2674,13 @@ class NsLcm(LcmBase):
             step = "Waiting for previous operations to terminate"
             await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
 
+            self._write_ns_status(
+                nsr_id=nsr_id,
+                ns_state=None,
+                current_operation="SCALING",
+                current_operation_id=nslcmop_id
+            )
+
             step = "Getting nslcmop from database"
             self.logger.debug(step + " after having waited for previous tasks to be completed")
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
@@ -2878,7 +3015,7 @@ class NsLcm(LcmBase):
                         # Post-scale reintent check: Check if this sub-operation has been executed before
                         op_index = self._check_or_add_scale_suboperation(
                             db_nslcmop, nslcmop_id, vnf_index, vnf_config_primitive, primitive_params, 'POST-SCALE')
-                        if (op_index == self.SUBOPERATION_STATUS_SKIP):
+                        if op_index == self.SUBOPERATION_STATUS_SKIP:
                             # Skip sub-operation
                             result = 'COMPLETED'
                             result_detail = 'Done'
@@ -2886,7 +3023,7 @@ class NsLcm(LcmBase):
                                               "vnf_config_primitive={} Skipped sub-operation, result {} {}".
                                               format(vnf_config_primitive, result, result_detail))
                         else:
-                            if (op_index == self.SUBOPERATION_STATUS_NEW):
+                            if op_index == self.SUBOPERATION_STATUS_NEW:
                                 # New sub-operation: Get index of this sub-operation
                                 op_index = len(db_nslcmop.get('_admin', {}).get('operations')) - 1
                                 self.logger.debug(logging_text + "vnf_config_primitive={} New sub-operation".
@@ -2957,6 +3094,14 @@ class NsLcm(LcmBase):
                     db_nsr_update["_admin.operation-type"] = None
                     db_nsr_update["_admin.nslcmop"] = None
                     self.update_db_2("nsrs", nsr_id, db_nsr_update)
+
+                    self._write_ns_status(
+                        nsr_id=nsr_id,
+                        ns_state=None,
+                        current_operation="IDLE",
+                        current_operation_id=None
+                    )
+
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             if nslcmop_operation_state: