Bug Fix - 2305: Automated scaling of Vnf is not happening through metrics collected...
[osm/LCM.git] / osm_lcm / ns.py
index 3628e50..76f0bf5 100644 (file)
@@ -2954,7 +2954,15 @@ class NsLcm(LcmBase):
                 stage[1] = stage[2] = ""
             except asyncio.CancelledError:
                 error_list.append("Cancelled")
-                # TODO cancel all tasks
+                await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+                await self._wait_for_tasks(
+                    logging_text,
+                    tasks_dict_info,
+                    timeout_ns_deploy,
+                    stage,
+                    nslcmop_id,
+                    nsr_id=nsr_id,
+                )
             except Exception as exc:
                 error_list.append(str(exc))
 
@@ -4614,7 +4622,14 @@ class NsLcm(LcmBase):
                 stage[1] = stage[2] = ""
             except asyncio.CancelledError:
                 error_list.append("Cancelled")
-                # TODO cancell all tasks
+                await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+                await self._wait_for_tasks(
+                    logging_text,
+                    tasks_dict_info,
+                    timeout_ns_terminate,
+                    stage,
+                    nslcmop_id,
+                )
             except Exception as exc:
                 error_list.append(str(exc))
             # update status at database
@@ -4777,6 +4792,11 @@ class NsLcm(LcmBase):
             self._write_op_status(nslcmop_id, stage)
         return error_detail_list
 
+    async def _cancel_pending_tasks(self, logging_text, created_tasks_info):  # log and cancel() every tracked task
+        for task, name in created_tasks_info.items():  # mapping iterated as task -> name (name is concatenated as str)
+            self.logger.debug(logging_text + "Cancelling task: " + name)
+            task.cancel()  # nothing is awaited here; the callers in this change then call _wait_for_tasks
+
     @staticmethod
     def _map_primitive_params(primitive_desc, params, instantiation_params):
         """
@@ -5883,7 +5903,25 @@ class NsLcm(LcmBase):
                         if result == "FAILED":
                             nslcmop_operation_state = result
                             error_description_nslcmop = detailed_status
+                            old_operational_status = "failed"
                         db_nslcmop_update["detailed-status"] = detailed_status
+                        db_nsr_update["detailed-status"] = detailed_status
+                        scaling_aspect = get_scaling_aspect(latest_vnfd)
+                        scaling_group_desc = db_nsr.get("_admin").get(
+                            "scaling-group", None
+                        )
+                        if scaling_group_desc:
+                            for aspect in scaling_aspect:
+                                scaling_group_id = aspect.get("id")
+                                for scale_index, scaling_group in enumerate(
+                                    scaling_group_desc
+                                ):
+                                    if scaling_group.get("name") == scaling_group_id:
+                                        db_nsr_update[
+                                            "_admin.scaling-group.{}.nb-scale-op".format(
+                                                scale_index
+                                            )
+                                        ] = 0
                         self.logger.debug(
                             logging_text
                             + " step {} Done with result {} {}".format(
@@ -6212,6 +6250,7 @@ class NsLcm(LcmBase):
         old_operational_status = ""
         old_config_status = ""
         nsi_id = None
+        prom_job_name = ""
         try:
             # wait for any previous tasks in process
             step = "Waiting for previous operations to terminate"
@@ -6322,6 +6361,15 @@ class NsLcm(LcmBase):
                         vdud = get_vdu(db_vnfd, vdu_delta["id"])
                         # vdu_index also provides the number of instance of the targeted vdu
                         vdu_count = vdu_index = get_vdur_index(db_vnfr, vdu_delta)
+                        if vdu_index <= len(db_vnfr["vdur"]):
+                            vdu_name_id = db_vnfr["vdur"][vdu_index - 1]["vdu-name"]
+                            prom_job_name = (
+                                db_vnfr["_id"] + vdu_name_id + str(vdu_index - 1)
+                            )
+                            prom_job_name = prom_job_name.replace("_", "")
+                            prom_job_name = prom_job_name.replace("-", "")
+                        else:
+                            prom_job_name = None
                         cloud_init_text = self._get_vdu_cloud_init_content(
                             vdud, db_vnfd
                         )
@@ -7110,7 +7158,69 @@ class NsLcm(LcmBase):
                         db_nsr_update["config-status"] = old_config_status
                         scale_process = None
             # POST-SCALE END
+            # Check if each VNF has an exporter for metric collection; if so, update the Prometheus job records
+            if scaling_type == "SCALE_OUT":
+                if "exporters-endpoints" in db_vnfd.get("df")[0]:
+                    vnfr_id = db_vnfr["id"]
+                    db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id})
+                    exporter_config = db_vnfd.get("df")[0].get("exporters-endpoints")
+                    self.logger.debug("exporter config :{}".format(exporter_config))
+                    artifact_path = "{}/{}/{}".format(
+                        base_folder["folder"],
+                        base_folder["pkg-dir"],
+                        "exporter-endpoint",
+                    )
+                    ee_id = None
+                    ee_config_descriptor = exporter_config
+                    rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(
+                        logging_text,
+                        nsr_id,
+                        vnfr_id,
+                        vdu_id=db_vnfr["vdur"][-1]["vdu-id-ref"],
+                        vdu_index=db_vnfr["vdur"][-1]["count-index"],
+                        user=None,
+                        pub_key=None,
+                    )
+                    self.logger.debug("rw_mgmt_ip:{}".format(rw_mgmt_ip))
+                    self.logger.debug("Artifact_path:{}".format(artifact_path))
+                    vdu_id_for_prom = None
+                    vdu_index_for_prom = None
+                    for x in get_iterable(db_vnfr, "vdur"):
+                        vdu_id_for_prom = x.get("vdu-id-ref")
+                        vdu_index_for_prom = x.get("count-index")
+                    vnfr_id = vnfr_id + vdu_id + str(vdu_index)
+                    vnfr_id = vnfr_id.replace("_", "")
+                    prometheus_jobs = await self.extract_prometheus_scrape_jobs(
+                        ee_id=ee_id,
+                        artifact_path=artifact_path,
+                        ee_config_descriptor=ee_config_descriptor,
+                        vnfr_id=vnfr_id,
+                        nsr_id=nsr_id,
+                        target_ip=rw_mgmt_ip,
+                        element_type="VDU",
+                        vdu_id=vdu_id_for_prom,
+                        vdu_index=vdu_index_for_prom,
+                    )
+
+                    self.logger.debug("Prometheus job:{}".format(prometheus_jobs))
+                    if prometheus_jobs:
+                        db_nsr_update[
+                            "_admin.deployed.prometheus_jobs"
+                        ] = prometheus_jobs
+                        self.update_db_2(
+                            "nsrs",
+                            nsr_id,
+                            db_nsr_update,
+                        )
 
+                        for job in prometheus_jobs:
+                            self.db.set_one(
+                                "prometheus_jobs",
+                                {"job_name": ""},
+                                job,
+                                upsert=True,
+                                fail_on_empty=False,
+                            )
             db_nsr_update[
                 "detailed-status"
             ] = ""  # "scaled {} {}".format(scaling_group, scaling_type)
@@ -7141,16 +7251,31 @@ class NsLcm(LcmBase):
                 exc_info=True,
             )
         finally:
+            error_list = list()
+            if exc:
+                error_list.append(str(exc))
             self._write_ns_status(
                 nsr_id=nsr_id,
                 ns_state=None,
                 current_operation="IDLE",
                 current_operation_id=None,
             )
-            if tasks_dict_info:
-                stage[1] = "Waiting for instantiate pending tasks."
-                self.logger.debug(logging_text + stage[1])
-                exc = await self._wait_for_tasks(
+            try:
+                if tasks_dict_info:
+                    stage[1] = "Waiting for instantiate pending tasks."
+                    self.logger.debug(logging_text + stage[1])
+                    exc = await self._wait_for_tasks(
+                        logging_text,
+                        tasks_dict_info,
+                        self.timeout.ns_deploy,
+                        stage,
+                        nslcmop_id,
+                        nsr_id=nsr_id,
+                    )
+            except asyncio.CancelledError:
+                error_list.append("Cancelled")
+                await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+                await self._wait_for_tasks(
                     logging_text,
                     tasks_dict_info,
                     self.timeout.ns_deploy,
@@ -7158,10 +7283,13 @@ class NsLcm(LcmBase):
                     nslcmop_id,
                     nsr_id=nsr_id,
                 )
-            if exc:
+            if error_list:
+                error_detail = "; ".join(error_list)
                 db_nslcmop_update[
                     "detailed-status"
-                ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc)
+                ] = error_description_nslcmop = "FAILED {}: {}".format(
+                    step, error_detail
+                )
                 nslcmop_operation_state = "FAILED"
                 if db_nsr:
                     db_nsr_update["operational-status"] = old_operational_status
@@ -7175,12 +7303,18 @@ class NsLcm(LcmBase):
                         db_nsr_update[
                             "detailed-status"
                         ] = "FAILED scaling nslcmop={} {}: {}".format(
-                            nslcmop_id, step, exc
+                            nslcmop_id, step, error_detail
                         )
             else:
                 error_description_nslcmop = None
                 nslcmop_operation_state = "COMPLETED"
                 db_nslcmop_update["detailed-status"] = "Done"
+                if scaling_type == "SCALE_IN" and prom_job_name is not None:
+                    self.db.del_one(
+                        "prometheus_jobs",
+                        {"job_name": prom_job_name},
+                        fail_on_empty=False,
+                    )
 
             self._write_op_status(
                 op_id=nslcmop_id,
@@ -7924,10 +8058,25 @@ class NsLcm(LcmBase):
                 exc_info=True,
             )
         finally:
-            if tasks_dict_info:
-                stage[1] = "Waiting for healing pending tasks."
-                self.logger.debug(logging_text + stage[1])
-                exc = await self._wait_for_tasks(
+            error_list = list()
+            if exc:
+                error_list.append(str(exc))
+            try:
+                if tasks_dict_info:
+                    stage[1] = "Waiting for healing pending tasks."
+                    self.logger.debug(logging_text + stage[1])
+                    exc = await self._wait_for_tasks(
+                        logging_text,
+                        tasks_dict_info,
+                        self.timeout.ns_deploy,
+                        stage,
+                        nslcmop_id,
+                        nsr_id=nsr_id,
+                    )
+            except asyncio.CancelledError:
+                error_list.append("Cancelled")
+                await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+                await self._wait_for_tasks(
                     logging_text,
                     tasks_dict_info,
                     self.timeout.ns_deploy,
@@ -7935,17 +8084,22 @@ class NsLcm(LcmBase):
                     nslcmop_id,
                     nsr_id=nsr_id,
                 )
-            if exc:
+            if error_list:
+                error_detail = "; ".join(error_list)
                 db_nslcmop_update[
                     "detailed-status"
-                ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc)
+                ] = error_description_nslcmop = "FAILED {}: {}".format(
+                    step, error_detail
+                )
                 nslcmop_operation_state = "FAILED"
                 if db_nsr:
                     db_nsr_update["operational-status"] = old_operational_status
                     db_nsr_update["config-status"] = old_config_status
                     db_nsr_update[
                         "detailed-status"
-                    ] = "FAILED healing nslcmop={} {}: {}".format(nslcmop_id, step, exc)
+                    ] = "FAILED healing nslcmop={} {}: {}".format(
+                        nslcmop_id, step, error_detail
+                    )
                     for task, task_name in tasks_dict_info.items():
                         if not task.done() or task.cancelled() or task.exception():
                             if task_name.startswith(self.task_name_deploy_vca):
@@ -8685,6 +8839,10 @@ class NsLcm(LcmBase):
         db_nslcmop = None
         db_nslcmop_update = {}
         nslcmop_operation_state = None
+        old_db_update = {}
+        q_filter = {}
+        old_vdu_index = None
+        old_flavor_id = None
         db_nsr_update = {}
         target = {}
         exc = None
@@ -8708,6 +8866,59 @@ class NsLcm(LcmBase):
             )
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             operationParams = db_nslcmop.get("operationParams")
+            # Update the VNFRs and NSRs with the requested flavour details so that RO tasks can function properly
+            db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
+            db_flavor = db_nsr.get("flavor")
+            db_flavor_index = str(len(db_flavor))
+            change_vnf_flavor_data = operationParams["changeVnfFlavorData"]
+            flavor_dict = change_vnf_flavor_data["additionalParams"]
+            count_index = flavor_dict["vduCountIndex"]
+            vdu_id_ref = flavor_dict["vduid"]
+            flavor_dict_update = {
+                "id": db_flavor_index,
+                "memory-mb": flavor_dict["virtualMemory"],
+                "name": f"{vdu_id_ref}-{count_index}-flv",
+                "storage-gb": flavor_dict["sizeOfStorage"],
+                "vcpu-count": flavor_dict["numVirtualCpu"],
+            }
+            db_flavor.append(flavor_dict_update)
+            db_update = {}
+            db_update["flavor"] = db_flavor
+            ns_q_filter = {
+                "_id": nsr_id,
+            }
+            self.db.set_one(
+                "nsrs",
+                q_filter=ns_q_filter,
+                update_dict=db_update,
+                fail_on_empty=True,
+            )
+            db_vnfr = self.db.get_one(
+                "vnfrs", {"_id": change_vnf_flavor_data["vnfInstanceId"]}
+            )
+            for vdu_index, vdur in enumerate(db_vnfr.get("vdur", ())):
+                if (
+                    vdur.get("count-index") == count_index
+                    and vdur.get("vdu-id-ref") == vdu_id_ref
+                ):
+                    old_flavor_id = vdur.get("ns-flavor-id", 0)
+                    old_vdu_index = vdu_index
+                    filter_text = {
+                        "_id": change_vnf_flavor_data["vnfInstanceId"],
+                        "vdur.count-index": count_index,
+                        "vdur.vdu-id-ref": vdu_id_ref,
+                    }
+                    q_filter.update(filter_text)
+                    db_update = {}
+                    db_update[
+                        "vdur.{}.ns-flavor-id".format(vdu_index)
+                    ] = db_flavor_index
+                    self.db.set_one(
+                        "vnfrs",
+                        q_filter=q_filter,
+                        update_dict=db_update,
+                        fail_on_empty=True,
+                    )
             target = {}
             target.update(operationParams)
             desc = await self.RO.vertical_scale(nsr_id, target)
@@ -8721,7 +8932,12 @@ class NsLcm(LcmBase):
                 self.timeout.verticalscale,
                 operation="verticalscale",
             )
-        except (ROclient.ROClientException, DbException, LcmException) as e:
+        except (
+            NgRoException,
+            ROclient.ROClientException,
+            DbException,
+            LcmException,
+        ) as e:
             self.logger.error("Exit Exception {}".format(e))
             exc = e
         except asyncio.CancelledError:
@@ -8742,6 +8958,9 @@ class NsLcm(LcmBase):
             if exc:
                 db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc)
                 nslcmop_operation_state = "FAILED"
+                old_db_update[
+                    "vdur.{}.ns-flavor-id".format(old_vdu_index)
+                ] = old_flavor_id
             else:
                 nslcmop_operation_state = "COMPLETED"
                 db_nslcmop_update["detailed-status"] = "Done"
@@ -8754,6 +8973,16 @@ class NsLcm(LcmBase):
                 operation_state=nslcmop_operation_state,
                 other_update=db_nslcmop_update,
             )
+            if old_vdu_index and old_db_update != {}:
+                self.logger.critical(
+                    "Reverting Old Flavor -- : {}".format(old_db_update)
+                )
+                self.db.set_one(
+                    "vnfrs",
+                    q_filter=q_filter,
+                    update_dict=old_db_update,
+                    fail_on_empty=True,
+                )
             if nslcmop_operation_state:
                 try:
                     msg = {