Prepare LCM tasks for HA
[osm/LCM.git] / osm_lcm / ns.py
index ea70a84..a04f0a8 100644 (file)
@@ -628,6 +628,12 @@ class NsLcm(LcmBase):
                 raise LcmException("ns_update_vnfr: Not found member_vnf_index={} at RO info".format(vnf_index))
 
     async def instantiate(self, nsr_id, nslcmop_id):
+
+        # Try to lock HA task here
+        task_is_locked_by_me = self.lcm_tasks.lock_HA('ns', 'nslcmops', nslcmop_id)
+        if not task_is_locked_by_me:
+            return
+
         logging_text = "Task ns={} instantiate={} ".format(nsr_id, nslcmop_id)
         self.logger.debug(logging_text + "Enter")
         # get all needed from database
@@ -644,6 +650,9 @@ class NsLcm(LcmBase):
         n2vc_key_list = []  # list of public keys to be injected as authorized to VMs
         exc = None
         try:
+            # wait for any previous tasks in process
+            await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
+
             step = "Getting nslcmop={} from db".format(nslcmop_id)
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             step = "Getting nsr={} from db".format(nsr_id)
@@ -652,17 +661,6 @@ class NsLcm(LcmBase):
             nsd = db_nsr["nsd"]
             nsr_name = db_nsr["name"]   # TODO short-name??
 
-            # look if previous tasks in process
-            task_name, task_dependency = self.lcm_tasks.lookfor_related("ns", nsr_id, nslcmop_id)
-            if task_dependency:
-                step = db_nslcmop_update["detailed-status"] = \
-                    "Waiting for related tasks to be completed: {}".format(task_name)
-                self.logger.debug(logging_text + step)
-                self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update)
-                _, pending = await asyncio.wait(task_dependency, timeout=3600)
-                if pending:
-                    raise LcmException("Timeout waiting related tasks to be completed")
-
             step = "Getting vnfrs from db"
             db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id})
             db_vnfds_ref = {}
@@ -1208,8 +1206,8 @@ class NsLcm(LcmBase):
                         for vdu_index, vdu in enumerate(get_iterable(vnfd, 'vdu')):
                             if vdu["id"] == vdu_id:
                                 initial_config_primitive_list = vdu['vdu-configuration'].get(
-                                    'initial-config-primitive', ())
-                            break
+                                    'initial-config-primitive', [])
+                                break
                         else:
                             raise LcmException("Not found vdu_id={} at vnfd:vdu".format(vdu_id))
                         vdur = db_vnfrs[vnf_index]["vdur"][vdu_index]
@@ -1220,7 +1218,7 @@ class NsLcm(LcmBase):
                         add_params["rw_mgmt_ip"] = vdur["ip-address"]
                     else:
                         add_params["rw_mgmt_ip"] = db_vnfrs[vnf_index]["ip-address"]
-                        initial_config_primitive_list = vnfd["vnf-configuration"].get('initial-config-primitive', ())
+                        initial_config_primitive_list = vnfd["vnf-configuration"].get('initial-config-primitive', [])
                 else:
                     if db_nsr.get("additionalParamsForNs"):
                         add_params = db_nsr["additionalParamsForNs"].copy()
@@ -1228,19 +1226,25 @@ class NsLcm(LcmBase):
                         if isinstance(v, str) and v.startswith("!!yaml "):
                             add_params[k] = yaml.safe_load(v[7:])
                     add_params["rw_mgmt_ip"] = None
+                    initial_config_primitive_list = nsd["ns-configuration"].get('initial-config-primitive', [])
 
                 # add primitive verify-ssh-credentials to the list after config only when is a vnf or vdu charm
                 initial_config_primitive_list = initial_config_primitive_list.copy()
                 if initial_config_primitive_list and vnf_index:
-                    initial_config_primitive_list.insert(1, {"name": "verify-ssh-credentials", "paramter": []})
+                    initial_config_primitive_list.insert(1, {"name": "verify-ssh-credentials", "parameter": []})
 
                 for initial_config_primitive in initial_config_primitive_list:
+                    primitive_params_ = self._map_primitive_params(initial_config_primitive, {}, add_params)
+                    self.logger.debug(logging_text + step + " primitive '{}' params '{}'"
+                                      .format(initial_config_primitive["name"], primitive_params_))
                     primitive_result, primitive_detail = await self._ns_execute_primitive(
                         db_nsr["_admin"]["deployed"], vnf_index, vdu_id, vdu_name, vdu_count_index,
                         initial_config_primitive["name"],
-                        self._map_primitive_params(initial_config_primitive, {}, add_params))
+                        primitive_params_,
+                        retries=10 if initial_config_primitive["name"] == "verify-ssh-credentials" else 0,
+                        retries_interval=30)
                     if primitive_result != "COMPLETED":
-                        raise LcmException("charm error executing primitive {} for  member_vnf_index={} vdu_id={}: '{}'"
+                        raise LcmException("charm error executing primitive {} for member_vnf_index={} vdu_id={}: '{}'"
                                            .format(initial_config_primitive["name"], vca_deployed["member-vnf-index"],
                                                    vca_deployed["vdu_id"], primitive_detail))
 
@@ -1391,6 +1395,7 @@ class NsLcm(LcmBase):
             # waiting all charms are ok
             configuration_failed = False
             if number_to_configure:
+                step = "Waiting all charms are active"
                 old_status = "configuring: init: {}".format(number_to_configure)
                 db_nsr_update["config-status"] = old_status
                 db_nsr_update["detailed-status"] = old_status
@@ -1643,6 +1648,12 @@ class NsLcm(LcmBase):
                             vnf_index, seq.get("name"), nslcmop_operation_state_detail))
 
     async def terminate(self, nsr_id, nslcmop_id):
+
+        # Try to lock HA task here
+        task_is_locked_by_me = self.lcm_tasks.lock_HA('ns', 'nslcmops', nslcmop_id)
+        if not task_is_locked_by_me:
+            return
+
         logging_text = "Task ns={} terminate={} ".format(nsr_id, nslcmop_id)
         self.logger.debug(logging_text + "Enter")
         db_nsr = None
@@ -1655,6 +1666,9 @@ class NsLcm(LcmBase):
         nslcmop_operation_state = None
         autoremove = False  # autoremove after terminated
         try:
+            # wait for any previous tasks in process
+            await self.lcm_tasks.waitfor_related_HA("ns", 'nslcmops', nslcmop_id)
+
             step = "Getting nslcmop={} from db".format(nslcmop_id)
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             step = "Getting nsr={} from db".format(nsr_id)
@@ -1926,7 +1940,7 @@ class NsLcm(LcmBase):
         return calculated_params
 
     async def _ns_execute_primitive(self, db_deployed, member_vnf_index, vdu_id, vdu_name, vdu_count_index,
-                                    primitive, primitive_params):
+                                    primitive, primitive_params, retries=0, retries_interval=30):
         start_primitive_time = time()
         try:
             for vca_deployed in db_deployed["VCA"]:
@@ -1956,34 +1970,47 @@ class NsLcm(LcmBase):
             await self.n2vc.login()
             if primitive == "config":
                 primitive_params = {"params": primitive_params}
-            primitive_id = await self.n2vc.ExecutePrimitive(
-                model_name,
-                application_name,
-                primitive,
-                callback,
-                *callback_args,
-                **primitive_params
-            )
-            while time() - start_primitive_time < self.timeout_primitive:
-                primitive_result_ = await self.n2vc.GetPrimitiveStatus(model_name, primitive_id)
-                if primitive_result_ in ("completed", "failed"):
-                    primitive_result = "COMPLETED" if primitive_result_ == "completed" else "FAILED"
-                    detailed_result = await self.n2vc.GetPrimitiveOutput(model_name, primitive_id)
-                    break
-                elif primitive_result_ is None and primitive == "config":
-                    primitive_result = "COMPLETED"
-                    detailed_result = None
+            while retries >= 0:
+                primitive_id = await self.n2vc.ExecutePrimitive(
+                    model_name,
+                    application_name,
+                    primitive,
+                    callback,
+                    *callback_args,
+                    **primitive_params
+                )
+                while time() - start_primitive_time < self.timeout_primitive:
+                    primitive_result_ = await self.n2vc.GetPrimitiveStatus(model_name, primitive_id)
+                    if primitive_result_ in ("completed", "failed"):
+                        primitive_result = "COMPLETED" if primitive_result_ == "completed" else "FAILED"
+                        detailed_result = await self.n2vc.GetPrimitiveOutput(model_name, primitive_id)
+                        break
+                    elif primitive_result_ is None and primitive == "config":
+                        primitive_result = "COMPLETED"
+                        detailed_result = None
+                        break
+                    else:  # ("running", "pending", None):
+                        pass
+                    await asyncio.sleep(5)
+                else:
+                    raise LcmException("timeout after {} seconds".format(self.timeout_primitive))
+                if primitive_result == "COMPLETED":
                     break
-                else:  # ("running", "pending", None):
-                    pass
-                await asyncio.sleep(5)
-            else:
-                raise LcmException("timeout after {} seconds".format(self.timeout_primitive))
+                retries -= 1
+                if retries >= 0:
+                    await asyncio.sleep(retries_interval)
+
             return primitive_result, detailed_result
         except (N2VCPrimitiveExecutionFailed, LcmException) as e:
             return "FAILED", str(e)
 
     async def action(self, nsr_id, nslcmop_id):
+
+        # Try to lock HA task here
+        task_is_locked_by_me = self.lcm_tasks.lock_HA('ns', 'nslcmops', nslcmop_id)
+        if not task_is_locked_by_me:
+            return
+
         logging_text = "Task ns={} action={} ".format(nsr_id, nslcmop_id)
         self.logger.debug(logging_text + "Enter")
         # get all needed from database
@@ -1995,6 +2022,9 @@ class NsLcm(LcmBase):
         nslcmop_operation_state_detail = None
         exc = None
         try:
+            # wait for any previous tasks in process
+            await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
+
             step = "Getting information from database"
             db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
             db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
@@ -2017,17 +2047,6 @@ class NsLcm(LcmBase):
                     step = "Getting nsd from database"
                     db_nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]})
 
-            # look if previous tasks in process
-            task_name, task_dependency = self.lcm_tasks.lookfor_related("ns", nsr_id, nslcmop_id)
-            if task_dependency:
-                step = db_nslcmop_update["detailed-status"] = \
-                    "Waiting for related tasks to be completed: {}".format(task_name)
-                self.logger.debug(logging_text + step)
-                self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update)
-                _, pending = await asyncio.wait(task_dependency, timeout=3600)
-                if pending:
-                    raise LcmException("Timeout waiting related tasks to be completed")
-
             # for backward compatibility
             if nsr_deployed and isinstance(nsr_deployed.get("VCA"), dict):
                 nsr_deployed["VCA"] = list(nsr_deployed["VCA"].values())
@@ -2115,6 +2134,12 @@ class NsLcm(LcmBase):
             return nslcmop_operation_state, nslcmop_operation_state_detail
 
     async def scale(self, nsr_id, nslcmop_id):
+
+        # Try to lock HA task here
+        task_is_locked_by_me = self.lcm_tasks.lock_HA('ns', 'nslcmops', nslcmop_id)
+        if not task_is_locked_by_me:
+            return
+
         logging_text = "Task ns={} scale={} ".format(nsr_id, nslcmop_id)
         self.logger.debug(logging_text + "Enter")
         # get all needed from database
@@ -2130,16 +2155,8 @@ class NsLcm(LcmBase):
         old_config_status = ""
         vnfr_scaled = False
         try:
-            # look if previous tasks in process
-            task_name, task_dependency = self.lcm_tasks.lookfor_related("ns", nsr_id, nslcmop_id)
-            if task_dependency:
-                step = db_nslcmop_update["detailed-status"] = \
-                    "Waiting for related tasks to be completed: {}".format(task_name)
-                self.logger.debug(logging_text + step)
-                self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update)
-                _, pending = await asyncio.wait(task_dependency, timeout=3600)
-                if pending:
-                    raise LcmException("Timeout waiting related tasks to be completed")
+            # wait for any previous tasks in process
+            await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id)
 
             step = "Getting nslcmop from database"
             self.logger.debug(step + " after having waited for previous tasks to be completed")