X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_lcm%2Fns.py;h=1148a821f7caa6ffe56e944103d6df3882ae34e9;hb=refs%2Fchanges%2F26%2F8326%2F2;hp=f66c23ae7bf46fbb0672d60171e2e28601ebf2c2;hpb=626e0150294da773c136d444d000e1a8b4521dea;p=osm%2FLCM.git diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index f66c23a..1148a82 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -26,11 +26,13 @@ from jinja2 import Environment, Template, meta, TemplateError, TemplateNotFound, from osm_lcm import ROclient from osm_lcm.lcm_utils import LcmException, LcmExceptionNoMgmtIP, LcmBase, deep_get from n2vc.k8s_helm_conn import K8sHelmConnector +from n2vc.k8s_juju_conn import K8sJujuConnector from osm_common.dbbase import DbException from osm_common.fsbase import FsException from n2vc.n2vc_juju_conn import N2VCJujuConnector +from n2vc.exceptions import N2VCException from copy import copy, deepcopy from http import HTTPStatus @@ -125,6 +127,15 @@ class NsLcm(LcmBase): on_update_db=None, ) + self.k8sclusterjuju = K8sJujuConnector( + kubectl_command=self.vca_config.get("kubectlpath"), + juju_command=self.vca_config.get("jujupath"), + fs=self.fs, + log=self.logger, + db=self.db, + on_update_db=None, + ) + # create RO client self.RO = ROclient.ROClient(self.loop, **self.ro_config) @@ -871,9 +882,14 @@ class NsLcm(LcmBase): ip_address = None nb_tries = 0 target_vdu_id = None + ro_retries = 0 while True: + ro_retries += 1 + if ro_retries >= 360: # 1 hour + raise LcmException("Not found _admin.deployed.RO.nsr_id for nsr_id: {}".format(nsr_id)) + await asyncio.sleep(10, loop=self.loop) # wait until NS is deployed at RO if not ro_nsr_id: @@ -885,23 +901,29 @@ class NsLcm(LcmBase): # get ip address if not target_vdu_id: db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) - if not vdu_id: + + if not vdu_id: # for the VNF case ip_address = db_vnfr.get("ip-address") if not ip_address: continue - for vdur in get_iterable(db_vnfr, "vdur"): - if (vdur["vdu-id-ref"] == vdu_id and vdur["count-index"] == vdu_index) or \ - (ip_address and vdur.get("ip-address") == ip_address): - if vdur.get("status") == "ACTIVE": - target_vdu_id = vdur["vdu-id-ref"] - elif vdur.get("status") == "ERROR": - raise LcmException("Cannot inject ssh-key because target VM is in error state") - break - else: + vdur = next((x for x in get_iterable(db_vnfr, "vdur") if x.get("ip-address") == ip_address), None) + else: # VDU case + vdur = next((x for x in get_iterable(db_vnfr, "vdur") + if x.get("vdu-id-ref") == vdu_id and x.get("count-index") == vdu_index), None) + + if not vdur: raise LcmException("Not found vnfr_id={}, vdu_index={}, vdu_index={}".format( vnfr_id, vdu_id, vdu_index )) + if vdur.get("status") == "ACTIVE": + ip_address = vdur.get("ip-address") + if not ip_address: + continue + target_vdu_id = vdur["vdu-id-ref"] + elif vdur.get("status") == "ERROR": + raise LcmException("Cannot inject ssh-key because target VM is in error state") + if not target_vdu_id: continue @@ -968,7 +990,7 @@ class NsLcm(LcmBase): namespace += ".{}-{}".format(vdu_id, vdu_index or 0) # Get artifact path - artifact_path = "/{}/{}/charms/{}".format( + artifact_path = "{}/{}/charms/{}".format( base_folder["folder"], base_folder["pkg-dir"], config_descriptor["juju"]["charm"] @@ -1096,6 +1118,20 @@ class NsLcm(LcmBase): raise Exception("{} {}".format(step, e)) from e # TODO raise N2VC exception with 'step' extra information + def _write_ns_status(self, nsr_id: str, ns_state: str, current_operation: str, current_operation_id: str, + error_description: str = None, error_detail: str = None): + try: + db_dict = dict() + if ns_state: + db_dict["nsState"] = ns_state + db_dict["currentOperation"] = current_operation + db_dict["currentOperationID"] = current_operation_id + db_dict["errorDescription"] = error_description + db_dict["errorDetail"] = error_detail + self.update_db_2("nsrs", nsr_id, db_dict) + except Exception as e: + self.logger.warn('Error writing NS status: {}'.format(e)) + async def instantiate(self, nsr_id, nslcmop_id): """ @@ -1134,6 +1170,7 @@ class NsLcm(LcmBase): db_vnfrs = {} # vnf's info indexed by member-index # n2vc_info = {} task_instantiation_list = [] + task_instantiation_info = {} # from task to info text exc = None try: # wait for any previous tasks in process @@ -1142,6 +1179,14 @@ class NsLcm(LcmBase): # STEP 0: Reading database (nslcmops, nsrs, nsds, vnfrs, vnfds) + # nsState="BUILDING", currentOperation="INSTANTIATING", currentOperationID=nslcmop_id + self._write_ns_status( + nsr_id=nsr_id, + ns_state="BUILDING", + current_operation="INSTANTIATING", + current_operation_id=nslcmop_id + ) + # read from db: operation step = "Getting nslcmop={} from db".format(nslcmop_id) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) @@ -1216,6 +1261,7 @@ class NsLcm(LcmBase): ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDUs", task_kdu) + task_instantiation_info[task_kdu] = "Deploy KDUs" task_instantiation_list.append(task_kdu) # n2vc_redesign STEP 1 Get VCA public ssh-key # feature 1429. Add n2vc public key to needed VMs @@ -1238,6 +1284,7 @@ class NsLcm(LcmBase): ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_RO", task_ro) + task_instantiation_info[task_ro] = "Deploy at VIM" task_instantiation_list.append(task_ro) # n2vc_redesign STEP 3 to 6 Deploy N2VC @@ -1280,7 +1327,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) # Deploy charms for each VDU that supports one. @@ -1321,7 +1369,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params_vdu, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) for kdud in get_iterable(vnfd, 'kdu'): kdu_name = kdud["name"] @@ -1356,7 +1405,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) # Check if this NS has a charm configuration @@ -1391,33 +1441,64 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) # Wait until all tasks of "task_instantiation_list" have been finished # while time() <= start_deploy + self.total_deploy_timeout: - error_text = None - timeout = 3600 # time() - start_deploy - task_instantiation_set = set(task_instantiation_list) # build a set with tasks - done = None - pending = None - if len(task_instantiation_set) > 0: - done, pending = await asyncio.wait(task_instantiation_set, timeout=timeout) - if pending: - error_text = "timeout" - for task in done: - if task.cancelled(): - if not error_text: - error_text = "cancelled" - elif task.done(): - exc = task.exception() - if exc: - error_text = str(exc) + error_text_list = [] + timeout = 3600 + + # let's begin with all OK + instantiated_ok = True + # let's begin with RO 'running' status (later we can change it) + db_nsr_update["operational-status"] = "running" + # let's begin with VCA 'configured' status (later we can change it) + db_nsr_update["config-status"] = "configured" + + if task_instantiation_list: + # wait for all tasks completion + done, pending = await asyncio.wait(task_instantiation_list, timeout=timeout) + + for task in pending: + instantiated_ok = False + if task == task_ro: + db_nsr_update["operational-status"] = "failed" + else: + db_nsr_update["config-status"] = "failed" + self.logger.error(logging_text + task_instantiation_info[task] + ": Timeout") + error_text_list.append(task_instantiation_info[task] + ": Timeout") + for task in done: + if task.cancelled(): + instantiated_ok = False + if task == task_ro: + db_nsr_update["operational-status"] = "failed" + else: + db_nsr_update["config-status"] = "failed" + self.logger.warn(logging_text + task_instantiation_info[task] + ": Cancelled") + error_text_list.append(task_instantiation_info[task] + ": Cancelled") + else: + exc = task.exception() + if exc: + instantiated_ok = False + if task == task_ro: + db_nsr_update["operational-status"] = "failed" + else: + db_nsr_update["config-status"] = "failed" + self.logger.error(logging_text + task_instantiation_info[task] + ": Failed") + if isinstance(exc, (N2VCException, ROclient.ROClientException)): + error_text_list.append(task_instantiation_info[task] + ": {}".format(exc)) + else: + exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__)) + self.logger.error(logging_text + task_instantiation_info[task] + exc_traceback) + error_text_list.append(task_instantiation_info[task] + ": " + exc_traceback) + else: + self.logger.debug(logging_text + task_instantiation_info[task] + ": Done") - if error_text: - db_nsr_update["config-status"] = "failed" - error_text = "fail configuring " + error_text + if error_text_list: + error_text = "\n".join(error_text_list) db_nsr_update["detailed-status"] = error_text db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED_TEMP" db_nslcmop_update["detailed-status"] = error_text @@ -1427,7 +1508,6 @@ class NsLcm(LcmBase): db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED" db_nslcmop_update["statusEnteredTime"] = time() db_nslcmop_update["detailed-status"] = "done" - db_nsr_update["config-status"] = "configured" db_nsr_update["detailed-status"] = "done" except (ROclient.ROClientException, DbException, LcmException) as e: @@ -1445,6 +1525,7 @@ class NsLcm(LcmBase): if db_nsr: db_nsr_update["detailed-status"] = "ERROR {}: {}".format(step, exc) db_nsr_update["operational-status"] = "failed" + db_nsr_update["config-status"] = "failed" if db_nslcmop: db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" @@ -1455,6 +1536,26 @@ class NsLcm(LcmBase): db_nsr_update["_admin.current-operation"] = None db_nsr_update["_admin.operation-type"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + + # nsState="READY/BROKEN", currentOperation="IDLE", currentOperationID=None + ns_state = None + error_description = None + error_detail = None + if instantiated_ok: + ns_state = "READY" + else: + ns_state = "BROKEN" + error_description = 'Operation: INSTANTIATING.{}, step: {}'.format(nslcmop_id, step) + error_detail = error_text + self._write_ns_status( + nsr_id=nsr_id, + ns_state=ns_state, + current_operation="IDLE", + current_operation_id=None, + error_description=error_description, + error_detail=error_detail + ) + if db_nslcmop_update: self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) except DbException as e: @@ -1540,8 +1641,10 @@ class NsLcm(LcmBase): params=desc_params, db_dict=db_dict, timeout=3600) ) else: - # TODO juju-bundle connector in place - pass + task = self.k8sclusterjuju.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, + atomic=True, params=desc_params, + db_dict=db_dict, timeout=600) + pending_tasks[task] = "_admin.deployed.K8s.{}.".format(index) index += 1 if not pending_tasks: @@ -1572,7 +1675,7 @@ class NsLcm(LcmBase): def _deploy_n2vc(self, logging_text, db_nsr, db_vnfr, nslcmop_id, nsr_id, nsi_id, vnfd_id, vdu_id, kdu_name, member_vnf_index, vdu_index, vdu_name, deploy_params, descriptor_config, - base_folder, task_instantiation_list): + base_folder, task_instantiation_list, task_instantiation_info): # launch instantiate_N2VC in a asyncio task and register task object # Look where information of this charm is at database ._admin.deployed.VCA # if not found, create one entry and update database @@ -1621,6 +1724,7 @@ class NsLcm(LcmBase): ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_N2VC-{}".format(vca_index), task_n2vc) + task_instantiation_info[task_n2vc] = "Deploy VCA {}.{}".format(member_vnf_index or "", vdu_id or "") task_instantiation_list.append(task_n2vc) # Check if this VNFD has a configured terminate action @@ -1939,6 +2043,13 @@ class NsLcm(LcmBase): step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA("ns", 'nslcmops', nslcmop_id) + self._write_ns_status( + nsr_id=nsr_id, + ns_state="TERMINATING", + current_operation="TERMINATING", + current_operation_id=nslcmop_id + ) + step = "Getting nslcmop={} from db".format(nslcmop_id) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) step = "Getting nsr={} from db".format(nsr_id) @@ -1984,8 +2095,9 @@ class NsLcm(LcmBase): self.k8sclusterhelm.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), kdu_instance=kdu_instance)) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - continue + task_delete_kdu_instance = asyncio.ensure_future( + self.k8sclusterjuju.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance)) else: self.error(logging_text + "Unknown k8s deployment type {}". format(kdu.get("k8scluster-type"))) @@ -2112,6 +2224,7 @@ class NsLcm(LcmBase): self.logger.error(logging_text + failed_detail[-1]) if failed_detail: + terminate_ok = False self.logger.error(logging_text + " ;".join(failed_detail)) db_nsr_update["operational-status"] = "failed" db_nsr_update["detailed-status"] = "Deletion errors " + "; ".join(failed_detail) @@ -2119,6 +2232,7 @@ class NsLcm(LcmBase): db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" db_nslcmop_update["statusEnteredTime"] = time() else: + terminate_ok = True db_nsr_update["operational-status"] = "terminated" db_nsr_update["detailed-status"] = "Done" db_nsr_update["_admin.nsState"] = "NOT_INSTANTIATED" @@ -2150,6 +2264,25 @@ class NsLcm(LcmBase): db_nsr_update["_admin.current-operation"] = None db_nsr_update["_admin.operation-type"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + + if terminate_ok: + ns_state = "IDLE" + error_description = None + error_detail = None + else: + ns_state = "BROKEN" + error_description = 'Operation: TERMINATING.{}, step: {}'.format(nslcmop_id, step) + error_detail = "; ".join(failed_detail) + + self._write_ns_status( + nsr_id=nsr_id, + ns_state=ns_state, + current_operation="IDLE", + current_operation_id=None, + error_description=error_description, + error_detail=error_detail + ) + except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) if nslcmop_operation_state: @@ -2296,6 +2429,13 @@ class NsLcm(LcmBase): step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id) + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="RUNNING ACTION", + current_operation_id=nslcmop_id + ) + step = "Getting information from database" db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) @@ -2339,7 +2479,7 @@ class NsLcm(LcmBase): break elif kdu_name: self.logger.debug(logging_text + "Checking actions in KDUs") - kdur = next((x for x in db_vnfr["kdur"] if x["kdu_name"] == kdu_name), None) + kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None) desc_params = self._format_additional_params(kdur.get("additionalParams")) or {} if primitive_params: desc_params.update(primitive_params) @@ -2366,8 +2506,12 @@ class NsLcm(LcmBase): params=desc_params, db_dict=db_dict, timeout=300) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + output = await self.k8sclusterjuju.upgrade(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + atomic=True, kdu_model=kdu_model, + params=desc_params, db_dict=db_dict, + timeout=300) + else: msg = "k8scluster-type not defined" raise LcmException(msg) @@ -2380,8 +2524,9 @@ class NsLcm(LcmBase): kdu_instance=kdu.get("kdu-instance"), db_dict=db_dict) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + output = await self.k8sclusterjuju.rollback(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + db_dict=db_dict) else: msg = "k8scluster-type not defined" raise LcmException(msg) @@ -2391,8 +2536,8 @@ class NsLcm(LcmBase): output = await self.k8sclusterhelm.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), kdu_instance=kdu.get("kdu-instance")) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + output = await self.k8sclusterjuju.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance")) else: msg = "k8scluster-type not defined" raise LcmException(msg) @@ -2483,6 +2628,12 @@ class NsLcm(LcmBase): db_nsr_update["_admin.nslcmop"] = None db_nsr_update["_admin.current-operation"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="IDLE", + current_operation_id=None + ) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.logger.debug(logging_text + "Exit") @@ -2526,6 +2677,13 @@ class NsLcm(LcmBase): step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id) + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="SCALING", + current_operation_id=nslcmop_id + ) + step = "Getting nslcmop from database" self.logger.debug(step + " after having waited for previous tasks to be completed") db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) @@ -2860,7 +3018,7 @@ class NsLcm(LcmBase): # Post-scale reintent check: Check if this sub-operation has been executed before op_index = self._check_or_add_scale_suboperation( db_nslcmop, nslcmop_id, vnf_index, vnf_config_primitive, primitive_params, 'POST-SCALE') - if (op_index == self.SUBOPERATION_STATUS_SKIP): + if op_index == self.SUBOPERATION_STATUS_SKIP: # Skip sub-operation result = 'COMPLETED' result_detail = 'Done' @@ -2868,7 +3026,7 @@ class NsLcm(LcmBase): "vnf_config_primitive={} Skipped sub-operation, result {} {}". format(vnf_config_primitive, result, result_detail)) else: - if (op_index == self.SUBOPERATION_STATUS_NEW): + if op_index == self.SUBOPERATION_STATUS_NEW: # New sub-operation: Get index of this sub-operation op_index = len(db_nslcmop.get('_admin', {}).get('operations')) - 1 self.logger.debug(logging_text + "vnf_config_primitive={} New sub-operation". @@ -2939,6 +3097,14 @@ class NsLcm(LcmBase): db_nsr_update["_admin.operation-type"] = None db_nsr_update["_admin.nslcmop"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="IDLE", + current_operation_id=None + ) + except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) if nslcmop_operation_state: