X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_lcm%2Fns.py;h=4176db06fe12888f1c720f2a1a6153b36ca40232;hb=771c32b24679051e6fc8f70fc86268e4087c875b;hp=a7affce48120fc2377d1ac16fa359b7156eaf87b;hpb=e876f6786da64cb81571ab37a25fb9f2cf61d359;p=osm%2FLCM.git diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index a7affce..4176db0 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -33,7 +33,7 @@ from osm_common.dbbase import DbException from osm_common.fsbase import FsException from n2vc.n2vc_juju_conn import N2VCJujuConnector -from n2vc.exceptions import N2VCException +from n2vc.exceptions import N2VCException, N2VCNotFound, K8sException from copy import copy, deepcopy from http import HTTPStatus @@ -49,11 +49,12 @@ class NsLcm(LcmBase): timeout_ns_terminate = 1800 # default global timeout for un deployment a ns timeout_charm_delete = 10 * 60 timeout_primitive = 10 * 60 # timeout for primitive execution + timeout_progress_primitive = 2 * 60 # timeout for some progress in a primitive execution SUBOPERATION_STATUS_NOT_FOUND = -1 SUBOPERATION_STATUS_NEW = -2 SUBOPERATION_STATUS_SKIP = -3 - task_name_deploy_vca = "Deploy VCA" + task_name_deploy_vca = "Deploying VCA" def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ @@ -104,6 +105,12 @@ class NsLcm(LcmBase): on_update_db=None, ) + self.k8scluster_map = { + "helm-chart": self.k8sclusterhelm, + "chart": self.k8sclusterhelm, + "juju-bundle": self.k8sclusterjuju, + "juju": self.k8sclusterjuju, + } # create RO client self.RO = ROclient.ROClient(self.loop, **self.ro_config) @@ -207,11 +214,11 @@ class NsLcm(LcmBase): # write to database self.update_db_2("nsrs", nsr_id, db_dict) + except (asyncio.CancelledError, asyncio.TimeoutError): + raise except Exception as e: self.logger.warn('Error updating NS state for ns={}: {}'.format(nsr_id, e)) - return - def vnfd2RO(self, vnfd, new_id=None, additionalParams=None, nsrId=None): """ Converts creates a new vnfd descriptor for RO base on input OSM IM vnfd @@ -270,7 +277,7 @@ class NsLcm(LcmBase): raise LcmException("Error parsing Jinja2 to cloud-init content at vnfd[id={}]:vdu[id={}]: {}". format(vnfd["id"], vdu["id"], e)) - def ns_params_2_RO(self, ns_params, nsd, vnfd_dict, n2vc_key_list): + def _ns_params_2_RO(self, ns_params, nsd, vnfd_dict, n2vc_key_list): """ Creates a RO ns descriptor from OSM ns_instantiate params :param ns_params: OSM instantiate params @@ -758,7 +765,7 @@ class NsLcm(LcmBase): # Check for and optionally request placement optimization. Database will be updated if placement activated stage[2] = "Waiting for Placement." - await self.do_placement(logging_text, db_nslcmop, db_vnfrs) + await self._do_placement(logging_text, db_nslcmop, db_vnfrs) # deploy RO @@ -891,7 +898,7 @@ class NsLcm(LcmBase): await asyncio.wait(task_dependency, timeout=3600) stage[2] = "Checking instantiation parameters." - RO_ns_params = self.ns_params_2_RO(ns_params, nsd, db_vnfds_ref, n2vc_key_list) + RO_ns_params = self._ns_params_2_RO(ns_params, nsd, db_vnfds_ref, n2vc_key_list) stage[2] = "Deploying ns at VIM." db_nsr_update["detailed-status"] = " ".join(stage) self.update_db_2("nsrs", nsr_id, db_nsr_update) @@ -957,6 +964,7 @@ class NsLcm(LcmBase): # await self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) # self.logger.debug(logging_text + "Deployed at VIM") except (ROclient.ROClientException, LcmException, DbException) as e: + stage[2] = "ERROR deploying at VIM" self.set_vnfr_at_error(db_vnfrs, str(e)) raise @@ -1129,6 +1137,10 @@ class NsLcm(LcmBase): namespace += ".{}-{}".format(vdu_id, vdu_index or 0) element_type = 'VDU' element_under_configuration = "{}-{}".format(vdu_id, vdu_index or 0) + elif kdu_name: + namespace += ".{}".format(kdu_name) + element_type = 'KDU' + element_under_configuration = kdu_name # Get artifact path self.fs.sync() # Sync from FSMongo @@ -1204,9 +1216,9 @@ class NsLcm(LcmBase): ee_id_parts = ee_id.split('.') model_name = ee_id_parts[0] application_name = ee_id_parts[1] - self.update_db_2("nsrs", nsr_id, {db_update_entry + "model": model_name, - db_update_entry + "application": application_name, - db_update_entry + "ee_id": ee_id}) + db_nsr_update = {db_update_entry + "model": model_name, + db_update_entry + "application": application_name, + db_update_entry + "ee_id": ee_id} # n2vc_redesign STEP 3.3 @@ -1217,12 +1229,30 @@ class NsLcm(LcmBase): vca_index=vca_index, status='INSTALLING SW', element_under_configuration=element_under_configuration, - element_type=element_type + element_type=element_type, + other_update=db_nsr_update ) # TODO check if already done self.logger.debug(logging_text + step) - await self.n2vc.install_configuration_sw(ee_id=ee_id, artifact_path=artifact_path, db_dict=db_dict) + config = None + if not is_proxy_charm: + initial_config_primitive_list = config_descriptor.get('initial-config-primitive') + if initial_config_primitive_list: + for primitive in initial_config_primitive_list: + if primitive["name"] == "config": + config = self._map_primitive_params( + primitive, + {}, + deploy_params + ) + break + await self.n2vc.install_configuration_sw( + ee_id=ee_id, + artifact_path=artifact_path, + db_dict=db_dict, + config=config + ) # write in db flag of configuration_sw already installed self.update_db_2("nsrs", nsr_id, {db_update_entry + "config_sw_installed": True}) @@ -1347,7 +1377,7 @@ class NsLcm(LcmBase): raise LcmException("{} {}".format(step, e)) from e def _write_ns_status(self, nsr_id: str, ns_state: str, current_operation: str, current_operation_id: str, - error_description: str = None, other_update: dict = None): + error_description: str = None, error_detail: str = None, other_update: dict = None): """ Update db_nsr fields. :param nsr_id: @@ -1355,6 +1385,7 @@ class NsLcm(LcmBase): :param current_operation: :param current_operation_id: :param error_description: + :param error_detail: :param other_update: Other required changes at database if provided, will be cleared :return: """ @@ -1366,6 +1397,7 @@ class NsLcm(LcmBase): db_dict["currentOperation"] = current_operation db_dict["currentOperationID"] = current_operation_id db_dict["errorDescription"] = error_description + db_dict["errorDetail"] = error_detail if ns_state: db_dict["nsState"] = ns_state @@ -1393,33 +1425,30 @@ class NsLcm(LcmBase): except DbException as e: self.logger.warn('Error writing OPERATION status for op_id: {} -> {}'.format(op_id, e)) - def _write_all_config_status(self, nsr_id: str, status: str): + def _write_all_config_status(self, db_nsr: dict, status: str): try: - # nsrs record - db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + nsr_id = db_nsr["_id"] # configurationStatus config_status = db_nsr.get('configurationStatus') if config_status: + db_nsr_update = {"configurationStatus.{}.status".format(index): status for index, v in + enumerate(config_status) if v} # update status - db_dict = dict() - db_dict['configurationStatus'] = list() - for c in config_status: - c['status'] = status - db_dict['configurationStatus'].append(c) - self.update_db_2("nsrs", nsr_id, db_dict) + self.update_db_2("nsrs", nsr_id, db_nsr_update) except DbException as e: self.logger.warn('Error writing all configuration status, ns={}: {}'.format(nsr_id, e)) def _write_configuration_status(self, nsr_id: str, vca_index: int, status: str = None, - element_under_configuration: str = None, element_type: str = None): + element_under_configuration: str = None, element_type: str = None, + other_update: dict = None): # self.logger.debug('_write_configuration_status(): vca_index={}, status={}' # .format(vca_index, status)) try: db_path = 'configurationStatus.{}.'.format(vca_index) - db_dict = dict() + db_dict = other_update or {} if status: db_dict[db_path + 'status'] = status if element_under_configuration: @@ -1431,28 +1460,40 @@ class NsLcm(LcmBase): self.logger.warn('Error writing configuration status={}, ns={}, vca_index={}: {}' .format(status, nsr_id, vca_index, e)) - async def do_placement(self, logging_text, db_nslcmop, db_vnfrs): + async def _do_placement(self, logging_text, db_nslcmop, db_vnfrs): + """ + Check and computes the placement, (vim account where to deploy). If it is decided by an external tool, it + sends the request via kafka and wait until the result is wrote at database (nslcmops _admin.plca). + Database is used because the result can be obtained from a different LCM worker in case of HA. + :param logging_text: contains the prefix for logging, with the ns and nslcmop identifiers + :param db_nslcmop: database content of nslcmop + :param db_vnfrs: database content of vnfrs, indexed by member-vnf-index. + :return: None. Modifies database vnfrs and parameter db_vnfr with the computed 'vim-account-id' + """ + nslcmop_id = db_nslcmop['_id'] placement_engine = deep_get(db_nslcmop, ('operationParams', 'placement-engine')) if placement_engine == "PLA": - self.logger.debug(logging_text + "Invoke placement optimization for nslcmopId={}".format(db_nslcmop['id'])) - await self.msg.aiowrite("pla", "get_placement", {'nslcmopId': db_nslcmop['_id']}, loop=self.loop) + self.logger.debug(logging_text + "Invoke and wait for placement optimization") + await self.msg.aiowrite("pla", "get_placement", {'nslcmopId': nslcmop_id}, loop=self.loop) db_poll_interval = 5 - wait = db_poll_interval * 4 + wait = db_poll_interval * 10 pla_result = None while not pla_result and wait >= 0: await asyncio.sleep(db_poll_interval) wait -= db_poll_interval - db_nslcmop = self.db.get_one("nslcmops", {"_id": db_nslcmop["_id"]}) + db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) pla_result = deep_get(db_nslcmop, ('_admin', 'pla')) if not pla_result: - raise LcmException("Placement timeout for nslcmopId={}".format(db_nslcmop['id'])) + raise LcmException("Placement timeout for nslcmopId={}".format(nslcmop_id)) for pla_vnf in pla_result['vnf']: vnfr = db_vnfrs.get(pla_vnf['member-vnf-index']) if not pla_vnf.get('vimAccountId') or not vnfr: continue self.db.set_one("vnfrs", {"_id": vnfr["_id"]}, {"vim-account-id": pla_vnf['vimAccountId']}) + # Modifies db_vnfrs + vnfr["vim-account-id"] = pla_vnf['vimAccountId'] return def update_nsrs_with_pla_result(self, params): @@ -1632,7 +1673,7 @@ class NsLcm(LcmBase): ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_RO", task_ro) - tasks_dict_info[task_ro] = "Deploy at VIM" + tasks_dict_info[task_ro] = "Deploying at VIM" # n2vc_redesign STEP 3 to 6 Deploy N2VC stage[1] = "Deploying Execution Environments." @@ -1717,7 +1758,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params_vdu, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_info=tasks_dict_info + task_instantiation_info=tasks_dict_info, + stage=stage ) for kdud in get_iterable(vnfd, 'kdu'): kdu_name = kdud["name"] @@ -1752,7 +1794,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_info=tasks_dict_info + task_instantiation_info=tasks_dict_info, + stage=stage ) # Check if this NS has a charm configuration @@ -1787,7 +1830,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_info=tasks_dict_info + task_instantiation_info=tasks_dict_info, + stage=stage ) # rest of staff will be done at finally @@ -1833,16 +1877,17 @@ class NsLcm(LcmBase): # update status at database if error_list: - error_detail = "; ".join(error_list) + error_detail = ". ".join(error_list) self.logger.error(logging_text + error_detail) - error_description_nslcmop = 'Stage: {} {} Detail: {}'.format(stage[0], stage[1], error_detail) - error_description_nsr = 'Operation: INSTANTIATING.{}, {}'.format(nslcmop_id, error_description_nslcmop) + error_description_nslcmop = 'Stage: {}. Detail: {}'.format(stage[0], error_detail) + error_description_nsr = 'Operation: INSTANTIATING.{}, Stage {}'.format(nslcmop_id, stage[0]) - db_nsr_update["detailed-status"] = error_description_nsr + db_nsr_update["detailed-status"] = error_description_nsr + " Detail: " + error_detail db_nslcmop_update["detailed-status"] = error_detail nslcmop_operation_state = "FAILED" ns_state = "BROKEN" else: + error_detail = None error_description_nsr = error_description_nslcmop = None ns_state = "READY" db_nsr_update["detailed-status"] = "Done" @@ -1856,6 +1901,7 @@ class NsLcm(LcmBase): current_operation="IDLE", current_operation_id=None, error_description=error_description_nsr, + error_detail=error_detail, other_update=db_nsr_update ) if db_nslcmop: @@ -2070,7 +2116,7 @@ class NsLcm(LcmBase): for kdur in get_iterable(vnfr_data, "kdur"): desc_params = self._format_additional_params(kdur.get("additionalParams")) vnfd_id = vnfr_data.get('vnfd-id') - pkgdir = deep_get(db_vnfds.get(vnfd_id), ('_admin', 'storage', 'pkg-dir')) + namespace = kdur.get("k8s-namespace") if kdur.get("helm-chart"): kdumodel = kdur["helm-chart"] k8sclustertype = "helm-chart" @@ -2083,11 +2129,14 @@ class NsLcm(LcmBase): format(vnfr_data["member-vnf-index-ref"], kdur["kdu-name"])) # check if kdumodel is a file and exists try: - # path format: /vnfdid/pkkdir/kdumodel - filename = '{}/{}/{}s/{}'.format(vnfd_id, pkgdir, k8sclustertype, kdumodel) - if self.fs.file_exists(filename, mode='file') or self.fs.file_exists(filename, mode='dir'): - kdumodel = self.fs.path + filename - except asyncio.CancelledError: + storage = deep_get(db_vnfds.get(vnfd_id), ('_admin', 'storage')) + if storage and storage.get('pkg-dir'): # may be not present if vnfd has not artifacts + # path format: /vnfdid/pkkdir/helm-charts|juju-bundles/kdumodel + filename = '{}/{}/{}s/{}'.format(storage["folder"], storage["'pkg-dir"], k8sclustertype, + kdumodel) + if self.fs.file_exists(filename, mode='file') or self.fs.file_exists(filename, mode='dir'): + kdumodel = self.fs.path + filename + except (asyncio.TimeoutError, asyncio.CancelledError): raise except Exception: # it is not a file pass @@ -2112,9 +2161,13 @@ class NsLcm(LcmBase): step = "Instantiating KDU {}.{} in k8s cluster {}".format(vnfr_data["member-vnf-index-ref"], kdur["kdu-name"], k8s_cluster_id) - k8s_instace_info = {"kdu-instance": None, "k8scluster-uuid": cluster_uuid, + k8s_instace_info = {"kdu-instance": None, + "k8scluster-uuid": cluster_uuid, "k8scluster-type": k8sclustertype, - "kdu-name": kdur["kdu-name"], "kdu-model": kdumodel} + "member-vnf-index": vnfr_data["member-vnf-index-ref"], + "kdu-name": kdur["kdu-name"], + "kdu-model": kdumodel, + "namespace": namespace} db_nsr_update["_admin.deployed.K8s.{}".format(index)] = k8s_instace_info self.update_db_2("nsrs", nsr_id, db_nsr_update) @@ -2122,21 +2175,14 @@ class NsLcm(LcmBase): "filter": {"_id": nsr_id}, "path": "_admin.deployed.K8s.{}".format(index)} - if k8sclustertype == "helm-chart": - task = asyncio.ensure_future( - self.k8sclusterhelm.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, atomic=True, - params=desc_params, db_dict=db_dict, timeout=3600) - ) - else: - task = asyncio.ensure_future( - self.k8sclusterjuju.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, - atomic=True, params=desc_params, - db_dict=db_dict, timeout=600, - kdu_name=kdur["kdu-name"]) - ) + task = asyncio.ensure_future( + self.k8scluster_map[k8sclustertype].install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, + atomic=True, params=desc_params, + db_dict=db_dict, timeout=600, + kdu_name=kdur["kdu-name"], namespace=namespace)) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDU-{}".format(index), task) - task_instantiation_info[task] = "Deploy KDU {}".format(kdur["kdu-name"]) + task_instantiation_info[task] = "Deploying KDU {}".format(kdur["kdu-name"]) index += 1 @@ -2288,9 +2334,9 @@ class NsLcm(LcmBase): # sub-operations - def _reintent_or_skip_suboperation(self, db_nslcmop, op_index): - op = db_nslcmop.get('_admin', {}).get('operations', [])[op_index] - if (op.get('operationState') == 'COMPLETED'): + def _retry_or_skip_suboperation(self, db_nslcmop, op_index): + op = deep_get(db_nslcmop, ('_admin', 'operations'), [])[op_index] + if op.get('operationState') == 'COMPLETED': # b. Skip sub-operation # _ns_execute_primitive() or RO.create_action() will NOT be executed return self.SUBOPERATION_STATUS_SKIP @@ -2397,7 +2443,7 @@ class NsLcm(LcmBase): 'lcmOperationType': operationType } op_index = self._find_suboperation(db_nslcmop, match) - if (op_index == self.SUBOPERATION_STATUS_NOT_FOUND): + if op_index == self.SUBOPERATION_STATUS_NOT_FOUND: # a. New sub-operation # The sub-operation does not exist, add it. # _ns_execute_primitive() will be called from scale() as usual, with non-modified arguments @@ -2405,7 +2451,7 @@ class NsLcm(LcmBase): vdu_id = None vdu_count_index = None vdu_name = None - if (RO_nsr_id and RO_scaling_info): + if RO_nsr_id and RO_scaling_info: vnf_config_primitive = None primitive_params = None else: @@ -2431,7 +2477,7 @@ class NsLcm(LcmBase): else: # Return either SUBOPERATION_STATUS_SKIP (operationState == 'COMPLETED'), # or op_index (operationState != 'COMPLETED') - return self._reintent_or_skip_suboperation(db_nslcmop, op_index) + return self._retry_or_skip_suboperation(db_nslcmop, op_index) # Function to return execution_environment id @@ -2498,11 +2544,14 @@ class NsLcm(LcmBase): if destroy_ee: await self.n2vc.delete_execution_environment(vca_deployed["ee_id"]) - async def _delete_N2VC(self, nsr_id: str): - self._write_all_config_status(nsr_id=nsr_id, status='TERMINATING') - namespace = "." + nsr_id - await self.n2vc.delete_namespace(namespace=namespace) - self._write_all_config_status(nsr_id=nsr_id, status='DELETED') + async def _delete_all_N2VC(self, db_nsr: dict): + self._write_all_config_status(db_nsr=db_nsr, status='TERMINATING') + namespace = "." + db_nsr["_id"] + try: + await self.n2vc.delete_namespace(namespace=namespace, total_timeout=self.timeout_charm_delete) + except N2VCNotFound: # already deleted. Skip + pass + self._write_all_config_status(db_nsr=db_nsr, status='DELETED') async def _terminate_RO(self, logging_text, nsr_deployed, nsr_id, nslcmop_id, stage): """ @@ -2583,11 +2632,11 @@ class NsLcm(LcmBase): db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None self.logger.debug(logging_text + "RO_ns_id={} already deleted".format(ro_nsr_id)) elif isinstance(e, ROclient.ROClientException) and e.http_code == 409: # conflict - failed_detail.append("RO_ns_id={} delete conflict: {}".format(ro_nsr_id, e)) - self.logger.debug(logging_text + failed_detail[-1]) + failed_detail.append("delete conflict: {}".format(e)) + self.logger.debug(logging_text + "RO_ns_id={} delete conflict: {}".format(ro_nsr_id, e)) else: - failed_detail.append("RO_ns_id={} delete error: {}".format(ro_nsr_id, e)) - self.logger.error(logging_text + failed_detail[-1]) + failed_detail.append("delete error: {}".format(e)) + self.logger.error(logging_text + "RO_ns_id={} delete error: {}".format(ro_nsr_id, e)) # Delete nsd if not failed_detail and deep_get(nsr_deployed, ("RO", "nsd_id")): @@ -2636,13 +2685,16 @@ class NsLcm(LcmBase): failed_detail.append("ro_vnfd_id={} delete error: {}".format(ro_vnfd_id, e)) self.logger.error(logging_text + failed_detail[-1]) - stage[2] = "Deleted from VIM" + if failed_detail: + stage[2] = "Error deleting from VIM" + else: + stage[2] = "Deleted from VIM" db_nsr_update["detailed-status"] = " ".join(stage) self.update_db_2("nsrs", nsr_id, db_nsr_update) self._write_op_status(nslcmop_id, stage) if failed_detail: - raise LcmException("Error while {}: {}".format(stage[2], "; ".join(failed_detail))) + raise LcmException("; ".join(failed_detail)) async def terminate(self, nsr_id, nslcmop_id): # Try to lock HA task here @@ -2746,12 +2798,14 @@ class NsLcm(LcmBase): # remove All execution environments at once stage[0] = "Stage 3/3 delete all." - stage[1] = "Deleting all execution environments." - self.logger.debug(logging_text + stage[1]) - task_delete_ee = asyncio.ensure_future(self._delete_N2VC(nsr_id=nsr_id)) - # task_delete_ee = asyncio.ensure_future(self.n2vc.delete_namespace(namespace="." + nsr_id)) - tasks_dict_info[task_delete_ee] = "Terminating all VCA" + if nsr_deployed.get("VCA"): + stage[1] = "Deleting all execution environments." + self.logger.debug(logging_text + stage[1]) + task_delete_ee = asyncio.ensure_future(asyncio.wait_for(self._delete_all_N2VC(db_nsr=db_nsr), + timeout=self.timeout_charm_delete)) + # task_delete_ee = asyncio.ensure_future(self.n2vc.delete_namespace(namespace="." + nsr_id)) + tasks_dict_info[task_delete_ee] = "Terminating all VCA" # Delete from k8scluster stage[1] = "Deleting KDUs." @@ -2761,14 +2815,11 @@ class NsLcm(LcmBase): if not kdu or not kdu.get("kdu-instance"): continue kdu_instance = kdu.get("kdu-instance") - if kdu.get("k8scluster-type") in ("helm-chart", "chart"): - task_delete_kdu_instance = asyncio.ensure_future( - self.k8sclusterhelm.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu_instance)) - elif kdu.get("k8scluster-type") in ("juju-bundle", "juju"): + if kdu.get("k8scluster-type") in self.k8scluster_map: task_delete_kdu_instance = asyncio.ensure_future( - self.k8sclusterjuju.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu_instance)) + self.k8scluster_map[kdu["k8scluster-type"]].uninstall( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance)) else: self.logger.error(logging_text + "Unknown k8s deployment type {}". format(kdu.get("k8scluster-type"))) @@ -2812,15 +2863,16 @@ class NsLcm(LcmBase): if error_list: error_detail = "; ".join(error_list) # self.logger.error(logging_text + error_detail) - error_description_nslcmop = 'Stage: {} {} Detail: {}'.format(stage[0], stage[1], error_detail) - error_description_nsr = 'Operation: TERMINATING.{}, {}'.format(nslcmop_id, error_description_nslcmop) + error_description_nslcmop = 'Stage: {}. Detail: {}'.format(stage[0], error_detail) + error_description_nsr = 'Operation: TERMINATING.{}, Stage {}.'.format(nslcmop_id, stage[0]) db_nsr_update["operational-status"] = "failed" - db_nsr_update["detailed-status"] = error_description_nsr + db_nsr_update["detailed-status"] = error_description_nsr + " Detail: " + error_detail db_nslcmop_update["detailed-status"] = error_detail nslcmop_operation_state = "FAILED" ns_state = "BROKEN" else: + error_detail = None error_description_nsr = error_description_nslcmop = None ns_state = "NOT_INSTANTIATED" db_nsr_update["operational-status"] = "terminated" @@ -2836,6 +2888,7 @@ class NsLcm(LcmBase): current_operation="IDLE", current_operation_id=None, error_description=error_description_nsr, + error_detail=error_detail, other_update=db_nsr_update ) if db_nslcmop: @@ -2861,47 +2914,51 @@ class NsLcm(LcmBase): async def _wait_for_tasks(self, logging_text, created_tasks_info, timeout, stage, nslcmop_id, nsr_id=None): time_start = time() + error_detail_list = [] error_list = [] pending_tasks = list(created_tasks_info.keys()) num_tasks = len(pending_tasks) num_done = 0 stage[1] = "{}/{}.".format(num_done, num_tasks) self._write_op_status(nslcmop_id, stage) - new_error = False while pending_tasks: + new_error = None _timeout = timeout + time_start - time() done, pending_tasks = await asyncio.wait(pending_tasks, timeout=_timeout, return_when=asyncio.FIRST_COMPLETED) num_done += len(done) if not done: # Timeout for task in pending_tasks: - error_list.append(created_tasks_info[task] + ": Timeout") + new_error = created_tasks_info[task] + ": Timeout" + error_detail_list.append(new_error) + error_list.append(new_error) break for task in done: if task.cancelled(): - self.logger.warn(logging_text + created_tasks_info[task] + ": Cancelled") - error_list.append(created_tasks_info[task] + ": Cancelled") - new_error = True + exc = "Cancelled" else: exc = task.exception() - if exc: - error_list.append(created_tasks_info[task] + ": {}".format(exc)) - new_error = True - if isinstance(exc, (DbException, N2VCException, ROclient.ROClientException, LcmException)): - self.logger.error(logging_text + created_tasks_info[task] + ": " + str(exc)) - else: - exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__)) - self.logger.error(logging_text + created_tasks_info[task] + exc_traceback) + if exc: + if isinstance(exc, asyncio.TimeoutError): + exc = "Timeout" + new_error = created_tasks_info[task] + ": {}".format(exc) + error_list.append(created_tasks_info[task]) + error_detail_list.append(new_error) + if isinstance(exc, (str, DbException, N2VCException, ROclient.ROClientException, LcmException)): + self.logger.error(logging_text + new_error) else: - self.logger.debug(logging_text + created_tasks_info[task] + ": Done") + exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__)) + self.logger.error(logging_text + created_tasks_info[task] + exc_traceback) + else: + self.logger.debug(logging_text + created_tasks_info[task] + ": Done") stage[1] = "{}/{}.".format(num_done, num_tasks) if new_error: - stage[1] += "Errors: " + ". ".join(error_list) + "." - new_error = False + stage[1] += " Errors: " + ". ".join(error_detail_list) + "." if nsr_id: # update also nsr - self.update_db_2("nsrs", nsr_id, {"errorDescription": ". ".join(error_list)}) + self.update_db_2("nsrs", nsr_id, {"errorDescription": "Error at: " + ", ".join(error_list), + "errorDetail": ". ".join(error_detail_list)}) self._write_op_status(nslcmop_id, stage) - return error_list + return error_detail_list @staticmethod def _map_primitive_params(primitive_desc, params, instantiation_params): @@ -2946,15 +3003,13 @@ class NsLcm(LcmBase): calculated_params["ns_config_info"] = instantiation_params["ns_config_info"] return calculated_params - def _look_for_deployed_vca(self, deployed_vca, member_vnf_index, vdu_id, vdu_name, vdu_count_index, kdu_name=None): + def _look_for_deployed_vca(self, deployed_vca, member_vnf_index, vdu_id, vdu_count_index, kdu_name=None): # find vca_deployed record for this action. Raise LcmException if not found or there is not any id. for vca in deployed_vca: if not vca: continue if member_vnf_index != vca["member-vnf-index"] or vdu_id != vca["vdu_id"]: continue - if vdu_name and vdu_name != vca["vdu_name"]: - continue if vdu_count_index is not None and vdu_count_index != vca["vdu_count_index"]: continue if kdu_name and kdu_name != vca["kdu_name"]: @@ -2962,44 +3017,51 @@ class NsLcm(LcmBase): break else: # vca_deployed not found - raise LcmException("charm for member_vnf_index={} vdu_id={} vdu_name={} vdu_count_index={} is not " - "deployed".format(member_vnf_index, vdu_id, vdu_name, vdu_count_index)) + raise LcmException("charm for member_vnf_index={} vdu_id={} kdu_name={} vdu_count_index={} is not " + "deployed".format(member_vnf_index, vdu_id, kdu_name, vdu_count_index)) # get ee_id ee_id = vca.get("ee_id") if not ee_id: - raise LcmException("charm for member_vnf_index={} vdu_id={} vdu_name={} vdu_count_index={} has not " + raise LcmException("charm for member_vnf_index={} vdu_id={} kdu_name={} vdu_count_index={} has not " "execution environment" - .format(member_vnf_index, vdu_id, vdu_name, vdu_count_index)) + .format(member_vnf_index, vdu_id, kdu_name, vdu_count_index)) return ee_id async def _ns_execute_primitive(self, ee_id, primitive, primitive_params, retries=0, - retries_interval=30) -> (str, str): + retries_interval=30, timeout=None) -> (str, str): try: if primitive == "config": primitive_params = {"params": primitive_params} while retries >= 0: try: - output = await self.n2vc.exec_primitive( - ee_id=ee_id, - primitive_name=primitive, - params_dict=primitive_params - ) + output = await asyncio.wait_for( + self.n2vc.exec_primitive( + ee_id=ee_id, + primitive_name=primitive, + params_dict=primitive_params, + progress_timeout=self.timeout_progress_primitive, + total_timeout=self.timeout_primitive), + timeout=timeout or self.timeout_primitive) # execution was OK break - except Exception as e: + except asyncio.CancelledError: + raise + except Exception as e: # asyncio.TimeoutError + if isinstance(e, asyncio.TimeoutError): + e = "Timeout" retries -= 1 if retries >= 0: self.logger.debug('Error executing action {} on {} -> {}'.format(primitive, ee_id, e)) # wait and retry await asyncio.sleep(retries_interval, loop=self.loop) else: - return 'FAIL', 'Cannot execute action {} on {}: {}'.format(primitive, ee_id, e) + return 'FAILED', str(e) return 'COMPLETED', output - except LcmException: + except (LcmException, asyncio.CancelledError): raise except Exception as e: return 'FAIL', 'Error executing action {}: {}'.format(primitive, e) @@ -3019,7 +3081,7 @@ class NsLcm(LcmBase): db_nsr_update = {} db_nslcmop_update = {} nslcmop_operation_state = None - nslcmop_operation_state_detail = None + error_description_nslcmop = None exc = None try: # wait for any previous tasks in process @@ -3042,7 +3104,9 @@ class NsLcm(LcmBase): vdu_id = db_nslcmop["operationParams"].get("vdu_id") kdu_name = db_nslcmop["operationParams"].get("kdu_name") vdu_count_index = db_nslcmop["operationParams"].get("vdu_count_index") - vdu_name = db_nslcmop["operationParams"].get("vdu_name") + primitive = db_nslcmop["operationParams"]["primitive"] + primitive_params = db_nslcmop["operationParams"]["primitive_params"] + timeout_ns_action = db_nslcmop["operationParams"].get("timeout_ns_action", self.timeout_primitive) if vnf_index: step = "Getting vnfr from database" @@ -3050,11 +3114,8 @@ class NsLcm(LcmBase): step = "Getting vnfd from database" db_vnfd = self.db.get_one("vnfds", {"_id": db_vnfr["vnfd-id"]}) else: - if db_nsr.get("nsd"): - db_nsd = db_nsr.get("nsd") # TODO this will be removed - else: - step = "Getting nsd from database" - db_nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]}) + step = "Getting nsd from database" + db_nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]}) # for backward compatibility if nsr_deployed and isinstance(nsr_deployed.get("VCA"), dict): @@ -3062,173 +3123,178 @@ class NsLcm(LcmBase): db_nsr_update["_admin.deployed.VCA"] = nsr_deployed["VCA"] self.update_db_2("nsrs", nsr_id, db_nsr_update) - primitive = db_nslcmop["operationParams"]["primitive"] - primitive_params = db_nslcmop["operationParams"]["primitive_params"] - # look for primitive config_primitive_desc = None if vdu_id: for vdu in get_iterable(db_vnfd, "vdu"): if vdu_id == vdu["id"]: - for config_primitive in vdu.get("vdu-configuration", {}).get("config-primitive", ()): + for config_primitive in deep_get(vdu, ("vdu-configuration", "config-primitive"), ()): if config_primitive["name"] == primitive: config_primitive_desc = config_primitive break + break elif kdu_name: - self.logger.debug(logging_text + "Checking actions in KDUs") - kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None) - desc_params = self._format_additional_params(kdur.get("additionalParams")) or {} - if primitive_params: - desc_params.update(primitive_params) - # TODO Check if we will need something at vnf level - index = 0 - for kdu in get_iterable(nsr_deployed, "K8s"): - if kdu_name == kdu["kdu-name"]: - db_dict = {"collection": "nsrs", "filter": {"_id": nsr_id}, - "path": "_admin.deployed.K8s.{}".format(index)} - if primitive == "upgrade": - if desc_params.get("kdu_model"): - kdu_model = desc_params.get("kdu_model") - del desc_params["kdu_model"] - else: - kdu_model = kdu.get("kdu-model") - parts = kdu_model.split(sep=":") - if len(parts) == 2: - kdu_model = parts[0] - - if kdu.get("k8scluster-type") in ("helm-chart", "chart"): - output = await self.k8sclusterhelm.upgrade(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - atomic=True, kdu_model=kdu_model, - params=desc_params, db_dict=db_dict, - timeout=300) - elif kdu.get("k8scluster-type")in ("juju-bundle", "juju"): - output = await self.k8sclusterjuju.upgrade(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - atomic=True, kdu_model=kdu_model, - params=desc_params, db_dict=db_dict, - timeout=300) - - else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - - self.logger.debug(logging_text + " Upgrade of kdu {} done".format(output)) - break - elif primitive == "rollback": - if kdu.get("k8scluster-type") in ("helm-chart", "chart"): - output = await self.k8sclusterhelm.rollback(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - db_dict=db_dict) - elif kdu.get("k8scluster-type") in ("juju-bundle", "juju"): - output = await self.k8sclusterjuju.rollback(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - db_dict=db_dict) - else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - break - elif primitive == "status": - if kdu.get("k8scluster-type") in ("helm-chart", "chart"): - output = await self.k8sclusterhelm.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance")) - elif kdu.get("k8scluster-type") in ("juju-bundle", "juju"): - output = await self.k8sclusterjuju.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance")) - else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - break - index += 1 - - else: - raise LcmException("KDU '{}' not found".format(kdu_name)) - if output: - db_nslcmop_update["detailed-status"] = output - db_nslcmop_update["operationState"] = 'COMPLETED' - db_nslcmop_update["statusEnteredTime"] = time() - else: - db_nslcmop_update["detailed-status"] = '' - db_nslcmop_update["operationState"] = 'FAILED' - db_nslcmop_update["statusEnteredTime"] = time() - return + for kdu in get_iterable(db_vnfd, "kdu"): + if kdu_name == kdu["name"]: + for config_primitive in deep_get(kdu, ("kdu-configuration", "config-primitive"), ()): + if config_primitive["name"] == primitive: + config_primitive_desc = config_primitive + break + break elif vnf_index: - for config_primitive in db_vnfd.get("vnf-configuration", {}).get("config-primitive", ()): + for config_primitive in deep_get(db_vnfd, ("vnf-configuration", "config-primitive"), ()): if config_primitive["name"] == primitive: config_primitive_desc = config_primitive break else: - for config_primitive in db_nsd.get("ns-configuration", {}).get("config-primitive", ()): + for config_primitive in deep_get(db_nsd, ("ns-configuration", "config-primitive"), ()): if config_primitive["name"] == primitive: config_primitive_desc = config_primitive break - if not config_primitive_desc: + if not config_primitive_desc and not (kdu_name and primitive in ("upgrade", "rollback", "status")): raise LcmException("Primitive {} not found at [ns|vnf|vdu]-configuration:config-primitive ". format(primitive)) - desc_params = {} if vnf_index: - if db_vnfr.get("additionalParamsForVnf"): - desc_params = self._format_additional_params(db_vnfr["additionalParamsForVnf"]) if vdu_id: vdur = next((x for x in db_vnfr["vdur"] if x["vdu-id-ref"] == vdu_id), None) - if vdur.get("additionalParams"): - desc_params = self._format_additional_params(vdur["additionalParams"]) + desc_params = self._format_additional_params(vdur.get("additionalParams")) + elif kdu_name: + kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None) + desc_params = self._format_additional_params(kdur.get("additionalParams")) + else: + desc_params = self._format_additional_params(db_vnfr.get("additionalParamsForVnf")) else: - if db_nsr.get("additionalParamsForNs"): - desc_params.update(self._format_additional_params(db_nsr["additionalParamsForNs"])) + desc_params = self._format_additional_params(db_nsr.get("additionalParamsForNs")) + + if kdu_name: + kdu_action = True if not deep_get(kdu, ("kdu-configuration", "juju")) else False # TODO check if ns is in a proper status - result, detailed_status = await self._ns_execute_primitive( - self._look_for_deployed_vca(nsr_deployed["VCA"], - member_vnf_index=vnf_index, - vdu_id=vdu_id, - vdu_name=vdu_name, - vdu_count_index=vdu_count_index), - primitive=primitive, - primitive_params=self._map_primitive_params(config_primitive_desc, primitive_params, desc_params)) - - db_nslcmop_update["detailed-status"] = nslcmop_operation_state_detail = detailed_status - db_nslcmop_update["operationState"] = nslcmop_operation_state = result - db_nslcmop_update["statusEnteredTime"] = time() - self.logger.debug(logging_text + " task Done with result {} {}".format(result, detailed_status)) + if kdu_name and (primitive in ("upgrade", "rollback", "status") or kdu_action): + # kdur and desc_params already set from before + if primitive_params: + desc_params.update(primitive_params) + # TODO Check if we will need something at vnf level + for index, kdu in enumerate(get_iterable(nsr_deployed, "K8s")): + if kdu_name == kdu["kdu-name"] and kdu["member-vnf-index"] == vnf_index: + break + else: + raise LcmException("KDU '{}' for vnf '{}' not deployed".format(kdu_name, vnf_index)) + + if kdu.get("k8scluster-type") not in self.k8scluster_map: + msg = "unknown k8scluster-type '{}'".format(kdu.get("k8scluster-type")) + raise LcmException(msg) + + db_dict = {"collection": "nsrs", + "filter": {"_id": nsr_id}, + "path": "_admin.deployed.K8s.{}".format(index)} + self.logger.debug(logging_text + "Exec k8s {} on {}.{}".format(primitive, vnf_index, kdu_name)) + step = "Executing kdu {}".format(primitive) + if primitive == "upgrade": + if desc_params.get("kdu_model"): + kdu_model = desc_params.get("kdu_model") + del desc_params["kdu_model"] + else: + kdu_model = kdu.get("kdu-model") + parts = kdu_model.split(sep=":") + if len(parts) == 2: + kdu_model = parts[0] + + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].upgrade( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + atomic=True, kdu_model=kdu_model, + params=desc_params, db_dict=db_dict, + timeout=timeout_ns_action), + timeout=timeout_ns_action + 10) + self.logger.debug(logging_text + " Upgrade of kdu {} done".format(detailed_status)) + elif primitive == "rollback": + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].rollback( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + db_dict=db_dict), + timeout=timeout_ns_action) + elif primitive == "status": + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].status_kdu( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance")), + timeout=timeout_ns_action) + else: + kdu_instance = kdu.get("kdu-instance") or "{}-{}".format(kdu["kdu-name"], nsr_id) + params = self._map_primitive_params(config_primitive_desc, primitive_params, desc_params) + + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].exec_primitive( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance, + primitive_name=primitive, + params=params, db_dict=db_dict, + timeout=timeout_ns_action), + timeout=timeout_ns_action) + + if detailed_status: + nslcmop_operation_state = 'COMPLETED' + else: + detailed_status = '' + nslcmop_operation_state = 'FAILED' + else: + nslcmop_operation_state, detailed_status = await self._ns_execute_primitive( + self._look_for_deployed_vca(nsr_deployed["VCA"], + member_vnf_index=vnf_index, + vdu_id=vdu_id, + vdu_count_index=vdu_count_index), + primitive=primitive, + primitive_params=self._map_primitive_params(config_primitive_desc, primitive_params, desc_params), + timeout=timeout_ns_action) + + db_nslcmop_update["detailed-status"] = detailed_status + error_description_nslcmop = detailed_status if nslcmop_operation_state == "FAILED" else "" + self.logger.debug(logging_text + " task Done with result {} {}".format(nslcmop_operation_state, + detailed_status)) return # database update is called inside finally - except (DbException, LcmException) as e: + except (DbException, LcmException, N2VCException, K8sException) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except asyncio.CancelledError: self.logger.error(logging_text + "Cancelled Exception while '{}'".format(step)) exc = "Operation was cancelled" + except asyncio.TimeoutError: + self.logger.error(logging_text + "Timeout while '{}'".format(step)) + exc = "Timeout" except Exception as e: exc = traceback.format_exc() self.logger.critical(logging_text + "Exit Exception {} {}".format(type(e).__name__, e), exc_info=True) finally: - if exc and db_nslcmop: - db_nslcmop_update["detailed-status"] = nslcmop_operation_state_detail = \ + if exc: + db_nslcmop_update["detailed-status"] = detailed_status = error_description_nslcmop = \ "FAILED {}: {}".format(step, exc) - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" - db_nslcmop_update["statusEnteredTime"] = time() - try: - if db_nslcmop_update: - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - if db_nsr: - self._write_ns_status( - nsr_id=nsr_id, - ns_state=None, - current_operation="IDLE", - current_operation_id=None, - other_update=db_nsr_update - ) - if exc: - self._write_op_status( - op_id=nslcmop_id, - error_message=nslcmop_operation_state_detail - ) - except DbException as e: - self.logger.error(logging_text + "Cannot update database: {}".format(e)) - self.logger.debug(logging_text + "Exit") + nslcmop_operation_state = "FAILED" + if db_nsr: + self._write_ns_status( + nsr_id=nsr_id, + ns_state=db_nsr["nsState"], # TODO check if degraded. For the moment use previous status + current_operation="IDLE", + current_operation_id=None, + # error_description=error_description_nsr, + # error_detail=error_detail, + other_update=db_nsr_update + ) + + if db_nslcmop: + self._write_op_status( + op_id=nslcmop_id, + stage="", + error_message=error_description_nslcmop, + operation_state=nslcmop_operation_state, + other_update=db_nslcmop_update, + ) + if nslcmop_operation_state: try: await self.msg.aiowrite("ns", "actioned", {"nsr_id": nsr_id, "nslcmop_id": nslcmop_id, @@ -3238,7 +3304,7 @@ class NsLcm(LcmBase): self.logger.error(logging_text + "kafka_write notification Exception {}".format(e)) self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_action") - return nslcmop_operation_state, nslcmop_operation_state_detail + return nslcmop_operation_state, detailed_status async def scale(self, nsr_id, nslcmop_id): @@ -3452,7 +3518,6 @@ class NsLcm(LcmBase): self._look_for_deployed_vca(nsr_deployed["VCA"], member_vnf_index=vnf_index, vdu_id=None, - vdu_name=None, vdu_count_index=None), vnf_config_primitive, primitive_params) self.logger.debug(logging_text + "vnf_config_primitive={} Done with result {} {}".format( @@ -3646,7 +3711,6 @@ class NsLcm(LcmBase): self._look_for_deployed_vca(nsr_deployed["VCA"], member_vnf_index=vnf_index, vdu_id=None, - vdu_name=None, vdu_count_index=None), vnf_config_primitive, primitive_params) self.logger.debug(logging_text + "vnf_config_primitive={} Done with result {} {}".format(