X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=osm_lcm%2Fns.py;h=87eaecd578b9b6f2fdf50d2b1343eb7a8f8a38c4;hb=refs%2Fchanges%2F95%2F8395%2F2;hp=c4488230f286bf1dcdb735bb0adec73e015c0f2b;hpb=a508819e00e382aca98e2ead4e8aad9cf6b8b950;p=osm%2FLCM.git diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index c448823..87eaecd 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -21,16 +21,19 @@ import yaml import logging import logging.handlers import traceback +import json from jinja2 import Environment, Template, meta, TemplateError, TemplateNotFound, TemplateSyntaxError from osm_lcm import ROclient -from osm_lcm.lcm_utils import LcmException, LcmExceptionNoMgmtIP, LcmBase +from osm_lcm.lcm_utils import LcmException, LcmExceptionNoMgmtIP, LcmBase, deep_get from n2vc.k8s_helm_conn import K8sHelmConnector +from n2vc.k8s_juju_conn import K8sJujuConnector from osm_common.dbbase import DbException from osm_common.fsbase import FsException from n2vc.n2vc_juju_conn import N2VCJujuConnector +from n2vc.exceptions import N2VCException from copy import copy, deepcopy from http import HTTPStatus @@ -68,21 +71,6 @@ def populate_dict(target_dict, key_list, value): target_dict[key_list[-1]] = value -def deep_get(target_dict, key_list): - """ - Get a value from target_dict entering in the nested keys. If keys does not exist, it returns None - Example target_dict={a: {b: 5}}; key_list=[a,b] returns 5; both key_list=[a,b,c] and key_list=[f,h] return None - :param target_dict: dictionary to be read - :param key_list: list of keys to read from target_dict - :return: The wanted value if exist, None otherwise - """ - for key in key_list: - if not isinstance(target_dict, dict) or key not in target_dict: - return None - target_dict = target_dict[key] - return target_dict - - class NsLcm(LcmBase): timeout_vca_on_error = 5 * 60 # Time for charm from first time at blocked,error status to mark as failed total_deploy_timeout = 2 * 3600 # global timeout for deployment @@ -114,6 +102,8 @@ class NsLcm(LcmBase): self.vca_config['public_key'] = self.vca_config['pubkey'] if 'cacert' in self.vca_config: self.vca_config['ca_cert'] = self.vca_config['cacert'] + if 'apiproxy' in self.vca_config: + self.vca_config['api_proxy'] = self.vca_config['apiproxy'] # create N2VC connector self.n2vc = N2VCJujuConnector( @@ -124,10 +114,9 @@ class NsLcm(LcmBase): url='{}:{}'.format(self.vca_config['host'], self.vca_config['port']), username=self.vca_config.get('user', None), vca_config=self.vca_config, - on_update_db=self._on_update_n2vc_db - # TODO - # New N2VC argument - # api_proxy=vca_config.get('apiproxy') + on_update_db=self._on_update_n2vc_db, + # ca_cert=self.vca_config.get('cacert'), + # api_proxy=self.vca_config.get('apiproxy'), ) self.k8sclusterhelm = K8sHelmConnector( @@ -139,6 +128,15 @@ class NsLcm(LcmBase): on_update_db=None, ) + self.k8sclusterjuju = K8sJujuConnector( + kubectl_command=self.vca_config.get("kubectlpath"), + juju_command=self.vca_config.get("jujupath"), + fs=self.fs, + log=self.logger, + db=self.db, + on_update_db=None, + ) + # create RO client self.RO = ROclient.ROClient(self.loop, **self.ro_config) @@ -458,8 +456,7 @@ class NsLcm(LcmBase): if isinstance(vld_params["ns-net"], dict): for vld_id, instance_scenario_id in vld_params["ns-net"].items(): RO_vld_ns_net = {"instance_scenario_id": instance_scenario_id, "osm_id": vld_id} - if RO_vld_ns_net: - populate_dict(RO_ns_params, ("networks", vld_params["name"], "use-network"), RO_vld_ns_net) + populate_dict(RO_ns_params, ("networks", vld_params["name"], "use-network"), RO_vld_ns_net) if "vnfd-connection-point-ref" in vld_params: for cp_params in vld_params["vnfd-connection-point-ref"]: # look for interface @@ -632,15 +629,16 @@ class NsLcm(LcmBase): else: raise LcmException("ns_update_vnfr: Not found member_vnf_index={} from VIM info".format(vnf_index)) - @staticmethod - def _get_ns_config_info(vca_deployed_list): + def _get_ns_config_info(self, nsr_id): """ Generates a mapping between vnf,vdu elements and the N2VC id - :param vca_deployed_list: List of database _admin.deploy.VCA that contains this list + :param nsr_id: id of nsr to get last database _admin.deployed.VCA that contains this list :return: a dictionary with {osm-config-mapping: {}} where its element contains: "": for a vnf configuration, or "..": for a vdu configuration """ + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] mapping = {} ns_config_info = {"osm-config-mapping": mapping} for vca in vca_deployed_list: @@ -885,9 +883,14 @@ class NsLcm(LcmBase): ip_address = None nb_tries = 0 target_vdu_id = None + ro_retries = 0 while True: + ro_retries += 1 + if ro_retries >= 360: # 1 hour + raise LcmException("Not found _admin.deployed.RO.nsr_id for nsr_id: {}".format(nsr_id)) + await asyncio.sleep(10, loop=self.loop) # wait until NS is deployed at RO if not ro_nsr_id: @@ -899,23 +902,29 @@ class NsLcm(LcmBase): # get ip address if not target_vdu_id: db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) - if not vdu_id: + + if not vdu_id: # for the VNF case ip_address = db_vnfr.get("ip-address") if not ip_address: continue - for vdur in get_iterable(db_vnfr, "vdur"): - if (vdur["vdu-id-ref"] == vdu_id and vdur["count-index"] == vdu_index) or \ - (ip_address and vdur.get("ip-address") == ip_address): - if vdur["status"] == "ACTIVE": - target_vdu_id = vdur["vdu-id-ref"] - elif vdur["status"] == "ERROR": - raise LcmException("Cannot inject ssh-key because target VM is in error state") - break - else: + vdur = next((x for x in get_iterable(db_vnfr, "vdur") if x.get("ip-address") == ip_address), None) + else: # VDU case + vdur = next((x for x in get_iterable(db_vnfr, "vdur") + if x.get("vdu-id-ref") == vdu_id and x.get("count-index") == vdu_index), None) + + if not vdur: raise LcmException("Not found vnfr_id={}, vdu_index={}, vdu_index={}".format( vnfr_id, vdu_id, vdu_index )) + if vdur.get("status") == "ACTIVE": + ip_address = vdur.get("ip-address") + if not ip_address: + continue + target_vdu_id = vdur["vdu-id-ref"] + elif vdur.get("status") == "ERROR": + raise LcmException("Cannot inject ssh-key because target VM is in error state") + if not target_vdu_id: continue @@ -953,6 +962,33 @@ class NsLcm(LcmBase): return ip_address + async def _wait_dependent_n2vc(self, nsr_id, vca_deployed_list, vca_index): + """ + Wait until dependent VCA deployments have been finished. NS wait for VNFs and VDUs. VNFs for VDUs + """ + my_vca = vca_deployed_list[vca_index] + if my_vca.get("vdu_id") or my_vca.get("kdu_name"): + return + timeout = 300 + while timeout >= 0: + for index, vca_deployed in enumerate(vca_deployed_list): + if index == vca_index: + continue + if not my_vca.get("member-vnf-index") or \ + (vca_deployed.get("member-vnf-index") == my_vca.get("member-vnf-index")): + if not vca_deployed.get("instantiation"): + break # wait + if vca_deployed["instantiation"] == "FAILED": + raise LcmException("Configuration aborted because dependent charm/s has failed") + else: + return + await asyncio.sleep(10) + timeout -= 1 + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] + + raise LcmException("Configuration aborted because dependent charm/s timeout") + async def instantiate_N2VC(self, logging_text, vca_index, nsi_id, db_nsr, db_vnfr, vdu_id, kdu_name, vdu_index, config_descriptor, deploy_params, base_folder): nsr_id = db_nsr["_id"] @@ -964,9 +1000,6 @@ class NsLcm(LcmBase): 'filter': {'_id': nsr_id}, 'path': db_update_entry } - logging_text += "member_vnf_index={} vdu_id={}, vdu_index={} ".format(db_vnfr["member-vnf-index-ref"], - vdu_id, vdu_index) - step = "" try: vnfr_id = None @@ -982,7 +1015,7 @@ class NsLcm(LcmBase): namespace += ".{}-{}".format(vdu_id, vdu_index or 0) # Get artifact path - artifact_path = "/{}/{}/charms/{}".format( + artifact_path = "{}/{}/charms/{}".format( base_folder["folder"], base_folder["pkg-dir"], config_descriptor["juju"]["charm"] @@ -1001,35 +1034,35 @@ class NsLcm(LcmBase): if is_proxy_charm: step = "create execution environment" self.logger.debug(logging_text + step) - ee_id, credentials = await self.n2vc.create_execution_environment( - namespace=namespace, - reuse_ee_id=ee_id, - db_dict=db_dict - ) - + ee_id, credentials = await self.n2vc.create_execution_environment(namespace=namespace, + reuse_ee_id=ee_id, + db_dict=db_dict) else: - step = "register execution environment" - # TODO wait until deployed by RO, when IP address has been filled. By pooling???? - credentials = {} # TODO db_credentials["ip_address"] + step = "Waiting to VM being up and getting IP address" + self.logger.debug(logging_text + step) + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, vdu_index, + user=None, pub_key=None) + credentials = {"hostname": rw_mgmt_ip} # get username + username = deep_get(config_descriptor, ("config-access", "ssh-access", "default-user")) # TODO remove this when changes on IM regarding config-access:ssh-access:default-user were # merged. Meanwhile let's get username from initial-config-primitive - if config_descriptor.get("initial-config-primitive"): - for param in config_descriptor["initial-config-primitive"][0].get("parameter", ()): - if param["name"] == "ssh-username": - credentials["username"] = param["value"] - if config_descriptor.get("config-access") and config_descriptor["config-access"].get("ssh-access"): - if config_descriptor["config-access"]["ssh-access"].get("required"): - credentials["username"] = \ - config_descriptor["config-access"]["ssh-access"].get("default-user") - + if not username and config_descriptor.get("initial-config-primitive"): + for config_primitive in config_descriptor["initial-config-primitive"]: + for param in config_primitive.get("parameter", ()): + if param["name"] == "ssh-username": + username = param["value"] + break + if not username: + raise LcmException("Cannot determine the username neither with 'initial-config-promitive' nor with " + "'config-access.ssh-access.default-user'") + credentials["username"] = username # n2vc_redesign STEP 3.2 + + step = "register execution environment {}".format(credentials) self.logger.debug(logging_text + step) - ee_id = await self.n2vc.register_execution_environment( - credentials=credentials, - namespace=namespace, - db_dict=db_dict - ) + ee_id = await self.n2vc.register_execution_environment(credentials=credentials, namespace=namespace, + db_dict=db_dict) # for compatibility with MON/POL modules, the need model and application name at database # TODO ask to N2VC instead of assuming the format "model_name.application_name" @@ -1041,44 +1074,36 @@ class NsLcm(LcmBase): db_update_entry + "ee_id": ee_id}) # n2vc_redesign STEP 3.3 - # TODO check if already done + step = "Install configuration Software" + # TODO check if already done self.logger.debug(logging_text + step) - await self.n2vc.install_configuration_sw( - ee_id=ee_id, - artifact_path=artifact_path, - db_dict=db_dict - ) + await self.n2vc.install_configuration_sw(ee_id=ee_id, artifact_path=artifact_path, db_dict=db_dict) # if SSH access is required, then get execution environment SSH public - required = deep_get(config_descriptor, ("config-access", "ssh-access", "required")) - pub_key = None - user = None - if is_proxy_charm and required: - user = deep_get(config_descriptor, ("config-access", "ssh-access", "default-user")) - step = "Install configuration Software, getting public ssh key" - pub_key = await self.n2vc.get_ee_ssh_public__key( - ee_id=ee_id, - db_dict=db_dict - ) + if is_proxy_charm: # if native charm we have waited already to VM be UP + pub_key = None + user = None + if deep_get(config_descriptor, ("config-access", "ssh-access", "required")): + # Needed to inject a ssh key + user = deep_get(config_descriptor, ("config-access", "ssh-access", "default-user")) + step = "Install configuration Software, getting public ssh key" + pub_key = await self.n2vc.get_ee_ssh_public__key(ee_id=ee_id, db_dict=db_dict) + + step = "Insert public key into VM user={} ssh_key={}".format(user, pub_key) + else: + step = "Waiting to VM being up and getting IP address" + self.logger.debug(logging_text + step) - step = "Insert public key into VM" - else: - step = "Waiting to VM being up and getting IP address" - self.logger.debug(logging_text + step) + # n2vc_redesign STEP 5.1 + # wait for RO (ip-address) Insert pub_key into VM + if vnfr_id: + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, vdu_index, + user=user, pub_key=pub_key) + else: + rw_mgmt_ip = None # This is for a NS configuration - # n2vc_redesign STEP 5.1 - # wait for RO (ip-address) Insert pub_key into VM - rw_mgmt_ip = await self.wait_vm_up_insert_key_ro( - logging_text=logging_text, - nsr_id=nsr_id, - vnfr_id=vnfr_id, - vdu_id=vdu_id, - vdu_index=vdu_index, - user=user, - pub_key=pub_key - ) - self.logger.debug(logging_text + ' VM_ip_address={}'.format(rw_mgmt_ip)) + self.logger.debug(logging_text + ' VM_ip_address={}'.format(rw_mgmt_ip)) # store rw_mgmt_ip in deploy params for later replacement deploy_params["rw_mgmt_ip"] = rw_mgmt_ip @@ -1096,13 +1121,15 @@ class NsLcm(LcmBase): # add config if not present for NS charm initial_config_primitive_list = self._get_initial_config_primitive_list(initial_config_primitive_list, vca_deployed) - + if initial_config_primitive_list: + await self._wait_dependent_n2vc(nsr_id, vca_deployed_list, vca_index) for initial_config_primitive in initial_config_primitive_list: # adding information on the vca_deployed if it is a NS execution environment if not vca_deployed["member-vnf-index"]: - deploy_params["ns_config_info"] = self._get_ns_config_info(vca_deployed_list) + deploy_params["ns_config_info"] = json.dumps(self._get_ns_config_info(nsr_id)) # TODO check if already done primitive_params_ = self._map_primitive_params(initial_config_primitive, {}, deploy_params) + step = "execute primitive '{}' params '{}'".format(initial_config_primitive["name"], primitive_params_) self.logger.debug(logging_text + step) await self.n2vc.exec_primitive( @@ -1114,12 +1141,28 @@ class NsLcm(LcmBase): # TODO register in database that primitive is done step = "instantiated at VCA" + self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "COMPLETED"}) self.logger.debug(logging_text + step) except Exception as e: # TODO not use Exception but N2VC exception + self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "FAILED"}) raise Exception("{} {}".format(step, e)) from e # TODO raise N2VC exception with 'step' extra information + def _write_ns_status(self, nsr_id: str, ns_state: str, current_operation: str, current_operation_id: str, + error_description: str = None, error_detail: str = None): + try: + db_dict = dict() + if ns_state: + db_dict["nsState"] = ns_state + db_dict["currentOperation"] = current_operation + db_dict["currentOperationID"] = current_operation_id + db_dict["errorDescription"] = error_description + db_dict["errorDetail"] = error_detail + self.update_db_2("nsrs", nsr_id, db_dict) + except Exception as e: + self.logger.warn('Error writing NS status: {}'.format(e)) + async def instantiate(self, nsr_id, nslcmop_id): """ @@ -1158,6 +1201,7 @@ class NsLcm(LcmBase): db_vnfrs = {} # vnf's info indexed by member-index # n2vc_info = {} task_instantiation_list = [] + task_instantiation_info = {} # from task to info text exc = None try: # wait for any previous tasks in process @@ -1166,6 +1210,14 @@ class NsLcm(LcmBase): # STEP 0: Reading database (nslcmops, nsrs, nsds, vnfrs, vnfds) + # nsState="BUILDING", currentOperation="INSTANTIATING", currentOperationID=nslcmop_id + self._write_ns_status( + nsr_id=nsr_id, + ns_state="BUILDING", + current_operation="INSTANTIATING", + current_operation_id=nslcmop_id + ) + # read from db: operation step = "Getting nslcmop={} from db".format(nslcmop_id) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) @@ -1230,25 +1282,21 @@ class NsLcm(LcmBase): db_nsr_update["_admin.nsState"] = "INSTANTIATED" self.update_db_2("nsrs", nsr_id, db_nsr_update) self.logger.debug(logging_text + "Before deploy_kdus") - db_k8scluster_list = self.db.get_list("k8sclusters", {}) # Call to deploy_kdus in case exists the "vdu:kdu" param task_kdu = asyncio.ensure_future( self.deploy_kdus( logging_text=logging_text, nsr_id=nsr_id, - nsd=nsd, db_nsr=db_nsr, - db_nslcmop=db_nslcmop, db_vnfrs=db_vnfrs, - db_vnfds_ref=db_vnfds_ref, - db_k8scluster=db_k8scluster_list ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDUs", task_kdu) + task_instantiation_info[task_kdu] = "Deploy KDUs" task_instantiation_list.append(task_kdu) # n2vc_redesign STEP 1 Get VCA public ssh-key # feature 1429. Add n2vc public key to needed VMs - n2vc_key = await self.n2vc.get_public_key() + n2vc_key = self.n2vc.get_public_key() n2vc_key_list = [n2vc_key] if self.vca_config.get("public_key"): n2vc_key_list.append(self.vca_config["public_key"]) @@ -1267,6 +1315,7 @@ class NsLcm(LcmBase): ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_RO", task_ro) + task_instantiation_info[task_ro] = "Deploy at VIM" task_instantiation_list.append(task_ro) # n2vc_redesign STEP 3 to 6 Deploy N2VC @@ -1289,15 +1338,12 @@ class NsLcm(LcmBase): # Get additional parameters deploy_params = {} if db_vnfr.get("additionalParamsForVnf"): - deploy_params = db_vnfr["additionalParamsForVnf"].copy() - for k, v in deploy_params.items(): - if isinstance(v, str) and v.startswith("!!yaml "): - deploy_params[k] = yaml.safe_load(v[7:]) + deploy_params = self._format_additional_params(db_vnfr["additionalParamsForVnf"].copy()) descriptor_config = vnfd.get("vnf-configuration") if descriptor_config and descriptor_config.get("juju"): self._deploy_n2vc( - logging_text=logging_text, + logging_text=logging_text + "member_vnf_index={} ".format(member_vnf_index), db_nsr=db_nsr, db_vnfr=db_vnfr, nslcmop_id=nslcmop_id, @@ -1312,13 +1358,19 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) # Deploy charms for each VDU that supports one. for vdud in get_iterable(vnfd, 'vdu'): vdu_id = vdud["id"] descriptor_config = vdud.get('vdu-configuration') + vdur = next((x for x in db_vnfr["vdur"] if x["vdu-id-ref"] == vdu_id), None) + if vdur.get("additionalParams"): + deploy_params_vdu = self._format_additional_params(vdur["additionalParams"]) + else: + deploy_params_vdu = deploy_params if descriptor_config and descriptor_config.get("juju"): # look for vdu index in the db_vnfr["vdu"] section # for vdur_index, vdur in enumerate(db_vnfr["vdur"]): @@ -1333,7 +1385,8 @@ class NsLcm(LcmBase): for vdu_index in range(int(vdud.get("count", 1))): # TODO vnfr_params["rw_mgmt_ip"] = vdur["ip-address"] self._deploy_n2vc( - logging_text=logging_text, + logging_text=logging_text + "member_vnf_index={}, vdu_id={}, vdu_index={} ".format( + member_vnf_index, vdu_id, vdu_index), db_nsr=db_nsr, db_vnfr=db_vnfr, nslcmop_id=nslcmop_id, @@ -1345,10 +1398,11 @@ class NsLcm(LcmBase): member_vnf_index=member_vnf_index, vdu_index=vdu_index, vdu_name=vdu_name, - deploy_params=deploy_params, + deploy_params=deploy_params_vdu, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) for kdud in get_iterable(vnfd, 'kdu'): kdu_name = kdud["name"] @@ -1383,7 +1437,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) # Check if this NS has a charm configuration @@ -1400,10 +1455,7 @@ class NsLcm(LcmBase): # Get additional parameters deploy_params = {} if db_nsr.get("additionalParamsForNs"): - deploy_params = db_nsr["additionalParamsForNs"].copy() - for k, v in deploy_params.items(): - if isinstance(v, str) and v.startswith("!!yaml "): - deploy_params[k] = yaml.safe_load(v[7:]) + deploy_params = self._format_additional_params(db_nsr["additionalParamsForNs"].copy()) base_folder = nsd["_admin"]["storage"] self._deploy_n2vc( logging_text=logging_text, @@ -1421,33 +1473,64 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list + task_instantiation_list=task_instantiation_list, + task_instantiation_info=task_instantiation_info ) # Wait until all tasks of "task_instantiation_list" have been finished # while time() <= start_deploy + self.total_deploy_timeout: - error_text = None - timeout = 3600 # time() - start_deploy - task_instantiation_set = set(task_instantiation_list) # build a set with tasks - done = None - pending = None - if len(task_instantiation_set) > 0: - done, pending = await asyncio.wait(task_instantiation_set, timeout=timeout) - if pending: - error_text = "timeout" - for task in done: - if task.cancelled(): - if not error_text: - error_text = "cancelled" - elif task.done(): - exc = task.exception() - if exc: - error_text = str(exc) + error_text_list = [] + timeout = 3600 + + # let's begin with all OK + instantiated_ok = True + # let's begin with RO 'running' status (later we can change it) + db_nsr_update["operational-status"] = "running" + # let's begin with VCA 'configured' status (later we can change it) + db_nsr_update["config-status"] = "configured" + + if task_instantiation_list: + # wait for all tasks completion + done, pending = await asyncio.wait(task_instantiation_list, timeout=timeout) + + for task in pending: + instantiated_ok = False + if task == task_ro: + db_nsr_update["operational-status"] = "failed" + else: + db_nsr_update["config-status"] = "failed" + self.logger.error(logging_text + task_instantiation_info[task] + ": Timeout") + error_text_list.append(task_instantiation_info[task] + ": Timeout") + for task in done: + if task.cancelled(): + instantiated_ok = False + if task == task_ro: + db_nsr_update["operational-status"] = "failed" + else: + db_nsr_update["config-status"] = "failed" + self.logger.warn(logging_text + task_instantiation_info[task] + ": Cancelled") + error_text_list.append(task_instantiation_info[task] + ": Cancelled") + else: + exc = task.exception() + if exc: + instantiated_ok = False + if task == task_ro: + db_nsr_update["operational-status"] = "failed" + else: + db_nsr_update["config-status"] = "failed" + self.logger.error(logging_text + task_instantiation_info[task] + ": Failed") + if isinstance(exc, (N2VCException, ROclient.ROClientException)): + error_text_list.append(task_instantiation_info[task] + ": {}".format(exc)) + else: + exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__)) + self.logger.error(logging_text + task_instantiation_info[task] + exc_traceback) + error_text_list.append(task_instantiation_info[task] + ": " + exc_traceback) + else: + self.logger.debug(logging_text + task_instantiation_info[task] + ": Done") - if error_text: - db_nsr_update["config-status"] = "failed" - error_text = "fail configuring " + error_text + if error_text_list: + error_text = "\n".join(error_text_list) db_nsr_update["detailed-status"] = error_text db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED_TEMP" db_nslcmop_update["detailed-status"] = error_text @@ -1457,7 +1540,6 @@ class NsLcm(LcmBase): db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED" db_nslcmop_update["statusEnteredTime"] = time() db_nslcmop_update["detailed-status"] = "done" - db_nsr_update["config-status"] = "configured" db_nsr_update["detailed-status"] = "done" except (ROclient.ROClientException, DbException, LcmException) as e: @@ -1475,6 +1557,7 @@ class NsLcm(LcmBase): if db_nsr: db_nsr_update["detailed-status"] = "ERROR {}: {}".format(step, exc) db_nsr_update["operational-status"] = "failed" + db_nsr_update["config-status"] = "failed" if db_nslcmop: db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" @@ -1485,6 +1568,26 @@ class NsLcm(LcmBase): db_nsr_update["_admin.current-operation"] = None db_nsr_update["_admin.operation-type"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + + # nsState="READY/BROKEN", currentOperation="IDLE", currentOperationID=None + ns_state = None + error_description = None + error_detail = None + if instantiated_ok: + ns_state = "READY" + else: + ns_state = "BROKEN" + error_description = 'Operation: INSTANTIATING.{}, step: {}'.format(nslcmop_id, step) + error_detail = error_text + self._write_ns_status( + nsr_id=nsr_id, + ns_state=ns_state, + current_operation="IDLE", + current_operation_id=None, + error_description=error_description, + error_detail=error_detail + ) + if db_nslcmop_update: self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) except DbException as e: @@ -1500,97 +1603,102 @@ class NsLcm(LcmBase): self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_instantiate") - async def deploy_kdus(self, logging_text, nsr_id, nsd, db_nsr, db_nslcmop, db_vnfrs, db_vnfds_ref, db_k8scluster): + async def deploy_kdus(self, logging_text, nsr_id, db_nsr, db_vnfrs): # Launch kdus if present in the descriptor - logging_text = "Deploy kdus: " - db_nsr_update = {} - db_nsr_update["_admin.deployed.K8s"] = [] + + k8scluster_id_2_uuic = {"helm-chart": {}, "juju-bundle": {}} + + def _get_cluster_id(cluster_id, cluster_type): + nonlocal k8scluster_id_2_uuic + if cluster_id in k8scluster_id_2_uuic[cluster_type]: + return k8scluster_id_2_uuic[cluster_type][cluster_id] + + db_k8scluster = self.db.get_one("k8sclusters", {"_id": cluster_id}, fail_on_empty=False) + if not db_k8scluster: + raise LcmException("K8s cluster {} cannot be found".format(cluster_id)) + k8s_id = deep_get(db_k8scluster, ("_admin", cluster_type, "id")) + if not k8s_id: + raise LcmException("K8s cluster '{}' has not been initilized for '{}'".format(cluster_id, cluster_type)) + k8scluster_id_2_uuic[cluster_type][cluster_id] = k8s_id + return k8s_id + + logging_text += "Deploy kdus: " try: - # Look for all vnfds - # db_nsr_update["_admin.deployed.K8s"] = [] - vnf_update = [] - task_list = [] - for c_vnf in nsd.get("constituent-vnfd", ()): - vnfr = db_vnfrs[c_vnf["member-vnf-index"]] - member_vnf_index = c_vnf["member-vnf-index"] - vnfd = db_vnfds_ref[c_vnf['vnfd-id-ref']] - vnfd_ref = vnfd["id"] - desc_params = {} + db_nsr_update = {"_admin.deployed.K8s": []} + self.update_db_2("nsrs", nsr_id, db_nsr_update) - step = "Checking kdu from vnf: {} - member-vnf-index: {}".format(vnfd_ref, member_vnf_index) - self.logger.debug(logging_text + step) - if vnfd.get("kdu"): - step = "vnf: {} has kdus".format(vnfd_ref) - self.logger.debug(logging_text + step) - for vnfr_name, vnfr_data in db_vnfrs.items(): - if vnfr_data["vnfd-ref"] == vnfd["id"]: - if vnfr_data.get("additionalParamsForVnf"): - desc_params = self._format_additional_params(vnfr_data["additionalParamsForVnf"]) - break + # Look for all vnfds + pending_tasks = {} + index = 0 + for vnfr_data in db_vnfrs.values(): + for kdur in get_iterable(vnfr_data, "kdur"): + desc_params = self._format_additional_params(kdur.get("additionalParams")) + kdumodel = None + k8sclustertype = None + error_text = None + cluster_uuid = None + if kdur.get("helm-chart"): + kdumodel = kdur["helm-chart"] + k8sclustertype = "chart" + k8sclustertype_full = "helm-chart" + elif kdur.get("juju-bundle"): + kdumodel = kdur["juju-bundle"] + k8sclustertype = "juju" + k8sclustertype_full = "juju-bundle" else: - raise LcmException("VNF descriptor not found with id: {}".format(vnfr_data["vnfd-ref"])) - self.logger.debug(logging_text + step) + error_text = "kdu type is neither helm-chart not juju-bundle. Maybe an old NBI version is" \ + " running" + try: + if not error_text: + cluster_uuid = _get_cluster_id(kdur["k8s-cluster"]["id"], k8sclustertype_full) + except LcmException as e: + error_text = str(e) + step = "Instantiate KDU {} in k8s cluster {}".format(kdur["kdu-name"], cluster_uuid) + + k8s_instace_info = {"kdu-instance": None, "k8scluster-uuid": cluster_uuid, + "k8scluster-type": k8sclustertype, + "kdu-name": kdur["kdu-name"], "kdu-model": kdumodel} + if error_text: + k8s_instace_info["detailed-status"] = error_text + db_nsr_update["_admin.deployed.K8s.{}".format(index)] = k8s_instace_info + self.update_db_2("nsrs", nsr_id, db_nsr_update) + if error_text: + continue - for kdur in vnfr.get("kdur"): - index = 0 - for k8scluster in db_k8scluster: - if kdur["k8s-cluster"]["id"] == k8scluster["_id"]: - cluster_uuid = k8scluster["cluster-uuid"] - break - else: - raise LcmException("K8scluster not found with id: {}".format(kdur["k8s-cluster"]["id"])) - self.logger.debug(logging_text + step) + db_dict = {"collection": "nsrs", "filter": {"_id": nsr_id}, "path": "_admin.deployed.K8s." + "{}".format(index)} + if k8sclustertype == "chart": + task = asyncio.ensure_future( + self.k8sclusterhelm.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, atomic=True, + params=desc_params, db_dict=db_dict, timeout=3600) + ) + else: + task = asyncio.ensure_future( + self.k8sclusterjuju.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, + atomic=True, params=desc_params, + db_dict=db_dict, timeout=600) + ) - step = "Instantiate KDU {} in k8s cluster {}".format(kdur["kdu-name"], cluster_uuid) - self.logger.debug(logging_text + step) - for kdu in vnfd.get("kdu"): - if kdu.get("name") == kdur["kdu-name"]: - break - else: - raise LcmException("KDU not found with name: {} in VNFD {}".format(kdur["kdu-name"], - vnfd["name"])) - self.logger.debug(logging_text + step) - kdumodel = None - k8sclustertype = None - if kdu.get("helm-chart"): - kdumodel = kdu["helm-chart"] - k8sclustertype = "chart" - elif kdu.get("juju-bundle"): - kdumodel = kdu["juju-bundle"] - k8sclustertype = "juju" - k8s_instace_info = {"kdu-instance": None, "k8scluster-uuid": cluster_uuid, - "vnfr-id": vnfr["id"], "k8scluster-type": k8sclustertype, - "kdu-name": kdur["kdu-name"], "kdu-model": kdumodel} - db_nsr_update["_admin.deployed.K8s"].append(k8s_instace_info) - db_dict = {"collection": "nsrs", "filter": {"_id": nsr_id}, "path": "_admin.deployed.K8s." - "{}".format(index)} - if k8sclustertype == "chart": - task = self.k8sclusterhelm.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, - atomic=True, params=desc_params, - db_dict=db_dict, timeout=300) - else: - # TODO I need the juju connector in place - pass - task_list.append(task) - index += 1 - self.update_db_2("nsrs", nsr_id, db_nsr_update) - done = None - pending = None - if len(task_list) > 0: - self.logger.debug('Waiting for terminate pending tasks...') - done, pending = await asyncio.wait(task_list, timeout=3600) - if not pending: - for fut in done: - k8s_instance = fut.result() - k8s_instace_info = {"kdu-instance": k8s_instance, "k8scluster-uuid": cluster_uuid, - "vnfr-id": vnfr["id"], "k8scluster-type": k8sclustertype, - "kdu-name": kdur["kdu-name"], "kdu-model": kdumodel} - vnf_update.append(k8s_instace_info) - self.logger.debug('All tasks finished...') - else: - self.logger.info('There are pending tasks: {}'.format(pending)) + pending_tasks[task] = "_admin.deployed.K8s.{}.".format(index) + index += 1 + if not pending_tasks: + return + self.logger.debug(logging_text + 'Waiting for terminate pending tasks...') + pending_list = list(pending_tasks.keys()) + while pending_list: + done_list, pending_list = await asyncio.wait(pending_list, timeout=30*60, + return_when=asyncio.FIRST_COMPLETED) + if not done_list: # timeout + for task in pending_list: + db_nsr_update[pending_tasks(task) + "detailed-status"] = "Timeout" + break + for task in done_list: + exc = task.exception() + if exc: + db_nsr_update[pending_tasks[task] + "detailed-status"] = "{}".format(exc) + else: + db_nsr_update[pending_tasks[task] + "kdu-instance"] = task.result() - db_nsr_update["_admin.deployed.K8s"] = vnf_update except Exception as e: self.logger.critical(logging_text + "Exit Exception {} while '{}': {}".format(type(e).__name__, step, e)) raise LcmException("{} Exit Exception {} while '{}': {}".format(logging_text, type(e).__name__, step, e)) @@ -1601,7 +1709,7 @@ class NsLcm(LcmBase): def _deploy_n2vc(self, logging_text, db_nsr, db_vnfr, nslcmop_id, nsr_id, nsi_id, vnfd_id, vdu_id, kdu_name, member_vnf_index, vdu_index, vdu_name, deploy_params, descriptor_config, - base_folder, task_instantiation_list): + base_folder, task_instantiation_list, task_instantiation_info): # launch instantiate_N2VC in a asyncio task and register task object # Look where information of this charm is at database ._admin.deployed.VCA # if not found, create one entry and update database @@ -1650,6 +1758,7 @@ class NsLcm(LcmBase): ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_N2VC-{}".format(vca_index), task_n2vc) + task_instantiation_info[task_n2vc] = "Deploy VCA {}.{}".format(member_vnf_index or "", vdu_id or "") task_instantiation_list.append(task_n2vc) # Check if this VNFD has a configured terminate action @@ -1705,11 +1814,10 @@ class NsLcm(LcmBase): return nslcmop def _format_additional_params(self, params): - + params = params or {} for key, value in params.items(): if str(value).startswith("!!yaml "): params[key] = yaml.safe_load(value[7:]) - return params def _get_terminate_primitive_params(self, seq, vnf_index): @@ -1870,6 +1978,13 @@ class NsLcm(LcmBase): # or op_index (operationState != 'COMPLETED') return self._reintent_or_skip_suboperation(db_nslcmop, op_index) + # Function to return execution_environment id + + def _get_ee_id(self, vnf_index, vdu_id, vca_deployed_list): + for vca in vca_deployed_list: + if vca["member-vnf-index"] == vnf_index and vca["vdu_id"] == vdu_id: + return vca["ee_id"] + # Helper methods for terminate() async def _terminate_action(self, db_nslcmop, nslcmop_id, nsr_id): @@ -1877,6 +1992,8 @@ class NsLcm(LcmBase): Called from terminate() before deleting instance Calls action() to execute the primitive """ logging_text = "Task ns={} _terminate_action={} ".format(nsr_id, nslcmop_id) + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}) db_vnfds = {} # Loop over VNFRs @@ -1928,8 +2045,7 @@ class NsLcm(LcmBase): # " primitive={} fails with error {}".format( # vnf_index, seq.get("name"), result_detail)) - # TODO: find ee_id - ee_id = None + ee_id = self._get_ee_id(vnf_index, vdu_id, vca_deployed_list) try: await self.n2vc.exec_primitive( ee_id=ee_id, @@ -1969,6 +2085,13 @@ class NsLcm(LcmBase): step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA("ns", 'nslcmops', nslcmop_id) + self._write_ns_status( + nsr_id=nsr_id, + ns_state="TERMINATING", + current_operation="TERMINATING", + current_operation_id=nslcmop_id + ) + step = "Getting nslcmop={} from db".format(nslcmop_id) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) step = "Getting nsr={} from db".format(nsr_id) @@ -2003,19 +2126,24 @@ class NsLcm(LcmBase): # Delete from k8scluster step = "delete kdus" self.logger.debug(logging_text + step) - print(nsr_deployed) + # print(nsr_deployed) if nsr_deployed: - for kdu in nsr_deployed.get("K8s"): + for kdu in nsr_deployed.get("K8s", ()): + kdu_instance = kdu.get("kdu-instance") + if not kdu_instance: + continue if kdu.get("k8scluster-type") == "chart": - task_delete_kdu_instance = asyncio.ensure_future(self.k8sclusterhelm.uninstall( - cluster_uuid=kdu.get("k8scluster-uuid"), kdu_instance=kdu.get("kdu-instance"))) + task_delete_kdu_instance = asyncio.ensure_future( + self.k8sclusterhelm.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance)) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + task_delete_kdu_instance = asyncio.ensure_future( + self.k8sclusterjuju.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance)) else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - + self.error(logging_text + "Unknown k8s deployment type {}". + format(kdu.get("k8scluster-type"))) + continue pending_tasks.append(task_delete_kdu_instance) except LcmException as e: msg = "Failed while deleting KDUs from NS: {}".format(e) @@ -2138,6 +2266,7 @@ class NsLcm(LcmBase): self.logger.error(logging_text + failed_detail[-1]) if failed_detail: + terminate_ok = False self.logger.error(logging_text + " ;".join(failed_detail)) db_nsr_update["operational-status"] = "failed" db_nsr_update["detailed-status"] = "Deletion errors " + "; ".join(failed_detail) @@ -2145,6 +2274,7 @@ class NsLcm(LcmBase): db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" db_nslcmop_update["statusEnteredTime"] = time() else: + terminate_ok = True db_nsr_update["operational-status"] = "terminated" db_nsr_update["detailed-status"] = "Done" db_nsr_update["_admin.nsState"] = "NOT_INSTANTIATED" @@ -2176,6 +2306,25 @@ class NsLcm(LcmBase): db_nsr_update["_admin.current-operation"] = None db_nsr_update["_admin.operation-type"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + + if terminate_ok: + ns_state = "IDLE" + error_description = None + error_detail = None + else: + ns_state = "BROKEN" + error_description = 'Operation: TERMINATING.{}, step: {}'.format(nslcmop_id, step) + error_detail = "; ".join(failed_detail) + + self._write_ns_status( + nsr_id=nsr_id, + ns_state=ns_state, + current_operation="IDLE", + current_operation_id=None, + error_description=error_description, + error_detail=error_detail + ) + except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) if nslcmop_operation_state: @@ -2322,6 +2471,13 @@ class NsLcm(LcmBase): step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id) + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="RUNNING ACTION", + current_operation_id=nslcmop_id + ) + step = "Getting information from database" db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) @@ -2365,14 +2521,10 @@ class NsLcm(LcmBase): break elif kdu_name: self.logger.debug(logging_text + "Checking actions in KDUs") - desc_params = {} - if vnf_index: - if db_vnfr.get("additionalParamsForVnf") and db_vnfr["additionalParamsForVnf"].\ - get("member-vnf-index") == vnf_index: - desc_params = self._format_additional_params(db_vnfr["additionalParamsForVnf"]. - get("additionalParams")) - if primitive_params: - desc_params.update(primitive_params) + kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None) + desc_params = self._format_additional_params(kdur.get("additionalParams")) or {} + if primitive_params: + desc_params.update(primitive_params) # TODO Check if we will need something at vnf level index = 0 for kdu in get_iterable(nsr_deployed, "K8s"): @@ -2396,8 +2548,12 @@ class NsLcm(LcmBase): params=desc_params, db_dict=db_dict, timeout=300) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + output = await self.k8sclusterjuju.upgrade(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + atomic=True, kdu_model=kdu_model, + params=desc_params, db_dict=db_dict, + timeout=300) + else: msg = "k8scluster-type not defined" raise LcmException(msg) @@ -2410,8 +2566,9 @@ class NsLcm(LcmBase): kdu_instance=kdu.get("kdu-instance"), db_dict=db_dict) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + output = await self.k8sclusterjuju.rollback(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + db_dict=db_dict) else: msg = "k8scluster-type not defined" raise LcmException(msg) @@ -2421,8 +2578,8 @@ class NsLcm(LcmBase): output = await self.k8sclusterhelm.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), kdu_instance=kdu.get("kdu-instance")) elif kdu.get("k8scluster-type") == "juju": - # TODO Juju connector needed - pass + output = await self.k8sclusterjuju.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance")) else: msg = "k8scluster-type not defined" raise LcmException(msg) @@ -2458,10 +2615,14 @@ class NsLcm(LcmBase): desc_params = {} if vnf_index: if db_vnfr.get("additionalParamsForVnf"): - desc_params.update(db_vnfr["additionalParamsForVnf"]) + desc_params = self._format_additional_params(db_vnfr["additionalParamsForVnf"]) + if vdu_id: + vdur = next((x for x in db_vnfr["vdur"] if x["vdu-id-ref"] == vdu_id), None) + if vdur.get("additionalParams"): + desc_params = self._format_additional_params(vdur["additionalParams"]) else: if db_nsr.get("additionalParamsForNs"): - desc_params.update(db_nsr["additionalParamsForNs"]) + desc_params.update(self._format_additional_params(db_nsr["additionalParamsForNs"])) # TODO check if ns is in a proper status output, detail = await self._ns_execute_primitive( @@ -2509,6 +2670,12 @@ class NsLcm(LcmBase): db_nsr_update["_admin.nslcmop"] = None db_nsr_update["_admin.current-operation"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="IDLE", + current_operation_id=None + ) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.logger.debug(logging_text + "Exit") @@ -2552,6 +2719,13 @@ class NsLcm(LcmBase): step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id) + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="SCALING", + current_operation_id=nslcmop_id + ) + step = "Getting nslcmop from database" self.logger.debug(step + " after having waited for previous tasks to be completed") db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) @@ -2886,7 +3060,7 @@ class NsLcm(LcmBase): # Post-scale reintent check: Check if this sub-operation has been executed before op_index = self._check_or_add_scale_suboperation( db_nslcmop, nslcmop_id, vnf_index, vnf_config_primitive, primitive_params, 'POST-SCALE') - if (op_index == self.SUBOPERATION_STATUS_SKIP): + if op_index == self.SUBOPERATION_STATUS_SKIP: # Skip sub-operation result = 'COMPLETED' result_detail = 'Done' @@ -2894,7 +3068,7 @@ class NsLcm(LcmBase): "vnf_config_primitive={} Skipped sub-operation, result {} {}". format(vnf_config_primitive, result, result_detail)) else: - if (op_index == self.SUBOPERATION_STATUS_NEW): + if op_index == self.SUBOPERATION_STATUS_NEW: # New sub-operation: Get index of this sub-operation op_index = len(db_nslcmop.get('_admin', {}).get('operations')) - 1 self.logger.debug(logging_text + "vnf_config_primitive={} New sub-operation". @@ -2965,6 +3139,14 @@ class NsLcm(LcmBase): db_nsr_update["_admin.operation-type"] = None db_nsr_update["_admin.nslcmop"] = None self.update_db_2("nsrs", nsr_id, db_nsr_update) + + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="IDLE", + current_operation_id=None + ) + except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) if nslcmop_operation_state: