X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_lcm%2Fns.py;h=1504fb106c65f4a393c88f8c325858dbbba0d921;hb=72d90d9217ee99b8e05cfe345f59db28f6d54d93;hp=6a764f03ecf3beac475c0125b0f83ab328dd301b;hpb=aa366ed3f6c9133629ac10682d006cd908065784;p=osm%2FLCM.git diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index 6a764f0..1504fb1 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -23,6 +23,7 @@ import yaml import logging import logging.handlers import traceback +import ipaddress import json from jinja2 import ( Environment, @@ -34,6 +35,7 @@ from jinja2 import ( ) from osm_lcm import ROclient +from osm_lcm.data_utils.lcm_config import LcmCfg from osm_lcm.data_utils.nsr import ( get_deployed_kdu, get_deployed_vca, @@ -52,13 +54,14 @@ from osm_lcm.data_utils.vca import ( from osm_lcm.ng_ro import NgRoClient, NgRoException from osm_lcm.lcm_utils import ( LcmException, - LcmExceptionNoMgmtIP, LcmBase, deep_get, get_iterable, populate_dict, check_juju_bundle_existence, get_charm_artifact_path, + get_ee_id_parts, + vld_to_ro_ip_profile, ) from osm_lcm.data_utils.nsd import ( get_ns_configuration_relation_list, @@ -83,6 +86,7 @@ from osm_lcm.data_utils.vnfd import ( get_juju_ee_ref, get_kdu_resource_profile, find_software_version, + check_helm_ee_in_ns, ) from osm_lcm.data_utils.list_utils import find_in_list from osm_lcm.data_utils.vnfr import ( @@ -94,7 +98,6 @@ from osm_lcm.data_utils.vnfr import ( from osm_lcm.data_utils.dict_utils import parse_yaml_strings from osm_lcm.data_utils.database.vim_account import VimAccountDB from n2vc.definitions import RelationEndpoint -from n2vc.k8s_helm_conn import K8sHelmConnector from n2vc.k8s_helm3_conn import K8sHelm3Connector from n2vc.k8s_juju_conn import K8sJujuConnector @@ -103,6 +106,11 @@ from osm_common.fsbase import FsException from osm_lcm.data_utils.database.database import Database from osm_lcm.data_utils.filesystem.filesystem import Filesystem +from osm_lcm.data_utils.wim import ( + get_sdn_ports, + get_target_wim_attrs, + select_feasible_wim_account, +) from n2vc.n2vc_juju_conn import N2VCJujuConnector from n2vc.exceptions import N2VCException, N2VCNotFound, K8sException @@ -115,33 +123,27 @@ from copy import copy, deepcopy from time import time from uuid import uuid4 -from random import randint +from random import SystemRandom __author__ = "Alfonso Tierno " class NsLcm(LcmBase): - timeout_vca_on_error = ( - 5 * 60 - ) # Time for charm from first time at blocked,error status to mark as failed - timeout_ns_deploy = 2 * 3600 # default global timeout for deployment a ns - timeout_ns_terminate = 1800 # default global timeout for un deployment a ns - timeout_ns_heal = 1800 # default global timeout for un deployment a ns - timeout_charm_delete = 10 * 60 - timeout_primitive = 30 * 60 # timeout for primitive execution - timeout_ns_update = 30 * 60 # timeout for ns update - timeout_progress_primitive = ( - 10 * 60 - ) # timeout for some progress in a primitive execution - timeout_migrate = 1800 # default global timeout for migrating vnfs - timeout_operate = 1800 # default global timeout for migrating vnfs - timeout_verticalscale = 1800 # default global timeout for Vertical Sclaing SUBOPERATION_STATUS_NOT_FOUND = -1 SUBOPERATION_STATUS_NEW = -2 SUBOPERATION_STATUS_SKIP = -3 + EE_TLS_NAME = "ee-tls" task_name_deploy_vca = "Deploying VCA" - - def __init__(self, msg, lcm_tasks, config, loop): + rel_operation_types = { + "GE": ">=", + "LE": "<=", + "GT": ">", + "LT": "<", + "EQ": "==", + "NE": "!=", + } + + def __init__(self, msg, lcm_tasks, config: LcmCfg): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -151,17 +153,14 @@ class NsLcm(LcmBase): self.db = Database().instance.db self.fs = Filesystem().instance.fs - self.loop = loop self.lcm_tasks = lcm_tasks - self.timeout = config["timeout"] - self.ro_config = config["ro_config"] - self.ng_ro = config["ro_config"].get("ng") - self.vca_config = config["VCA"].copy() + self.timeout = config.timeout + self.ro_config = config.RO + self.vca_config = config.VCA # create N2VC connector self.n2vc = N2VCJujuConnector( log=self.logger, - loop=self.loop, on_update_db=self._on_update_n2vc_db, fs=self.fs, db=self.db, @@ -169,23 +168,13 @@ class NsLcm(LcmBase): self.conn_helm_ee = LCMHelmConn( log=self.logger, - loop=self.loop, vca_config=self.vca_config, on_update_db=self._on_update_n2vc_db, ) - self.k8sclusterhelm2 = K8sHelmConnector( - kubectl_command=self.vca_config.get("kubectlpath"), - helm_command=self.vca_config.get("helmpath"), - log=self.logger, - on_update_db=None, - fs=self.fs, - db=self.db, - ) - self.k8sclusterhelm3 = K8sHelm3Connector( - kubectl_command=self.vca_config.get("kubectlpath"), - helm_command=self.vca_config.get("helm3path"), + kubectl_command=self.vca_config.kubectlpath, + helm_command=self.vca_config.helm3path, fs=self.fs, log=self.logger, db=self.db, @@ -193,17 +182,15 @@ class NsLcm(LcmBase): ) self.k8sclusterjuju = K8sJujuConnector( - kubectl_command=self.vca_config.get("kubectlpath"), - juju_command=self.vca_config.get("jujupath"), + kubectl_command=self.vca_config.kubectlpath, + juju_command=self.vca_config.jujupath, log=self.logger, - loop=self.loop, on_update_db=self._on_update_k8s_db, fs=self.fs, db=self.db, ) self.k8scluster_map = { - "helm-chart": self.k8sclusterhelm2, "helm-chart-v3": self.k8sclusterhelm3, "chart": self.k8sclusterhelm3, "juju-bundle": self.k8sclusterjuju, @@ -219,7 +206,7 @@ class NsLcm(LcmBase): } # create RO client - self.RO = NgRoClient(self.loop, **self.ro_config) + self.RO = NgRoClient(**self.ro_config.to_dict()) self.op_status_map = { "instantiation": self.RO.status, @@ -235,6 +222,18 @@ class NsLcm(LcmBase): if not isinstance(ip_mac, str): return ip_mac try: + next_ipv6 = None + next_ipv4 = None + dual_ip = ip_mac.split(";") + if len(dual_ip) == 2: + for ip in dual_ip: + if ipaddress.ip_address(ip).version == 6: + ipv6 = ipaddress.IPv6Address(ip) + next_ipv6 = str(ipaddress.IPv6Address(int(ipv6) + 1)) + elif ipaddress.ip_address(ip).version == 4: + ipv4 = ipaddress.IPv4Address(ip) + next_ipv4 = str(ipaddress.IPv4Address(int(ipv4) + 1)) + return [next_ipv4, next_ipv6] # try with ipv4 look for last dot i = ip_mac.rfind(".") if i > 0: @@ -252,26 +251,7 @@ class NsLcm(LcmBase): pass return None - def _on_update_ro_db(self, nsrs_id, ro_descriptor): - - # self.logger.debug('_on_update_ro_db(nsrs_id={}'.format(nsrs_id)) - - try: - # TODO filter RO descriptor fields... - - # write to database - db_dict = dict() - # db_dict['deploymentStatus'] = yaml.dump(ro_descriptor, default_flow_style=False, indent=2) - db_dict["deploymentStatus"] = ro_descriptor - self.update_db_2("nsrs", nsrs_id, db_dict) - - except Exception as e: - self.logger.warn( - "Cannot write database RO deployment for ns={} -> {}".format(nsrs_id, e) - ) - async def _on_update_n2vc_db(self, table, filter, path, updated_data, vca_id=None): - # remove last dot from path (if exists) if path.endswith("."): path = path[:-1] @@ -279,7 +259,6 @@ class NsLcm(LcmBase): # self.logger.debug('_on_update_n2vc_db(table={}, filter={}, path={}, updated_data={}' # .format(table, filter, path, updated_data)) try: - nsr_id = filter.get("_id") # read ns record from database @@ -470,32 +449,6 @@ class NsLcm(LcmBase): additional_params = vdur.get("additionalParams") return parse_yaml_strings(additional_params) - def vnfd2RO(self, vnfd, new_id=None, additionalParams=None, nsrId=None): - """ - Converts creates a new vnfd descriptor for RO base on input OSM IM vnfd - :param vnfd: input vnfd - :param new_id: overrides vnf id if provided - :param additionalParams: Instantiation params for VNFs provided - :param nsrId: Id of the NSR - :return: copy of vnfd - """ - vnfd_RO = deepcopy(vnfd) - # remove unused by RO configuration, monitoring, scaling and internal keys - vnfd_RO.pop("_id", None) - vnfd_RO.pop("_admin", None) - vnfd_RO.pop("monitoring-param", None) - vnfd_RO.pop("scaling-group-descriptor", None) - vnfd_RO.pop("kdu", None) - vnfd_RO.pop("k8s-cluster", None) - if new_id: - vnfd_RO["id"] = new_id - - # parse cloud-init or cloud-init-file with the provided variables using Jinja2 - for vdu in get_iterable(vnfd_RO, "vdu"): - vdu.pop("cloud-init-file", None) - vdu.pop("cloud-init", None) - return vnfd_RO - @staticmethod def ip_profile_2_RO(ip_profile): RO_ip_profile = deepcopy(ip_profile) @@ -514,33 +467,7 @@ class NsLcm(LcmBase): RO_ip_profile["dhcp"] = RO_ip_profile.pop("dhcp-params") return RO_ip_profile - def _get_ro_vim_id_for_vim_account(self, vim_account): - db_vim = self.db.get_one("vim_accounts", {"_id": vim_account}) - if db_vim["_admin"]["operationalState"] != "ENABLED": - raise LcmException( - "VIM={} is not available. operationalState={}".format( - vim_account, db_vim["_admin"]["operationalState"] - ) - ) - RO_vim_id = db_vim["_admin"]["deployed"]["RO"] - return RO_vim_id - - def get_ro_wim_id_for_wim_account(self, wim_account): - if isinstance(wim_account, str): - db_wim = self.db.get_one("wim_accounts", {"_id": wim_account}) - if db_wim["_admin"]["operationalState"] != "ENABLED": - raise LcmException( - "WIM={} is not available. operationalState={}".format( - wim_account, db_wim["_admin"]["operationalState"] - ) - ) - RO_wim_id = db_wim["_admin"]["deployed"]["RO-account"] - return RO_wim_id - else: - return wim_account - def scale_vnfr(self, db_vnfr, vdu_create=None, vdu_delete=None, mark_delete=False): - db_vdu_push_list = [] template_vdur = [] db_update = {"_admin.modified": time()} @@ -692,103 +619,6 @@ class NsLcm(LcmBase): except DbException as e: self.logger.error("Cannot update vnf. {}".format(e)) - def ns_update_vnfr(self, db_vnfrs, nsr_desc_RO): - """ - Updates database vnfr with the RO info, e.g. ip_address, vim_id... Descriptor db_vnfrs is also updated - :param db_vnfrs: dictionary with member-vnf-index: vnfr-content - :param nsr_desc_RO: nsr descriptor from RO - :return: Nothing, LcmException is raised on errors - """ - for vnf_index, db_vnfr in db_vnfrs.items(): - for vnf_RO in nsr_desc_RO["vnfs"]: - if vnf_RO["member_vnf_index"] != vnf_index: - continue - vnfr_update = {} - if vnf_RO.get("ip_address"): - db_vnfr["ip-address"] = vnfr_update["ip-address"] = vnf_RO[ - "ip_address" - ].split(";")[0] - elif not db_vnfr.get("ip-address"): - if db_vnfr.get("vdur"): # if not VDUs, there is not ip_address - raise LcmExceptionNoMgmtIP( - "ns member_vnf_index '{}' has no IP address".format( - vnf_index - ) - ) - - for vdu_index, vdur in enumerate(get_iterable(db_vnfr, "vdur")): - vdur_RO_count_index = 0 - if vdur.get("pdu-type"): - continue - for vdur_RO in get_iterable(vnf_RO, "vms"): - if vdur["vdu-id-ref"] != vdur_RO["vdu_osm_id"]: - continue - if vdur["count-index"] != vdur_RO_count_index: - vdur_RO_count_index += 1 - continue - vdur["vim-id"] = vdur_RO.get("vim_vm_id") - if vdur_RO.get("ip_address"): - vdur["ip-address"] = vdur_RO["ip_address"].split(";")[0] - else: - vdur["ip-address"] = None - vdur["vdu-id-ref"] = vdur_RO.get("vdu_osm_id") - vdur["name"] = vdur_RO.get("vim_name") - vdur["status"] = vdur_RO.get("status") - vdur["status-detailed"] = vdur_RO.get("error_msg") - for ifacer in get_iterable(vdur, "interfaces"): - for interface_RO in get_iterable(vdur_RO, "interfaces"): - if ifacer["name"] == interface_RO.get("internal_name"): - ifacer["ip-address"] = interface_RO.get( - "ip_address" - ) - ifacer["mac-address"] = interface_RO.get( - "mac_address" - ) - break - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} vdur={} interface={} " - "from VIM info".format( - vnf_index, vdur["vdu-id-ref"], ifacer["name"] - ) - ) - vnfr_update["vdur.{}".format(vdu_index)] = vdur - break - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} vdur={} count_index={} from " - "VIM info".format( - vnf_index, vdur["vdu-id-ref"], vdur["count-index"] - ) - ) - - for vld_index, vld in enumerate(get_iterable(db_vnfr, "vld")): - for net_RO in get_iterable(nsr_desc_RO, "nets"): - if vld["id"] != net_RO.get("vnf_net_osm_id"): - continue - vld["vim-id"] = net_RO.get("vim_net_id") - vld["name"] = net_RO.get("vim_name") - vld["status"] = net_RO.get("status") - vld["status-detailed"] = net_RO.get("error_msg") - vnfr_update["vld.{}".format(vld_index)] = vld - break - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} vld={} from VIM info".format( - vnf_index, vld["id"] - ) - ) - - self.update_db_2("vnfrs", db_vnfr["_id"], vnfr_update) - break - - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} from VIM info".format( - vnf_index - ) - ) - def _get_ns_config_info(self, nsr_id): """ Generates a mapping between vnf,vdu elements and the N2VC id @@ -828,7 +658,6 @@ class NsLcm(LcmBase): start_deploy, timeout_ns_deploy, ): - db_vims = {} def get_vim_account(vim_account_id): @@ -844,9 +673,9 @@ class NsLcm(LcmBase): target_vim, target_vld, vld_params, target_sdn ): if vld_params.get("ip-profile"): - target_vld["vim_info"][target_vim]["ip_profile"] = vld_params[ - "ip-profile" - ] + target_vld["vim_info"][target_vim]["ip_profile"] = vld_to_ro_ip_profile( + vld_params["ip-profile"] + ) if vld_params.get("provider-network"): target_vld["vim_info"][target_vim]["provider_network"] = vld_params[ "provider-network" @@ -855,9 +684,30 @@ class NsLcm(LcmBase): target_vld["vim_info"][target_sdn]["sdn-ports"] = vld_params[ "provider-network" ]["sdn-ports"] - if vld_params.get("wimAccountId"): - target_wim = "wim:{}".format(vld_params["wimAccountId"]) - target_vld["vim_info"][target_wim] = {} + + # check if WIM is needed; if needed, choose a feasible WIM able to connect VIMs + # if wim_account_id is specified in vld_params, validate if it is feasible. + wim_account_id, db_wim = select_feasible_wim_account( + db_nsr, db_vnfrs, target_vld, vld_params, self.logger + ) + + if wim_account_id: + # WIM is needed and a feasible one was found, populate WIM target and SDN ports + self.logger.info("WIM selected: {:s}".format(str(wim_account_id))) + # update vld_params with correct WIM account Id + vld_params["wimAccountId"] = wim_account_id + + target_wim = "wim:{}".format(wim_account_id) + target_wim_attrs = get_target_wim_attrs(nsr_id, target_vld, vld_params) + sdn_ports = get_sdn_ports(vld_params, db_wim) + if len(sdn_ports) > 0: + target_vld["vim_info"][target_wim] = target_wim_attrs + target_vld["vim_info"][target_wim]["sdn-ports"] = sdn_ports + + self.logger.debug( + "Target VLD with WIM data: {:s}".format(str(target_vld)) + ) + for param in ("vim-network-name", "vim-network-id"): if vld_params.get(param): if isinstance(vld_params[param], dict): @@ -890,7 +740,7 @@ class NsLcm(LcmBase): ) vdur = next((vdur for vdur in target_vnf.get("vdur", ())), None) if not vdur: - return + continue for a_index, a_vld in enumerate(target["ns"]["vld"]): target_vld = find_in_list( get_iterable(vdur, "interfaces"), @@ -902,7 +752,6 @@ class NsLcm(LcmBase): lambda v_vld: v_vld["name"] in (a_vld["name"], a_vld["id"]), ) if target_vld: - if vnf_params.get("vimAccountId") not in a_vld.get( "vim_info", {} ): @@ -959,6 +808,10 @@ class NsLcm(LcmBase): image["vim_info"] = {} for flavor in target["flavor"]: flavor["vim_info"] = {} + if db_nsr.get("shared-volumes"): + target["shared-volumes"] = deepcopy(db_nsr["shared-volumes"]) + for shared_volumes in target["shared-volumes"]: + shared_volumes["vim_info"] = {} if db_nsr.get("affinity-or-anti-affinity-group"): target["affinity-or-anti-affinity-group"] = deepcopy( db_nsr["affinity-or-anti-affinity-group"] @@ -1001,16 +854,16 @@ class NsLcm(LcmBase): # check if this network needs SDN assist if vld.get("pci-interfaces"): db_vim = get_vim_account(ns_params["vimAccountId"]) - sdnc_id = db_vim["config"].get("sdn-controller") - if sdnc_id: - sdn_vld = "nsrs:{}:vld.{}".format(nsr_id, vld["id"]) - target_sdn = "sdn:{}".format(sdnc_id) - target_vld["vim_info"][target_sdn] = { - "sdn": True, - "target_vim": target_vim, - "vlds": [sdn_vld], - "type": vld.get("type"), - } + if vim_config := db_vim.get("config"): + if sdnc_id := vim_config.get("sdn-controller"): + sdn_vld = "nsrs:{}:vld.{}".format(nsr_id, vld["id"]) + target_sdn = "sdn:{}".format(sdnc_id) + target_vld["vim_info"][target_sdn] = { + "sdn": True, + "target_vim": target_vim, + "vlds": [sdn_vld], + "type": vld.get("type"), + } nsd_vnf_profiles = get_vnf_profiles(nsd) for nsd_vnf_profile in nsd_vnf_profiles: @@ -1037,27 +890,9 @@ class NsLcm(LcmBase): and nsd_vlp.get("virtual-link-protocol-data") and nsd_vlp["virtual-link-protocol-data"].get("l3-protocol-data") ): - ip_profile_source_data = nsd_vlp["virtual-link-protocol-data"][ + vld_params["ip-profile"] = nsd_vlp["virtual-link-protocol-data"][ "l3-protocol-data" ] - ip_profile_dest_data = {} - if "ip-version" in ip_profile_source_data: - ip_profile_dest_data["ip-version"] = ip_profile_source_data[ - "ip-version" - ] - if "cidr" in ip_profile_source_data: - ip_profile_dest_data["subnet-address"] = ip_profile_source_data[ - "cidr" - ] - if "gateway-ip" in ip_profile_source_data: - ip_profile_dest_data["gateway-address"] = ip_profile_source_data[ - "gateway-ip" - ] - if "dhcp-enabled" in ip_profile_source_data: - ip_profile_dest_data["dhcp-params"] = { - "enabled": ip_profile_source_data["dhcp-enabled"] - } - vld_params["ip-profile"] = ip_profile_dest_data # update vld_params with instantiation params vld_instantiation_params = find_in_list( @@ -1123,28 +958,9 @@ class NsLcm(LcmBase): and vnfd_vlp.get("virtual-link-protocol-data") and vnfd_vlp["virtual-link-protocol-data"].get("l3-protocol-data") ): - ip_profile_source_data = vnfd_vlp["virtual-link-protocol-data"][ + vld_params["ip-profile"] = vnfd_vlp["virtual-link-protocol-data"][ "l3-protocol-data" ] - ip_profile_dest_data = {} - if "ip-version" in ip_profile_source_data: - ip_profile_dest_data["ip-version"] = ip_profile_source_data[ - "ip-version" - ] - if "cidr" in ip_profile_source_data: - ip_profile_dest_data["subnet-address"] = ip_profile_source_data[ - "cidr" - ] - if "gateway-ip" in ip_profile_source_data: - ip_profile_dest_data[ - "gateway-address" - ] = ip_profile_source_data["gateway-ip"] - if "dhcp-enabled" in ip_profile_source_data: - ip_profile_dest_data["dhcp-params"] = { - "enabled": ip_profile_source_data["dhcp-enabled"] - } - - vld_params["ip-profile"] = ip_profile_dest_data # update vld_params with instantiation params if vnf_params: vld_instantiation_params = find_in_list( @@ -1266,6 +1082,15 @@ class NsLcm(LcmBase): if target_vim not in ns_ags["vim_info"]: ns_ags["vim_info"][target_vim] = {} + # shared-volumes + if vdur.get("shared-volumes-id"): + for sv_id in vdur["shared-volumes-id"]: + ns_sv = find_in_list( + target["shared-volumes"], lambda sv: sv_id in sv["id"] + ) + if ns_sv: + ns_sv["vim_info"][target_vim] = {} + vdur["vim_info"] = {target_vim: {}} # instantiation parameters if vnf_params: @@ -1279,6 +1104,9 @@ class NsLcm(LcmBase): vdu_instantiation_params, vdud ) vdur["additionalParams"]["OSM"]["vdu_volumes"] = vdu_volumes + vdur["additionalParams"]["OSM"][ + "vim_flavor_id" + ] = vdu_instantiation_params.get("vim-flavor-id") vdur_list.append(vdur) target_vnf["vdur"] = vdur_list target["vnf"].append(target_vnf) @@ -1344,7 +1172,7 @@ class NsLcm(LcmBase): db_nsr_update["detailed-status"] = " ".join(stage) self.update_db_2("nsrs", nsr_id, db_nsr_update) self._write_op_status(nslcmop_id, stage) - await asyncio.sleep(15, loop=self.loop) + await asyncio.sleep(15) else: # timeout_ns_deploy raise NgRoException("Timeout waiting ns to deploy") @@ -1365,7 +1193,6 @@ class NsLcm(LcmBase): } desc = await self.RO.deploy(nsr_id, target) action_id = desc["action_id"] - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = action_id db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETING" self.logger.debug( logging_text @@ -1383,20 +1210,17 @@ class NsLcm(LcmBase): stage, operation="termination", ) - - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" # delete all nsr await self.RO.delete(nsr_id) - except Exception as e: - if isinstance(e, NgRoException) and e.http_code == 404: # not found + except NgRoException as e: + if e.http_code == 404: # not found db_nsr_update["_admin.deployed.RO.nsr_id"] = None db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None self.logger.debug( logging_text + "RO_action_id={} already deleted".format(action_id) ) - elif isinstance(e, NgRoException) and e.http_code == 409: # conflict + elif e.http_code == 409: # conflict failed_detail.append("delete conflict: {}".format(e)) self.logger.debug( logging_text @@ -1408,6 +1232,11 @@ class NsLcm(LcmBase): logging_text + "RO_action_id={} delete error: {}".format(action_id, e) ) + except Exception as e: + failed_detail.append("delete error: {}".format(e)) + self.logger.error( + logging_text + "RO_action_id={} delete error: {}".format(action_id, e) + ) if failed_detail: stage[2] = "Error deleting from VIM" @@ -1452,9 +1281,7 @@ class NsLcm(LcmBase): if ns_params and ns_params.get("timeout_ns_deploy"): timeout_ns_deploy = ns_params["timeout_ns_deploy"] else: - timeout_ns_deploy = self.timeout.get( - "ns_deploy", self.timeout_ns_deploy - ) + timeout_ns_deploy = self.timeout.ns_deploy # Check for and optionally request placement optimization. Database will be updated if placement activated stage[2] = "Waiting for Placement." @@ -1531,7 +1358,7 @@ class NsLcm(LcmBase): "target KDU={} is in error state".format(kdu_name) ) - await asyncio.sleep(10, loop=self.loop) + await asyncio.sleep(10) nb_tries += 1 raise LcmException("Timeout waiting KDU={} instantiated".format(kdu_name)) @@ -1551,21 +1378,18 @@ class NsLcm(LcmBase): """ self.logger.debug(logging_text + "Starting wait_vm_up_insert_key_ro") - ro_nsr_id = None ip_address = None - nb_tries = 0 target_vdu_id = None ro_retries = 0 while True: - ro_retries += 1 if ro_retries >= 360: # 1 hour raise LcmException( "Not found _admin.deployed.RO.nsr_id for nsr_id: {}".format(nsr_id) ) - await asyncio.sleep(10, loop=self.loop) + await asyncio.sleep(10) # get ip address if not target_vdu_id: @@ -1641,74 +1465,24 @@ class NsLcm(LcmBase): self.logger.error(logging_text + "Cannot inject ssh-ky to a PDU") return ip_address try: - ro_vm_id = "{}-{}".format( - db_vnfr["member-vnf-index-ref"], target_vdu_id - ) # TODO add vdu_index - if self.ng_ro: - target = { - "action": { - "action": "inject_ssh_key", - "key": pub_key, - "user": user, - }, - "vnf": [{"_id": vnfr_id, "vdur": [{"id": vdur["id"]}]}], - } - desc = await self.RO.deploy(nsr_id, target) - action_id = desc["action_id"] - await self._wait_ng_ro( - nsr_id, action_id, timeout=600, operation="instantiation" - ) - break - else: - # wait until NS is deployed at RO - if not ro_nsr_id: - db_nsrs = self.db.get_one("nsrs", {"_id": nsr_id}) - ro_nsr_id = deep_get( - db_nsrs, ("_admin", "deployed", "RO", "nsr_id") - ) - if not ro_nsr_id: - continue - result_dict = await self.RO.create_action( - item="ns", - item_id_name=ro_nsr_id, - descriptor={ - "add_public_key": pub_key, - "vms": [ro_vm_id], - "user": user, - }, - ) - # result_dict contains the format {VM-id: {vim_result: 200, description: text}} - if not result_dict or not isinstance(result_dict, dict): - raise LcmException( - "Unknown response from RO when injecting key" - ) - for result in result_dict.values(): - if result.get("vim_result") == 200: - break - else: - raise ROclient.ROClientException( - "error injecting key: {}".format( - result.get("description") - ) - ) - break + target = { + "action": { + "action": "inject_ssh_key", + "key": pub_key, + "user": user, + }, + "vnf": [{"_id": vnfr_id, "vdur": [{"id": vdur["id"]}]}], + } + desc = await self.RO.deploy(nsr_id, target) + action_id = desc["action_id"] + await self._wait_ng_ro( + nsr_id, action_id, timeout=600, operation="instantiation" + ) + break except NgRoException as e: raise LcmException( "Reaching max tries injecting key. Error: {}".format(e) ) - except ROclient.ROClientException as e: - if not nb_tries: - self.logger.debug( - logging_text - + "error injecting key: {}. Retrying until {} seconds".format( - e, 20 * 10 - ) - ) - nb_tries += 1 - if nb_tries >= 20: - raise LcmException( - "Reaching max tries injecting key. Error: {}".format(e) - ) else: break @@ -1771,6 +1545,7 @@ class NsLcm(LcmBase): vdu_id, kdu_name, vdu_index, + kdu_index, config_descriptor, deploy_params, base_folder, @@ -1792,7 +1567,6 @@ class NsLcm(LcmBase): } step = "" try: - element_type = "NS" element_under_configuration = nsr_id @@ -1875,8 +1649,7 @@ class NsLcm(LcmBase): vca_id = self.get_vca_id(db_vnfr, db_nsr) # create or register execution environment in VCA - if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm", "helm-v3"): - + if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm-v3"): self._write_configuration_status( nsr_id=nsr_id, vca_index=vca_index, @@ -1898,11 +1671,11 @@ class NsLcm(LcmBase): db_dict=db_dict, vca_id=vca_id, ) - elif vca_type == "helm" or vca_type == "helm-v3": + elif vca_type == "helm-v3": ee_id, credentials = await self.vca_map[ vca_type ].create_execution_environment( - namespace=namespace, + namespace=nsr_id, reuse_ee_id=ee_id, db_dict=db_dict, config=osm_config, @@ -2032,16 +1805,19 @@ class NsLcm(LcmBase): ) # add relations for this VCA (wait for other peers related with this VCA) - await self._add_vca_relations( + is_relation_added = await self._add_vca_relations( logging_text=logging_text, nsr_id=nsr_id, vca_type=vca_type, vca_index=vca_index, ) + if not is_relation_added: + raise LcmException("Relations could not be added to VCA.") + # if SSH access is required, then get execution environment SSH public # if native charm we have waited already to VM be UP - if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"): + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"): pub_key = None user = None # self.logger.debug("get ssh key block") @@ -2183,7 +1959,7 @@ class NsLcm(LcmBase): # TODO register in database that primitive is done # STEP 7 Configure metrics - if vca_type == "helm" or vca_type == "helm-v3": + if vca_type == "helm-v3": # TODO: review for those cases where the helm chart is a reference and # is not part of the NF package prometheus_jobs = await self.extract_prometheus_scrape_jobs( @@ -2193,6 +1969,12 @@ class NsLcm(LcmBase): vnfr_id=vnfr_id, nsr_id=nsr_id, target_ip=rw_mgmt_ip, + element_type=element_type, + vnf_member_index=db_vnfr.get("member-vnf-index-ref", ""), + vdu_id=vdu_id, + vdu_index=vdu_index, + kdu_name=kdu_name, + kdu_index=kdu_index, ) if prometheus_jobs: self.update_db_2( @@ -2228,7 +2010,7 @@ class NsLcm(LcmBase): self._write_configuration_status( nsr_id=nsr_id, vca_index=vca_index, status="BROKEN" ) - raise LcmException("{} {}".format(step, e)) from e + raise LcmException("{}. {}".format(step, e)) from e def _write_ns_status( self, @@ -2328,7 +2110,6 @@ class NsLcm(LcmBase): element_type: str = None, other_update: dict = None, ): - # self.logger.debug('_write_configuration_status(): vca_index={}, status={}' # .format(vca_index, status)) @@ -2369,9 +2150,7 @@ class NsLcm(LcmBase): self.logger.debug( logging_text + "Invoke and wait for placement optimization" ) - await self.msg.aiowrite( - "pla", "get_placement", {"nslcmopId": nslcmop_id}, loop=self.loop - ) + await self.msg.aiowrite("pla", "get_placement", {"nslcmopId": nslcmop_id}) db_poll_interval = 5 wait = db_poll_interval * 10 pla_result = None @@ -2400,6 +2179,299 @@ class NsLcm(LcmBase): vnfr["vim-account-id"] = pla_vnf["vimAccountId"] return modified + def _gather_vnfr_healing_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + df = vnfd.get("df", [{}])[0] + # Checking for auto-healing configuration + if "healing-aspect" in df: + healing_aspects = df["healing-aspect"] + for healing in healing_aspects: + for healing_policy in healing.get("healing-policy", ()): + vdu_id = healing_policy["vdu-id"] + vdur = next( + (vdur for vdur in vnfr["vdur"] if vdu_id == vdur["vdu-id-ref"]), + {}, + ) + if not vdur: + continue + metric_name = "vm_status" + vdu_name = vdur.get("name") + vnf_member_index = vnfr["member-vnf-index-ref"] + uuid = str(uuid4()) + name = f"healing_{uuid}" + action = healing_policy + # action_on_recovery = healing.get("action-on-recovery") + # cooldown_time = healing.get("cooldown-time") + # day1 = healing.get("day1") + alert = { + "uuid": uuid, + "name": name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_name": vdu_name, + }, + "alarm_status": "ok", + "action_type": "healing", + "action": action, + } + alerts.append(alert) + return alerts + + def _gather_vnfr_scaling_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + df = vnfd.get("df", [{}])[0] + # Checking for auto-scaling configuration + if "scaling-aspect" in df: + scaling_aspects = df["scaling-aspect"] + all_vnfd_monitoring_params = {} + for ivld in vnfd.get("int-virtual-link-desc", ()): + for mp in ivld.get("monitoring-parameters", ()): + all_vnfd_monitoring_params[mp.get("id")] = mp + for vdu in vnfd.get("vdu", ()): + for mp in vdu.get("monitoring-parameter", ()): + all_vnfd_monitoring_params[mp.get("id")] = mp + for df in vnfd.get("df", ()): + for mp in df.get("monitoring-parameter", ()): + all_vnfd_monitoring_params[mp.get("id")] = mp + for scaling_aspect in scaling_aspects: + scaling_group_name = scaling_aspect.get("name", "") + # Get monitored VDUs + all_monitored_vdus = set() + for delta in scaling_aspect.get("aspect-delta-details", {}).get( + "deltas", () + ): + for vdu_delta in delta.get("vdu-delta", ()): + all_monitored_vdus.add(vdu_delta.get("id")) + monitored_vdurs = list( + filter( + lambda vdur: vdur["vdu-id-ref"] in all_monitored_vdus, + vnfr["vdur"], + ) + ) + if not monitored_vdurs: + self.logger.error( + "Scaling criteria is referring to a vnf-monitoring-param that does not contain a reference to a vdu or vnf metric" + ) + continue + for scaling_policy in scaling_aspect.get("scaling-policy", ()): + if scaling_policy["scaling-type"] != "automatic": + continue + threshold_time = scaling_policy.get("threshold-time", "1") + cooldown_time = scaling_policy.get("cooldown-time", "0") + for scaling_criteria in scaling_policy["scaling-criteria"]: + monitoring_param_ref = scaling_criteria.get( + "vnf-monitoring-param-ref" + ) + vnf_monitoring_param = all_vnfd_monitoring_params[ + monitoring_param_ref + ] + for vdur in monitored_vdurs: + vdu_id = vdur["vdu-id-ref"] + metric_name = vnf_monitoring_param.get("performance-metric") + metric_name = f"osm_{metric_name}" + vnf_member_index = vnfr["member-vnf-index-ref"] + scalein_threshold = scaling_criteria.get( + "scale-in-threshold" + ) + scaleout_threshold = scaling_criteria.get( + "scale-out-threshold" + ) + # Looking for min/max-number-of-instances + instances_min_number = 1 + instances_max_number = 1 + vdu_profile = df["vdu-profile"] + if vdu_profile: + profile = next( + item for item in vdu_profile if item["id"] == vdu_id + ) + instances_min_number = profile.get( + "min-number-of-instances", 1 + ) + instances_max_number = profile.get( + "max-number-of-instances", 1 + ) + + if scalein_threshold: + uuid = str(uuid4()) + name = f"scalein_{uuid}" + operation = scaling_criteria[ + "scale-in-relational-operation" + ] + rel_operator = self.rel_operation_types.get( + operation, "<=" + ) + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"(count ({metric_selector}) > {instances_min_number}) and (avg({metric_selector}) {rel_operator} {scalein_threshold})" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + } + prom_cfg = { + "alert": name, + "expr": expression, + "for": str(threshold_time) + "m", + "labels": labels, + } + action = scaling_policy + action = { + "scaling-group": scaling_group_name, + "cooldown-time": cooldown_time, + } + alert = { + "uuid": uuid, + "name": name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "scale_in", + "action": action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + + if scaleout_threshold: + uuid = str(uuid4()) + name = f"scaleout_{uuid}" + operation = scaling_criteria[ + "scale-out-relational-operation" + ] + rel_operator = self.rel_operation_types.get( + operation, "<=" + ) + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"(count ({metric_selector}) < {instances_max_number}) and (avg({metric_selector}) {rel_operator} {scaleout_threshold})" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + } + prom_cfg = { + "alert": name, + "expr": expression, + "for": str(threshold_time) + "m", + "labels": labels, + } + action = scaling_policy + action = { + "scaling-group": scaling_group_name, + "cooldown-time": cooldown_time, + } + alert = { + "uuid": uuid, + "name": name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "scale_out", + "action": action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + return alerts + + def _gather_vnfr_alarm_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + vnf_member_index = vnfr["member-vnf-index-ref"] + + # Checking for VNF alarm configuration + for vdur in vnfr["vdur"]: + vdu_id = vdur["vdu-id-ref"] + vdu = next(filter(lambda vdu: vdu["id"] == vdu_id, vnfd["vdu"])) + if "alarm" in vdu: + # Get VDU monitoring params, since alerts are based on them + vdu_monitoring_params = {} + for mp in vdu.get("monitoring-parameter", []): + vdu_monitoring_params[mp.get("id")] = mp + if not vdu_monitoring_params: + self.logger.error( + "VDU alarm refers to a VDU monitoring param, but there are no VDU monitoring params in the VDU" + ) + continue + # Get alarms in the VDU + alarm_descriptors = vdu["alarm"] + # Create VDU alarms for each alarm in the VDU + for alarm_descriptor in alarm_descriptors: + # Check that the VDU alarm refers to a proper monitoring param + alarm_monitoring_param = alarm_descriptor.get( + "vnf-monitoring-param-ref", "" + ) + vdu_specific_monitoring_param = vdu_monitoring_params.get( + alarm_monitoring_param, {} + ) + if not vdu_specific_monitoring_param: + self.logger.error( + "VDU alarm refers to a VDU monitoring param not present in the VDU" + ) + continue + metric_name = vdu_specific_monitoring_param.get( + "performance-metric" + ) + if not metric_name: + self.logger.error( + "VDU alarm refers to a VDU monitoring param that has no associated performance-metric" + ) + continue + # Set params of the alarm to be created in Prometheus + metric_name = f"osm_{metric_name}" + metric_threshold = alarm_descriptor.get("value") + uuid = str(uuid4()) + alert_name = f"vdu_alarm_{uuid}" + operation = alarm_descriptor["operation"] + rel_operator = self.rel_operation_types.get(operation, "<=") + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"{metric_selector} {rel_operator} {metric_threshold}" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + "vdu_name": "{{ $labels.vdu_name }}", + } + prom_cfg = { + "alert": alert_name, + "expr": expression, + "for": "1m", # default value. Ideally, this should be related to an IM param, but there is not such param + "labels": labels, + } + alarm_action = dict() + for action_type in ["ok", "insufficient-data", "alarm"]: + if ( + "actions" in alarm_descriptor + and action_type in alarm_descriptor["actions"] + ): + alarm_action[action_type] = alarm_descriptor["actions"][ + action_type + ] + alert = { + "uuid": uuid, + "name": alert_name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "vdu_alarm", + "action": alarm_action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + return alerts + def update_nsrs_with_pla_result(self, params): try: nslcmop_id = deep_get(params, ("placement", "nslcmopId")) @@ -2441,6 +2513,8 @@ class NsLcm(LcmBase): # update operation on nslcmops db_nslcmop_update = {} + timeout_ns_deploy = self.timeout.ns_deploy + nslcmop_operation_state = None db_vnfrs = {} # vnf's info indexed by member-index # n2vc_info = {} @@ -2481,10 +2555,6 @@ class NsLcm(LcmBase): ns_params = db_nslcmop.get("operationParams") if ns_params and ns_params.get("timeout_ns_deploy"): timeout_ns_deploy = ns_params["timeout_ns_deploy"] - else: - timeout_ns_deploy = self.timeout.get( - "ns_deploy", self.timeout_ns_deploy - ) # read from db: ns stage[1] = "Getting nsr={} from db.".format(nsr_id) @@ -2585,8 +2655,8 @@ class NsLcm(LcmBase): # feature 1429. Add n2vc public key to needed VMs n2vc_key = self.n2vc.get_public_key() n2vc_key_list = [n2vc_key] - if self.vca_config.get("public_key"): - n2vc_key_list.append(self.vca_config["public_key"]) + if self.vca_config.public_key: + n2vc_key_list.append(self.vca_config.public_key) stage[1] = "Deploying NS at VIM." task_ro = asyncio.ensure_future( @@ -2609,6 +2679,20 @@ class NsLcm(LcmBase): stage[1] = "Deploying Execution Environments." self.logger.debug(logging_text + stage[1]) + # create namespace and certificate if any helm based EE is present in the NS + if check_helm_ee_in_ns(db_vnfds): + await self.vca_map["helm-v3"].setup_ns_namespace( + name=nsr_id, + ) + # create TLS certificates + await self.vca_map["helm-v3"].create_tls_certificate( + secret_name=self.EE_TLS_NAME, + dns_prefix="*", + nsr_id=nsr_id, + usage="server auth", + namespace=nsr_id, + ) + nsi_id = None # TODO put nsi_id when this nsr belongs to a NSI for vnf_profile in get_vnf_profiles(nsd): vnfd_id = vnf_profile["vnfd-id"] @@ -2620,6 +2704,7 @@ class NsLcm(LcmBase): vdu_index = 0 vdu_name = None kdu_name = None + kdu_index = None # Get additional parameters deploy_params = {"OSM": get_osm_params(db_vnfr)} @@ -2643,6 +2728,7 @@ class NsLcm(LcmBase): kdu_name=kdu_name, member_vnf_index=member_vnf_index, vdu_index=vdu_index, + kdu_index=kdu_index, vdu_name=vdu_name, deploy_params=deploy_params, descriptor_config=descriptor_config, @@ -2675,6 +2761,7 @@ class NsLcm(LcmBase): if descriptor_config: vdu_name = None kdu_name = None + kdu_index = None for vdu_index in range(vdud_count): # TODO vnfr_params["rw_mgmt_ip"] = vdur["ip-address"] self._deploy_n2vc( @@ -2690,6 +2777,7 @@ class NsLcm(LcmBase): vnfd_id=vnfd_id, vdu_id=vdu_id, kdu_name=kdu_name, + kdu_index=kdu_index, member_vnf_index=member_vnf_index, vdu_index=vdu_index, vdu_name=vdu_name, @@ -2706,8 +2794,10 @@ class NsLcm(LcmBase): vdu_id = None vdu_index = 0 vdu_name = None - kdur = next( - x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name + kdu_index, kdur = next( + x + for x in enumerate(db_vnfr["kdur"]) + if x[1]["kdu-name"] == kdu_name ) deploy_params_kdu = {"OSM": get_osm_params(db_vnfr)} if kdur.get("additionalParams"): @@ -2727,6 +2817,7 @@ class NsLcm(LcmBase): kdu_name=kdu_name, member_vnf_index=member_vnf_index, vdu_index=vdu_index, + kdu_index=kdu_index, vdu_name=vdu_name, deploy_params=deploy_params_kdu, descriptor_config=descriptor_config, @@ -2735,6 +2826,65 @@ class NsLcm(LcmBase): stage=stage, ) + # Check if each vnf has exporter for metric collection if so update prometheus job records + if "exporters-endpoints" in vnfd.get("df")[0]: + exporter_config = vnfd.get("df")[0].get("exporters-endpoints") + self.logger.debug("exporter config :{}".format(exporter_config)) + artifact_path = "{}/{}/{}".format( + base_folder["folder"], + base_folder["pkg-dir"], + "exporter-endpoint", + ) + ee_id = None + ee_config_descriptor = exporter_config + vnfr_id = db_vnfr["id"] + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro( + logging_text, + nsr_id, + vnfr_id, + vdu_id=None, + vdu_index=None, + user=None, + pub_key=None, + ) + self.logger.debug("rw_mgmt_ip:{}".format(rw_mgmt_ip)) + self.logger.debug("Artifact_path:{}".format(artifact_path)) + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + vdu_id_for_prom = None + vdu_index_for_prom = None + for x in get_iterable(db_vnfr, "vdur"): + vdu_id_for_prom = x.get("vdu-id-ref") + vdu_index_for_prom = x.get("count-index") + prometheus_jobs = await self.extract_prometheus_scrape_jobs( + ee_id=ee_id, + artifact_path=artifact_path, + ee_config_descriptor=ee_config_descriptor, + vnfr_id=vnfr_id, + nsr_id=nsr_id, + target_ip=rw_mgmt_ip, + element_type="VDU", + vdu_id=vdu_id_for_prom, + vdu_index=vdu_index_for_prom, + ) + + self.logger.debug("Prometheus job:{}".format(prometheus_jobs)) + if prometheus_jobs: + db_nsr_update["_admin.deployed.prometheus_jobs"] = prometheus_jobs + self.update_db_2( + "nsrs", + nsr_id, + db_nsr_update, + ) + + for job in prometheus_jobs: + self.db.set_one( + "prometheus_jobs", + {"job_name": job["job_name"]}, + job, + upsert=True, + fail_on_empty=False, + ) + # Check if this NS has a charm configuration descriptor_config = nsd.get("ns-configuration") if descriptor_config and descriptor_config.get("juju"): @@ -2743,6 +2893,7 @@ class NsLcm(LcmBase): member_vnf_index = None vdu_id = None kdu_name = None + kdu_index = None vdu_index = 0 vdu_name = None @@ -2765,6 +2916,7 @@ class NsLcm(LcmBase): kdu_name=kdu_name, member_vnf_index=member_vnf_index, vdu_index=vdu_index, + kdu_index=kdu_index, vdu_name=vdu_name, deploy_params=deploy_params, descriptor_config=descriptor_config, @@ -2815,7 +2967,15 @@ class NsLcm(LcmBase): stage[1] = stage[2] = "" except asyncio.CancelledError: error_list.append("Cancelled") - # TODO cancel all tasks + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( + logging_text, + tasks_dict_info, + timeout_ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) except Exception as exc: error_list.append(str(exc)) @@ -2856,7 +3016,27 @@ class NsLcm(LcmBase): db_nsr_update["detailed-status"] = "Done" db_nslcmop_update["detailed-status"] = "Done" nslcmop_operation_state = "COMPLETED" - + # Gather auto-healing and auto-scaling alerts for each vnfr + healing_alerts = [] + scaling_alerts = [] + for vnfr in self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}): + vnfd = next( + (sub for sub in db_vnfds if sub["_id"] == vnfr["vnfd-id"]), None + ) + healing_alerts = self._gather_vnfr_healing_alerts(vnfr, vnfd) + for alert in healing_alerts: + self.logger.info(f"Storing healing alert in MongoDB: {alert}") + self.db.create("alerts", alert) + + scaling_alerts = self._gather_vnfr_scaling_alerts(vnfr, vnfd) + for alert in scaling_alerts: + self.logger.info(f"Storing scaling alert in MongoDB: {alert}") + self.db.create("alerts", alert) + + alarm_alerts = self._gather_vnfr_alarm_alerts(vnfr, vnfd) + for alert in alarm_alerts: + self.logger.info(f"Storing VNF alarm alert in MongoDB: {alert}") + self.db.create("alerts", alert) if db_nsr: self._write_ns_status( nsr_id=nsr_id, @@ -2884,8 +3064,13 @@ class NsLcm(LcmBase): "nsr_id": nsr_id, "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, + "startTime": db_nslcmop["startTime"], + "links": db_nslcmop["links"], + "operationParams": { + "nsInstanceId": nsr_id, + "nsdId": db_nsr["nsd-id"], + }, }, - loop=self.loop, ) except Exception as e: self.logger.error( @@ -2895,9 +3080,11 @@ class NsLcm(LcmBase): self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_instantiate") - def _get_vnfd(self, vnfd_id: str, cached_vnfds: Dict[str, Any]): + def _get_vnfd(self, vnfd_id: str, projects_read: str, cached_vnfds: Dict[str, Any]): if vnfd_id not in cached_vnfds: - cached_vnfds[vnfd_id] = self.db.get_one("vnfds", {"id": vnfd_id}) + cached_vnfds[vnfd_id] = self.db.get_one( + "vnfds", {"id": vnfd_id, "_admin.projects_read": projects_read} + ) return cached_vnfds[vnfd_id] def _get_vnfr(self, nsr_id: str, vnf_profile_id: str, cached_vnfrs: Dict[str, Any]): @@ -2939,7 +3126,8 @@ class NsLcm(LcmBase): ]: vnf_profile = get_vnf_profile(nsd, ee_relation_data["vnf-profile-id"]) vnfd_id = vnf_profile["vnfd-id"] - db_vnfd = self._get_vnfd(vnfd_id, cached_vnfds) + project = nsd["_admin"]["projects_read"][0] + db_vnfd = self._get_vnfd(vnfd_id, project, cached_vnfds) entity_id = ( vnfd_id if ee_relation_level == EELevel.VNF @@ -3009,10 +3197,14 @@ class NsLcm(LcmBase): cached_vnfds: Dict[str, Any], ) -> List[Relation]: relations = [] + if vca.target_element == "ns": + self.logger.debug("VCA is a NS charm, not a VNF.") + return relations vnf_profile = get_vnf_profile(nsd, vca.vnf_profile_id) vnf_profile_id = vnf_profile["id"] vnfd_id = vnf_profile["vnfd-id"] - db_vnfd = self._get_vnfd(vnfd_id, cached_vnfds) + project = nsd["_admin"]["projects_read"][0] + db_vnfd = self._get_vnfd(vnfd_id, project, cached_vnfds) db_vnf_relations = get_relation_list(db_vnfd, vnfd_id) for r in db_vnf_relations: provider_dict = None @@ -3067,7 +3259,8 @@ class NsLcm(LcmBase): vnf_profiles, lambda vnf_profile: vnf_profile["id"] == ee_relation.vnf_profile_id, )["vnfd-id"] - db_vnfd = self._get_vnfd(vnfd_id, cached_vnfds) + project = nsd["_admin"]["projects_read"][0] + db_vnfd = self._get_vnfd(vnfd_id, project, cached_vnfds) kdu_resource_profile = get_kdu_resource_profile( db_vnfd, ee_relation.kdu_resource_profile_id ) @@ -3173,11 +3366,14 @@ class NsLcm(LcmBase): requirer_vca_id, relation.requirer.endpoint, ) - await self.vca_map[vca_type].add_relation( - provider=provider_relation_endpoint, - requirer=requirer_relation_endpoint, - ) - # remove entry from relations list + try: + await self.vca_map[vca_type].add_relation( + provider=provider_relation_endpoint, + requirer=requirer_relation_endpoint, + ) + except N2VCException as exception: + self.logger.error(exception) + raise LcmException(exception) return True return False @@ -3189,7 +3385,6 @@ class NsLcm(LcmBase): vca_index: int, timeout: int = 3600, ) -> bool: - # steps: # 1. find all relations for this VCA # 2. wait for other peers related @@ -3267,7 +3462,6 @@ class NsLcm(LcmBase): timeout: int = 600, vca_id: str = None, ): - try: k8sclustertype = k8s_instance_info["k8scluster-type"] # Instantiate kdu @@ -3444,9 +3638,11 @@ class NsLcm(LcmBase): vnfr_data.get("_id"), {"kdur.{}.status".format(kdu_index): "ERROR"}, ) - except Exception: + except Exception as error: # ignore to keep original exception - pass + self.logger.warning( + f"An exception occurred while updating DB: {str(error)}" + ) # reraise original error raise @@ -3465,7 +3661,6 @@ class NsLcm(LcmBase): k8scluster_id_2_uuic = { "helm-chart-v3": {}, - "helm-chart": {}, "juju-bundle": {}, } @@ -3564,11 +3759,6 @@ class NsLcm(LcmBase): # Default version: helm3, if helm-version is v2 assign v2 k8sclustertype = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8sclustertype = "helm-chart" elif kdur.get("juju-bundle"): kdumodel = kdur["juju-bundle"] k8sclustertype = "juju-bundle" @@ -3606,8 +3796,8 @@ class NsLcm(LcmBase): kdumodel = self.fs.path + filename except (asyncio.TimeoutError, asyncio.CancelledError): raise - except Exception: # it is not a file - pass + except Exception as e: # it is not a file + self.logger.warning(f"An exception occurred: {str(e)}") k8s_cluster_id = kdur["k8s-cluster"]["id"] step = "Synchronize repos for k8s cluster '{}'".format( @@ -3737,6 +3927,7 @@ class NsLcm(LcmBase): kdu_name, member_vnf_index, vdu_index, + kdu_index, vdu_name, deploy_params, descriptor_config, @@ -3788,10 +3979,7 @@ class NsLcm(LcmBase): vca_type = "native_charm" elif ee_item.get("helm-chart"): vca_name = ee_item["helm-chart"] - if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2": - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" else: self.logger.debug( logging_text + "skipping non juju neither charm configuration" @@ -3863,6 +4051,7 @@ class NsLcm(LcmBase): vdu_id=vdu_id, kdu_name=kdu_name, vdu_index=vdu_index, + kdu_index=kdu_index, deploy_params=deploy_params, config_descriptor=descriptor_config, base_folder=base_folder, @@ -3886,41 +4075,6 @@ class NsLcm(LcmBase): member_vnf_index or "", vdu_id or "" ) - @staticmethod - def _create_nslcmop(nsr_id, operation, params): - """ - Creates a ns-lcm-opp content to be stored at database. - :param nsr_id: internal id of the instance - :param operation: instantiate, terminate, scale, action, ... - :param params: user parameters for the operation - :return: dictionary following SOL005 format - """ - # Raise exception if invalid arguments - if not (nsr_id and operation and params): - raise LcmException( - "Parameters 'nsr_id', 'operation' and 'params' needed to create primitive not provided" - ) - now = time() - _id = str(uuid4()) - nslcmop = { - "id": _id, - "_id": _id, - # COMPLETED,PARTIALLY_COMPLETED,FAILED_TEMP,FAILED,ROLLING_BACK,ROLLED_BACK - "operationState": "PROCESSING", - "statusEnteredTime": now, - "nsInstanceId": nsr_id, - "lcmOperationType": operation, - "startTime": now, - "isAutomaticInvocation": False, - "operationParams": params, - "isCancelPending": False, - "links": { - "self": "/osm/nslcm/v1/ns_lcm_op_occs/" + _id, - "nsInstance": "/osm/nslcm/v1/ns_instances/" + nsr_id, - }, - } - return nslcmop - def _format_additional_params(self, params): params = params or {} for key, value in params.items(): @@ -4116,12 +4270,6 @@ class NsLcm(LcmBase): # Function to return execution_environment id - def _get_ee_id(self, vnf_index, vdu_id, vca_deployed_list): - # TODO vdu_index_count - for vca in vca_deployed_list: - if vca["member-vnf-index"] == vnf_index and vca["vdu_id"] == vdu_id: - return vca["ee_id"] - async def destroy_N2VC( self, logging_text, @@ -4220,226 +4368,26 @@ class NsLcm(LcmBase): # This uses NSR_ID, so it will destroy any jobs under this index self.db.del_list("prometheus_jobs", {"nsr_id": db_nslcmop["nsInstanceId"]}) - if destroy_ee: - await self.vca_map[vca_type].delete_execution_environment( - vca_deployed["ee_id"], - scaling_in=scaling_in, - vca_type=vca_type, - vca_id=vca_id, - ) - - async def _delete_all_N2VC(self, db_nsr: dict, vca_id: str = None): - self._write_all_config_status(db_nsr=db_nsr, status="TERMINATING") - namespace = "." + db_nsr["_id"] - try: - await self.n2vc.delete_namespace( - namespace=namespace, - total_timeout=self.timeout_charm_delete, - vca_id=vca_id, - ) - except N2VCNotFound: # already deleted. Skip - pass - self._write_all_config_status(db_nsr=db_nsr, status="DELETED") - - async def _terminate_RO( - self, logging_text, nsr_deployed, nsr_id, nslcmop_id, stage - ): - """ - Terminates a deployment from RO - :param logging_text: - :param nsr_deployed: db_nsr._admin.deployed - :param nsr_id: - :param nslcmop_id: - :param stage: list of string with the content to write on db_nslcmop.detailed-status. - this method will update only the index 2, but it will write on database the concatenated content of the list - :return: - """ - db_nsr_update = {} - failed_detail = [] - ro_nsr_id = ro_delete_action = None - if nsr_deployed and nsr_deployed.get("RO"): - ro_nsr_id = nsr_deployed["RO"].get("nsr_id") - ro_delete_action = nsr_deployed["RO"].get("nsr_delete_action_id") - try: - if ro_nsr_id: - stage[2] = "Deleting ns from VIM." - db_nsr_update["detailed-status"] = " ".join(stage) - self._write_op_status(nslcmop_id, stage) - self.logger.debug(logging_text + stage[2]) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._write_op_status(nslcmop_id, stage) - desc = await self.RO.delete("ns", ro_nsr_id) - ro_delete_action = desc["action_id"] - db_nsr_update[ - "_admin.deployed.RO.nsr_delete_action_id" - ] = ro_delete_action - db_nsr_update["_admin.deployed.RO.nsr_id"] = None - db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - if ro_delete_action: - # wait until NS is deleted from VIM - stage[2] = "Waiting ns deleted from VIM." - detailed_status_old = None - self.logger.debug( - logging_text - + stage[2] - + " RO_id={} ro_delete_action={}".format( - ro_nsr_id, ro_delete_action - ) - ) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._write_op_status(nslcmop_id, stage) - - delete_timeout = 20 * 60 # 20 minutes - while delete_timeout > 0: - desc = await self.RO.show( - "ns", - item_id_name=ro_nsr_id, - extra_item="action", - extra_item_id=ro_delete_action, - ) - - # deploymentStatus - self._on_update_ro_db(nsrs_id=nsr_id, ro_descriptor=desc) - - ns_status, ns_status_info = self.RO.check_action_status(desc) - if ns_status == "ERROR": - raise ROclient.ROClientException(ns_status_info) - elif ns_status == "BUILD": - stage[2] = "Deleting from VIM {}".format(ns_status_info) - elif ns_status == "ACTIVE": - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None - db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - break - else: - assert ( - False - ), "ROclient.check_action_status returns unknown {}".format( - ns_status - ) - if stage[2] != detailed_status_old: - detailed_status_old = stage[2] - db_nsr_update["detailed-status"] = " ".join(stage) - self._write_op_status(nslcmop_id, stage) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - await asyncio.sleep(5, loop=self.loop) - delete_timeout -= 5 - else: # delete_timeout <= 0: - raise ROclient.ROClientException( - "Timeout waiting ns deleted from VIM" - ) - - except Exception as e: - self.update_db_2("nsrs", nsr_id, db_nsr_update) - if ( - isinstance(e, ROclient.ROClientException) and e.http_code == 404 - ): # not found - db_nsr_update["_admin.deployed.RO.nsr_id"] = None - db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None - self.logger.debug( - logging_text + "RO_ns_id={} already deleted".format(ro_nsr_id) - ) - elif ( - isinstance(e, ROclient.ROClientException) and e.http_code == 409 - ): # conflict - failed_detail.append("delete conflict: {}".format(e)) - self.logger.debug( - logging_text - + "RO_ns_id={} delete conflict: {}".format(ro_nsr_id, e) - ) - else: - failed_detail.append("delete error: {}".format(e)) - self.logger.error( - logging_text + "RO_ns_id={} delete error: {}".format(ro_nsr_id, e) - ) - - # Delete nsd - if not failed_detail and deep_get(nsr_deployed, ("RO", "nsd_id")): - ro_nsd_id = nsr_deployed["RO"]["nsd_id"] - try: - stage[2] = "Deleting nsd from RO." - db_nsr_update["detailed-status"] = " ".join(stage) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._write_op_status(nslcmop_id, stage) - await self.RO.delete("nsd", ro_nsd_id) - self.logger.debug( - logging_text + "ro_nsd_id={} deleted".format(ro_nsd_id) - ) - db_nsr_update["_admin.deployed.RO.nsd_id"] = None - except Exception as e: - if ( - isinstance(e, ROclient.ROClientException) and e.http_code == 404 - ): # not found - db_nsr_update["_admin.deployed.RO.nsd_id"] = None - self.logger.debug( - logging_text + "ro_nsd_id={} already deleted".format(ro_nsd_id) - ) - elif ( - isinstance(e, ROclient.ROClientException) and e.http_code == 409 - ): # conflict - failed_detail.append( - "ro_nsd_id={} delete conflict: {}".format(ro_nsd_id, e) - ) - self.logger.debug(logging_text + failed_detail[-1]) - else: - failed_detail.append( - "ro_nsd_id={} delete error: {}".format(ro_nsd_id, e) - ) - self.logger.error(logging_text + failed_detail[-1]) - - if not failed_detail and deep_get(nsr_deployed, ("RO", "vnfd")): - for index, vnf_deployed in enumerate(nsr_deployed["RO"]["vnfd"]): - if not vnf_deployed or not vnf_deployed["id"]: - continue - try: - ro_vnfd_id = vnf_deployed["id"] - stage[ - 2 - ] = "Deleting member_vnf_index={} ro_vnfd_id={} from RO.".format( - vnf_deployed["member-vnf-index"], ro_vnfd_id - ) - db_nsr_update["detailed-status"] = " ".join(stage) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._write_op_status(nslcmop_id, stage) - await self.RO.delete("vnfd", ro_vnfd_id) - self.logger.debug( - logging_text + "ro_vnfd_id={} deleted".format(ro_vnfd_id) - ) - db_nsr_update["_admin.deployed.RO.vnfd.{}.id".format(index)] = None - except Exception as e: - if ( - isinstance(e, ROclient.ROClientException) and e.http_code == 404 - ): # not found - db_nsr_update[ - "_admin.deployed.RO.vnfd.{}.id".format(index) - ] = None - self.logger.debug( - logging_text - + "ro_vnfd_id={} already deleted ".format(ro_vnfd_id) - ) - elif ( - isinstance(e, ROclient.ROClientException) and e.http_code == 409 - ): # conflict - failed_detail.append( - "ro_vnfd_id={} delete conflict: {}".format(ro_vnfd_id, e) - ) - self.logger.debug(logging_text + failed_detail[-1]) - else: - failed_detail.append( - "ro_vnfd_id={} delete error: {}".format(ro_vnfd_id, e) - ) - self.logger.error(logging_text + failed_detail[-1]) - - if failed_detail: - stage[2] = "Error deleting from VIM" - else: - stage[2] = "Deleted from VIM" - db_nsr_update["detailed-status"] = " ".join(stage) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._write_op_status(nslcmop_id, stage) + if destroy_ee: + await self.vca_map[vca_type].delete_execution_environment( + vca_deployed["ee_id"], + scaling_in=scaling_in, + vca_type=vca_type, + vca_id=vca_id, + ) - if failed_detail: - raise LcmException("; ".join(failed_detail)) + async def _delete_all_N2VC(self, db_nsr: dict, vca_id: str = None): + self._write_all_config_status(db_nsr=db_nsr, status="TERMINATING") + namespace = "." + db_nsr["_id"] + try: + await self.n2vc.delete_namespace( + namespace=namespace, + total_timeout=self.timeout.charm_delete, + vca_id=vca_id, + ) + except N2VCNotFound: # already deleted. Skip + pass + self._write_all_config_status(db_nsr=db_nsr, status="DELETED") async def terminate(self, nsr_id, nslcmop_id): # Try to lock HA task here @@ -4449,7 +4397,7 @@ class NsLcm(LcmBase): logging_text = "Task ns={} terminate={} ".format(nsr_id, nslcmop_id) self.logger.debug(logging_text + "Enter") - timeout_ns_terminate = self.timeout_ns_terminate + timeout_ns_terminate = self.timeout.ns_terminate db_nsr = None db_nslcmop = None operation_params = None @@ -4547,9 +4495,7 @@ class NsLcm(LcmBase): ) and vca.get("needed_terminate") # For helm we must destroy_ee. Also for native_charm, as juju_model cannot be deleted if there are # pending native charms - destroy_ee = ( - True if vca_type in ("helm", "helm-v3", "native_charm") else False - ) + destroy_ee = True if vca_type in ("helm-v3", "native_charm") else False # self.logger.debug(logging_text + "vca_index: {}, ee_id: {}, vca_type: {} destroy_ee: {}".format( # vca_index, vca.get("ee_id"), vca_type, destroy_ee)) task = asyncio.ensure_future( @@ -4575,7 +4521,7 @@ class NsLcm(LcmBase): error_list = await self._wait_for_tasks( logging_text, tasks_dict_info, - min(self.timeout_charm_delete, timeout_ns_terminate), + min(self.timeout.charm_delete, timeout_ns_terminate), stage, nslcmop_id, ) @@ -4593,12 +4539,22 @@ class NsLcm(LcmBase): task_delete_ee = asyncio.ensure_future( asyncio.wait_for( self._delete_all_N2VC(db_nsr=db_nsr, vca_id=vca_id), - timeout=self.timeout_charm_delete, + timeout=self.timeout.charm_delete, ) ) # task_delete_ee = asyncio.ensure_future(self.n2vc.delete_namespace(namespace="." + nsr_id)) tasks_dict_info[task_delete_ee] = "Terminating all VCA" + # Delete Namespace and Certificates if necessary + if check_helm_ee_in_ns(list(db_vnfds_from_member_index.values())): + await self.vca_map["helm-v3"].delete_tls_certificate( + namespace=db_nslcmop["nsInstanceId"], + certificate_name=self.EE_TLS_NAME, + ) + await self.vca_map["helm-v3"].delete_namespace( + namespace=db_nslcmop["nsInstanceId"], + ) + # Delete from k8scluster stage[1] = "Deleting KDUs." self.logger.debug(logging_text + stage[1]) @@ -4632,19 +4588,13 @@ class NsLcm(LcmBase): # remove from RO stage[1] = "Deleting ns from VIM." - if self.ng_ro: + if self.ro_config.ng: task_delete_ro = asyncio.ensure_future( self._terminate_ng_ro( logging_text, nsr_deployed, nsr_id, nslcmop_id, stage ) ) - else: - task_delete_ro = asyncio.ensure_future( - self._terminate_RO( - logging_text, nsr_deployed, nsr_id, nslcmop_id, stage - ) - ) - tasks_dict_info[task_delete_ro] = "Removing deployment from VIM" + tasks_dict_info[task_delete_ro] = "Removing deployment from VIM" # rest of staff will be done at finally @@ -4685,7 +4635,14 @@ class NsLcm(LcmBase): stage[1] = stage[2] = "" except asyncio.CancelledError: error_list.append("Cancelled") - # TODO cancell all tasks + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( + logging_text, + tasks_dict_info, + timeout_ns_terminate, + stage, + nslcmop_id, + ) except Exception as exc: error_list.append(str(exc)) # update status at database @@ -4760,12 +4717,13 @@ class NsLcm(LcmBase): "operationState": nslcmop_operation_state, "autoremove": autoremove, }, - loop=self.loop, ) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) ) + self.logger.debug(f"Deleting alerts: ns_id={nsr_id}") + self.db.del_list("alerts", {"tags.ns_id": nsr_id}) self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_terminate") @@ -4847,6 +4805,11 @@ class NsLcm(LcmBase): self._write_op_status(nslcmop_id, stage) return error_detail_list + async def _cancel_pending_tasks(self, logging_text, created_tasks_info): + for task, name in created_tasks_info.items(): + self.logger.debug(logging_text + "Cancelling task: " + name) + task.cancel() + @staticmethod def _map_primitive_params(primitive_desc, params, instantiation_params): """ @@ -4995,13 +4958,13 @@ class NsLcm(LcmBase): ee_id=ee_id, primitive_name=primitive, params_dict=primitive_params, - progress_timeout=self.timeout_progress_primitive, - total_timeout=self.timeout_primitive, + progress_timeout=self.timeout.progress_primitive, + total_timeout=self.timeout.primitive, db_dict=db_dict, vca_id=vca_id, vca_type=vca_type, ), - timeout=timeout or self.timeout_primitive, + timeout=timeout or self.timeout.primitive, ) # execution was OK break @@ -5016,7 +4979,7 @@ class NsLcm(LcmBase): ) ) # wait and retry - await asyncio.sleep(retries_interval, loop=self.loop) + await asyncio.sleep(retries_interval) else: if isinstance(e, asyncio.TimeoutError): e = N2VCException( @@ -5081,6 +5044,7 @@ class NsLcm(LcmBase): nslcmop_operation_state = None error_description_nslcmop = None exc = None + step = "" try: # wait for any previous tasks in process step = "Waiting for previous operations to terminate" @@ -5109,7 +5073,7 @@ class NsLcm(LcmBase): primitive = db_nslcmop["operationParams"]["primitive"] primitive_params = db_nslcmop["operationParams"]["primitive_params"] timeout_ns_action = db_nslcmop["operationParams"].get( - "timeout_ns_action", self.timeout_primitive + "timeout_ns_action", self.timeout.primitive ) if vnf_index: @@ -5210,7 +5174,7 @@ class NsLcm(LcmBase): kdu_action = ( True if primitive_name in actions - and kdu["k8scluster-type"] not in ("helm-chart", "helm-chart-v3") + and kdu["k8scluster-type"] != "helm-chart-v3" else False ) @@ -5255,11 +5219,14 @@ class NsLcm(LcmBase): del desc_params["kdu_model"] else: kdu_model = kdu.get("kdu-model") - parts = kdu_model.split(sep=":") - if len(parts) == 2: - kdu_model = parts[0] + if kdu_model.count("/") < 2: # helm chart is not embedded + parts = kdu_model.split(sep=":") + if len(parts) == 2: + kdu_model = parts[0] if desc_params.get("kdu_atomic_upgrade"): - atomic_upgrade = desc_params.get("kdu_atomic_upgrade").lower() in ("yes", "true", "1") + atomic_upgrade = desc_params.get( + "kdu_atomic_upgrade" + ).lower() in ("yes", "true", "1") del desc_params["kdu_atomic_upgrade"] else: atomic_upgrade = True @@ -5424,7 +5391,6 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, }, - loop=self.loop, ) except Exception as e: self.logger.error( @@ -5482,7 +5448,7 @@ class NsLcm(LcmBase): stage[2] = "Terminating VDUs" if scaling_info.get("vdu-delete"): # scale_process = "RO" - if self.ro_config.get("ng"): + if self.ro_config.ng: await self._scale_ng_ro( logging_text, db_nsr, @@ -5660,7 +5626,7 @@ class NsLcm(LcmBase): } ) scaling_info["vdu-create"][vdud["id"]] = count_index - if self.ro_config.get("ng"): + if self.ro_config.ng: self.logger.debug( "New Resources to be deployed: {}".format(scaling_info) ) @@ -5706,7 +5672,7 @@ class NsLcm(LcmBase): path=path, charm_id=charm_id, charm_type=charm_type, - timeout=timeout or self.timeout_ns_update, + timeout=timeout or self.timeout.ns_update, ) if output: @@ -5716,7 +5682,6 @@ class NsLcm(LcmBase): raise except Exception as e: - self.logger.debug("Error upgrading charm {}".format(path)) return "FAILED", "Error upgrading charm {}: {}".format(path, e) @@ -5753,6 +5718,7 @@ class NsLcm(LcmBase): exc = None change_type = "updated" detailed_status = "" + member_vnf_index = None try: # wait for any previous tasks in process @@ -5779,7 +5745,6 @@ class NsLcm(LcmBase): nsr_deployed = db_nsr["_admin"].get("deployed") if update_type == "CHANGE_VNFPKG": - # Get the input parameters given through update request vnf_instance_id = db_nslcmop["operationParams"][ "changeVnfPackageData" @@ -5814,11 +5779,11 @@ class NsLcm(LcmBase): current_charm_artifact_path, target_charm_artifact_path, charm_artifact_paths, - ) = ([], [], []) + helm_artifacts, + ) = ([], [], [], []) step = "Checking if revision has changed in VNFD" if current_vnf_revision != latest_vnfd_revision: - change_type = "policy_updated" # There is new revision of VNFD, update operation is required @@ -5836,24 +5801,33 @@ class NsLcm(LcmBase): step = ( "Get the charm-type, charm-id, ee-id if there is deployed VCA" ) - base_folder = latest_vnfd["_admin"]["storage"] + current_base_folder = current_vnfd["_admin"]["storage"] + latest_base_folder = latest_vnfd["_admin"]["storage"] - for charm_index, charm_deployed in enumerate( + for vca_index, vca_deployed in enumerate( get_iterable(nsr_deployed, "VCA") ): vnf_index = db_vnfr.get("member-vnf-index-ref") # Getting charm-id and charm-type - if charm_deployed.get("member-vnf-index") == vnf_index: - charm_id = self.get_vca_id(db_vnfr, db_nsr) - charm_type = charm_deployed.get("type") + if vca_deployed.get("member-vnf-index") == vnf_index: + vca_id = self.get_vca_id(db_vnfr, db_nsr) + vca_type = vca_deployed.get("type") + vdu_count_index = vca_deployed.get("vdu_count_index") # Getting ee-id - ee_id = charm_deployed.get("ee_id") + ee_id = vca_deployed.get("ee_id") step = "Getting descriptor config" + if current_vnfd.get("kdu"): + search_key = "kdu_name" + else: + search_key = "vnfd_id" + + entity_id = vca_deployed.get(search_key) + descriptor_config = get_configuration( - current_vnfd, current_vnfd["id"] + current_vnfd, entity_id ) if "execution-environment-list" in descriptor_config: @@ -5866,27 +5840,52 @@ class NsLcm(LcmBase): # There could be several charm used in the same VNF for ee_item in ee_list: if ee_item.get("juju"): - step = "Getting charm name" charm_name = ee_item["juju"].get("charm") step = "Setting Charm artifact paths" current_charm_artifact_path.append( get_charm_artifact_path( - base_folder, + current_base_folder, charm_name, - charm_type, + vca_type, current_vnf_revision, ) ) target_charm_artifact_path.append( get_charm_artifact_path( - base_folder, + latest_base_folder, charm_name, - charm_type, + vca_type, latest_vnfd_revision, ) ) + elif ee_item.get("helm-chart"): + # add chart to list and all parameters + step = "Getting helm chart name" + chart_name = ee_item.get("helm-chart") + vca_type = "helm-v3" + step = "Setting Helm chart artifact paths" + + helm_artifacts.append( + { + "current_artifact_path": get_charm_artifact_path( + current_base_folder, + chart_name, + vca_type, + current_vnf_revision, + ), + "target_artifact_path": get_charm_artifact_path( + latest_base_folder, + chart_name, + vca_type, + latest_vnfd_revision, + ), + "ee_id": ee_id, + "vca_index": vca_index, + "vdu_index": vdu_count_index, + } + ) charm_artifact_paths = zip( current_charm_artifact_path, target_charm_artifact_path @@ -5896,7 +5895,6 @@ class NsLcm(LcmBase): if find_software_version(current_vnfd) != find_software_version( latest_vnfd ): - step = "Checking if existing VNF has charm" for current_charm_path, target_charm_path in list( charm_artifact_paths @@ -5918,7 +5916,25 @@ class NsLcm(LcmBase): if result == "FAILED": nslcmop_operation_state = result error_description_nslcmop = detailed_status + old_operational_status = "failed" db_nslcmop_update["detailed-status"] = detailed_status + db_nsr_update["detailed-status"] = detailed_status + scaling_aspect = get_scaling_aspect(latest_vnfd) + scaling_group_desc = db_nsr.get("_admin").get( + "scaling-group", None + ) + if scaling_group_desc: + for aspect in scaling_aspect: + scaling_group_id = aspect.get("id") + for scale_index, scaling_group in enumerate( + scaling_group_desc + ): + if scaling_group.get("name") == scaling_group_id: + db_nsr_update[ + "_admin.scaling-group.{}.nb-scale-op".format( + scale_index + ) + ] = 0 self.logger.debug( logging_text + " step {} Done with result {} {}".format( @@ -5938,10 +5954,8 @@ class NsLcm(LcmBase): current_charm_path, target_charm_path ) ): - step = "Checking whether VNF uses juju bundle" if check_juju_bundle_existence(current_vnfd): - raise LcmException( "Charm upgrade is not supported for the instance which" " uses juju-bundle: {}".format( @@ -5955,8 +5969,8 @@ class NsLcm(LcmBase): detailed_status, ) = await self._ns_charm_upgrade( ee_id=ee_id, - charm_id=charm_id, - charm_type=charm_type, + charm_id=vca_id, + charm_type=vca_type, path=self.fs.path + target_charm_path, timeout=timeout_seconds, ) @@ -5979,6 +5993,121 @@ class NsLcm(LcmBase): detailed_status = "Done" db_nslcmop_update["detailed-status"] = "Done" + # helm base EE + for item in helm_artifacts: + if not ( + item["current_artifact_path"] + and item["target_artifact_path"] + and self.check_charm_hash_changed( + item["current_artifact_path"], + item["target_artifact_path"], + ) + ): + continue + db_update_entry = "_admin.deployed.VCA.{}.".format( + item["vca_index"] + ) + vnfr_id = db_vnfr["_id"] + osm_config = {"osm": {"ns_id": nsr_id, "vnf_id": vnfr_id}} + db_dict = { + "collection": "nsrs", + "filter": {"_id": nsr_id}, + "path": db_update_entry, + } + vca_type, namespace, helm_id = get_ee_id_parts(item["ee_id"]) + await self.vca_map[vca_type].upgrade_execution_environment( + namespace=namespace, + helm_id=helm_id, + db_dict=db_dict, + config=osm_config, + artifact_path=item["target_artifact_path"], + vca_type=vca_type, + ) + vnf_id = db_vnfr.get("vnfd-ref") + config_descriptor = get_configuration(latest_vnfd, vnf_id) + self.logger.debug("get ssh key block") + rw_mgmt_ip = None + if deep_get( + config_descriptor, + ("config-access", "ssh-access", "required"), + ): + # Needed to inject a ssh key + user = deep_get( + config_descriptor, + ("config-access", "ssh-access", "default-user"), + ) + step = ( + "Install configuration Software, getting public ssh key" + ) + pub_key = await self.vca_map[ + vca_type + ].get_ee_ssh_public__key( + ee_id=ee_id, db_dict=db_dict, vca_id=vca_id + ) + + step = ( + "Insert public key into VM user={} ssh_key={}".format( + user, pub_key + ) + ) + self.logger.debug(logging_text + step) + + # wait for RO (ip-address) Insert pub_key into VM + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro( + logging_text, + nsr_id, + vnfr_id, + None, + item["vdu_index"], + user=user, + pub_key=pub_key, + ) + + initial_config_primitive_list = config_descriptor.get( + "initial-config-primitive" + ) + config_primitive = next( + ( + p + for p in initial_config_primitive_list + if p["name"] == "config" + ), + None, + ) + if not config_primitive: + continue + + deploy_params = {"OSM": get_osm_params(db_vnfr)} + if rw_mgmt_ip: + deploy_params["rw_mgmt_ip"] = rw_mgmt_ip + if db_vnfr.get("additionalParamsForVnf"): + deploy_params.update( + parse_yaml_strings( + db_vnfr["additionalParamsForVnf"].copy() + ) + ) + primitive_params_ = self._map_primitive_params( + config_primitive, {}, deploy_params + ) + + step = "execute primitive '{}' params '{}'".format( + config_primitive["name"], primitive_params_ + ) + self.logger.debug(logging_text + step) + await self.vca_map[vca_type].exec_primitive( + ee_id=ee_id, + primitive_name=config_primitive["name"], + params_dict=primitive_params_, + db_dict=db_dict, + vca_id=vca_id, + vca_type=vca_type, + ) + + step = "Updating policies" + member_vnf_index = db_vnfr["member-vnf-index-ref"] + detailed_status = "Done" + db_nslcmop_update["detailed-status"] = "Done" + # If nslcmop_operation_state is None, so any operation is not failed. if not nslcmop_operation_state: nslcmop_operation_state = "COMPLETED" @@ -6099,9 +6228,12 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - if change_type in ("vnf_terminated", "policy_updated"): + if ( + change_type in ("vnf_terminated", "policy_updated") + and member_vnf_index + ): msg.update({"vnf_member_index": member_vnf_index}) - await self.msg.aiowrite("ns", change_type, msg, loop=self.loop) + await self.msg.aiowrite("ns", change_type, msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -6131,6 +6263,7 @@ class NsLcm(LcmBase): old_operational_status = "" old_config_status = "" nsi_id = None + prom_job_name = "" try: # wait for any previous tasks in process step = "Waiting for previous operations to terminate" @@ -6241,6 +6374,15 @@ class NsLcm(LcmBase): vdud = get_vdu(db_vnfd, vdu_delta["id"]) # vdu_index also provides the number of instance of the targeted vdu vdu_count = vdu_index = get_vdur_index(db_vnfr, vdu_delta) + if vdu_index <= len(db_vnfr["vdur"]): + vdu_name_id = db_vnfr["vdur"][vdu_index - 1]["vdu-name"] + prom_job_name = ( + db_vnfr["_id"] + vdu_name_id + str(vdu_index - 1) + ) + prom_job_name = prom_job_name.replace("_", "") + prom_job_name = prom_job_name.replace("-", "") + else: + prom_job_name = None cloud_init_text = self._get_vdu_cloud_init_content( vdud, db_vnfd ) @@ -6317,11 +6459,6 @@ class NsLcm(LcmBase): if kdur.get("helm-chart"): k8s_cluster_type = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8s_cluster_type = "helm-chart" elif kdur.get("juju-bundle"): k8s_cluster_type = "juju-bundle" else: @@ -6450,11 +6587,6 @@ class NsLcm(LcmBase): if kdur.get("helm-chart"): k8s_cluster_type = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8s_cluster_type = "helm-chart" elif kdur.get("juju-bundle"): k8s_cluster_type = "juju-bundle" else: @@ -6741,7 +6873,7 @@ class NsLcm(LcmBase): scaling_in=True, vca_id=vca_id, ), - timeout=self.timeout_charm_delete, + timeout=self.timeout.charm_delete, ) ) tasks_dict_info[task] = "Terminating VCA {}".format( @@ -6761,7 +6893,7 @@ class NsLcm(LcmBase): logging_text, tasks_dict_info, min( - self.timeout_charm_delete, self.timeout_ns_terminate + self.timeout.charm_delete, self.timeout.ns_terminate ), stage, nslcmop_id, @@ -6783,7 +6915,7 @@ class NsLcm(LcmBase): # SCALE RO - BEGIN if scaling_info.get("vdu-create") or scaling_info.get("vdu-delete"): scale_process = "RO" - if self.ro_config.get("ng"): + if self.ro_config.ng: await self._scale_ng_ro( logging_text, db_nsr, db_nslcmop, db_vnfr, scaling_info, stage ) @@ -6837,6 +6969,7 @@ class NsLcm(LcmBase): vdu_id = None vdu_name = None kdu_name = None + kdu_index = None self._deploy_n2vc( logging_text=logging_text + "member_vnf_index={} ".format(member_vnf_index), @@ -6848,6 +6981,7 @@ class NsLcm(LcmBase): vnfd_id=vnfd_id, vdu_id=vdu_id, kdu_name=kdu_name, + kdu_index=kdu_index, member_vnf_index=member_vnf_index, vdu_index=vdu_index, vdu_name=vdu_name, @@ -6874,6 +7008,7 @@ class NsLcm(LcmBase): if descriptor_config: vdu_name = None kdu_name = None + kdu_index = None stage[ 1 ] = "Scaling member_vnf_index={}, vdu_id={}, vdu_index={} ".format( @@ -6896,6 +7031,7 @@ class NsLcm(LcmBase): kdu_name=kdu_name, member_vnf_index=member_vnf_index, vdu_index=vdu_index, + kdu_index=kdu_index, vdu_name=vdu_name, deploy_params=deploy_params_vdu, descriptor_config=descriptor_config, @@ -7035,7 +7171,69 @@ class NsLcm(LcmBase): db_nsr_update["config-status"] = old_config_status scale_process = None # POST-SCALE END + # Check if each vnf has exporter for metric collection if so update prometheus job records + if scaling_type == "SCALE_OUT": + if "exporters-endpoints" in db_vnfd.get("df")[0]: + vnfr_id = db_vnfr["id"] + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + exporter_config = db_vnfd.get("df")[0].get("exporters-endpoints") + self.logger.debug("exporter config :{}".format(exporter_config)) + artifact_path = "{}/{}/{}".format( + base_folder["folder"], + base_folder["pkg-dir"], + "exporter-endpoint", + ) + ee_id = None + ee_config_descriptor = exporter_config + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro( + logging_text, + nsr_id, + vnfr_id, + vdu_id=db_vnfr["vdur"][-1]["vdu-id-ref"], + vdu_index=db_vnfr["vdur"][-1]["count-index"], + user=None, + pub_key=None, + ) + self.logger.debug("rw_mgmt_ip:{}".format(rw_mgmt_ip)) + self.logger.debug("Artifact_path:{}".format(artifact_path)) + vdu_id_for_prom = None + vdu_index_for_prom = None + for x in get_iterable(db_vnfr, "vdur"): + vdu_id_for_prom = x.get("vdu-id-ref") + vdu_index_for_prom = x.get("count-index") + vnfr_id = vnfr_id + vdu_id + str(vdu_index) + vnfr_id = vnfr_id.replace("_", "") + prometheus_jobs = await self.extract_prometheus_scrape_jobs( + ee_id=ee_id, + artifact_path=artifact_path, + ee_config_descriptor=ee_config_descriptor, + vnfr_id=vnfr_id, + nsr_id=nsr_id, + target_ip=rw_mgmt_ip, + element_type="VDU", + vdu_id=vdu_id_for_prom, + vdu_index=vdu_index_for_prom, + ) + + self.logger.debug("Prometheus job:{}".format(prometheus_jobs)) + if prometheus_jobs: + db_nsr_update[ + "_admin.deployed.prometheus_jobs" + ] = prometheus_jobs + self.update_db_2( + "nsrs", + nsr_id, + db_nsr_update, + ) + for job in prometheus_jobs: + self.db.set_one( + "prometheus_jobs", + {"job_name": ""}, + job, + upsert=True, + fail_on_empty=False, + ) db_nsr_update[ "detailed-status" ] = "" # "scaled {} {}".format(scaling_group, scaling_type) @@ -7066,27 +7264,45 @@ class NsLcm(LcmBase): exc_info=True, ) finally: + error_list = list() + if exc: + error_list.append(str(exc)) self._write_ns_status( nsr_id=nsr_id, ns_state=None, current_operation="IDLE", current_operation_id=None, ) - if tasks_dict_info: - stage[1] = "Waiting for instantiate pending tasks." - self.logger.debug(logging_text + stage[1]) - exc = await self._wait_for_tasks( + try: + if tasks_dict_info: + stage[1] = "Waiting for instantiate pending tasks." + self.logger.debug(logging_text + stage[1]) + exc = await self._wait_for_tasks( + logging_text, + tasks_dict_info, + self.timeout.ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) + except asyncio.CancelledError: + error_list.append("Cancelled") + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( logging_text, tasks_dict_info, - self.timeout_ns_deploy, + self.timeout.ns_deploy, stage, nslcmop_id, nsr_id=nsr_id, ) - if exc: + if error_list: + error_detail = "; ".join(error_list) db_nslcmop_update[ "detailed-status" - ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + ] = error_description_nslcmop = "FAILED {}: {}".format( + step, error_detail + ) nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status @@ -7100,12 +7316,18 @@ class NsLcm(LcmBase): db_nsr_update[ "detailed-status" ] = "FAILED scaling nslcmop={} {}: {}".format( - nslcmop_id, step, exc + nslcmop_id, step, error_detail ) else: error_description_nslcmop = None nslcmop_operation_state = "COMPLETED" db_nslcmop_update["detailed-status"] = "Done" + if scaling_type == "SCALE_IN" and prom_job_name is not None: + self.db.del_one( + "prometheus_jobs", + {"job_name": prom_job_name}, + fail_on_empty=False, + ) self._write_op_status( op_id=nslcmop_id, @@ -7130,7 +7352,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "scaled", msg, loop=self.loop) + await self.msg.aiowrite("ns", "scaled", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -7193,23 +7415,27 @@ class NsLcm(LcmBase): primitive_name=terminate_config_primitive["name"], params=primitive_params_, db_dict=db_dict, + total_timeout=self.timeout.primitive, vca_id=vca_id, ), - timeout=600, + timeout=self.timeout.primitive + * self.timeout.primitive_outer_factor, ) await asyncio.wait_for( self.k8scluster_map[k8s_cluster_type].scale( - kdu_instance, - scale, - kdu_scaling_info["resource-name"], + kdu_instance=kdu_instance, + scale=scale, + resource_name=kdu_scaling_info["resource-name"], + total_timeout=self.timeout.scale_on_error, vca_id=vca_id, cluster_uuid=cluster_uuid, kdu_model=kdu_model, atomic=True, db_dict=db_dict, ), - timeout=self.timeout_vca_on_error, + timeout=self.timeout.scale_on_error + * self.timeout.scale_on_error_outer_factor, ) if kdu_scaling_info["type"] == "create": @@ -7284,7 +7510,7 @@ class NsLcm(LcmBase): n2vc_key_list, stage=stage, start_deploy=time(), - timeout_ns_deploy=self.timeout_ns_deploy, + timeout_ns_deploy=self.timeout.ns_deploy, ) if vdu_scaling_info.get("vdu-delete"): self.scale_vnfr( @@ -7292,8 +7518,48 @@ class NsLcm(LcmBase): ) async def extract_prometheus_scrape_jobs( - self, ee_id, artifact_path, ee_config_descriptor, vnfr_id, nsr_id, target_ip - ): + self, + ee_id: str, + artifact_path: str, + ee_config_descriptor: dict, + vnfr_id: str, + nsr_id: str, + target_ip: str, + element_type: str, + vnf_member_index: str = "", + vdu_id: str = "", + vdu_index: int = None, + kdu_name: str = "", + kdu_index: int = None, + ) -> dict: + """Method to extract prometheus scrape jobs from EE's Prometheus template job file + This method will wait until the corresponding VDU or KDU is fully instantiated + + Args: + ee_id (str): Execution Environment ID + artifact_path (str): Path where the EE's content is (including the Prometheus template file) + ee_config_descriptor (dict): Execution Environment's configuration descriptor + vnfr_id (str): VNFR ID where this EE applies + nsr_id (str): NSR ID where this EE applies + target_ip (str): VDU/KDU instance IP address + element_type (str): NS or VNF or VDU or KDU + vnf_member_index (str, optional): VNF index where this EE applies. Defaults to "". + vdu_id (str, optional): VDU ID where this EE applies. Defaults to "". + vdu_index (int, optional): VDU index where this EE applies. Defaults to None. + kdu_name (str, optional): KDU name where this EE applies. Defaults to "". + kdu_index (int, optional): KDU index where this EE applies. Defaults to None. + + Raises: + LcmException: When the VDU or KDU instance was not found in an hour + + Returns: + _type_: Prometheus jobs + """ + # default the vdur and kdur names to an empty string, to avoid any later + # problem with Prometheus when the element type is not VDU or KDU + vdur_name = "" + kdur_name = "" + # look if exist a file called 'prometheus*.j2' and artifact_content = self.fs.dir_ls(artifact_path) job_file = next( @@ -7306,20 +7572,86 @@ class NsLcm(LcmBase): ) if not job_file: return + self.logger.debug("Artifact path{}".format(artifact_path)) + self.logger.debug("job file{}".format(job_file)) with self.fs.file_open((artifact_path, job_file), "r") as f: job_data = f.read() - # TODO get_service - _, _, service = ee_id.partition(".") # remove prefix "namespace." - host_name = "{}-{}".format(service, ee_config_descriptor["metric-service"]) - host_port = "80" - vnfr_id = vnfr_id.replace("-", "") - variables = { - "JOB_NAME": vnfr_id, - "TARGET_IP": target_ip, - "EXPORTER_POD_IP": host_name, - "EXPORTER_POD_PORT": host_port, - } + # obtain the VDUR or KDUR, if the element type is VDU or KDU + if element_type in ("VDU", "KDU"): + for _ in range(360): + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + if vdu_id and vdu_index is not None: + vdur = next( + ( + x + for x in get_iterable(db_vnfr, "vdur") + if ( + x.get("vdu-id-ref") == vdu_id + and x.get("count-index") == vdu_index + ) + ), + {}, + ) + if vdur.get("name"): + vdur_name = vdur.get("name") + break + if kdu_name and kdu_index is not None: + kdur = next( + ( + x + for x in get_iterable(db_vnfr, "kdur") + if ( + x.get("kdu-name") == kdu_name + and x.get("count-index") == kdu_index + ) + ), + {}, + ) + if kdur.get("name"): + kdur_name = kdur.get("name") + break + + await asyncio.sleep(10) + else: + if vdu_id and vdu_index is not None: + raise LcmException( + f"Timeout waiting VDU with name={vdu_id} and index={vdu_index} to be intantiated" + ) + if kdu_name and kdu_index is not None: + raise LcmException( + f"Timeout waiting KDU with name={kdu_name} and index={kdu_index} to be intantiated" + ) + + if ee_id is not None: + _, namespace, helm_id = get_ee_id_parts( + ee_id + ) # get namespace and EE gRPC service name + host_name = f'{helm_id}-{ee_config_descriptor["metric-service"]}.{namespace}.svc' # svc_name.namespace.svc + host_port = "80" + vnfr_id = vnfr_id.replace("-", "") + variables = { + "JOB_NAME": vnfr_id, + "TARGET_IP": target_ip, + "EXPORTER_POD_IP": host_name, + "EXPORTER_POD_PORT": host_port, + "NSR_ID": nsr_id, + "VNF_MEMBER_INDEX": vnf_member_index, + "VDUR_NAME": vdur_name, + "KDUR_NAME": kdur_name, + "ELEMENT_TYPE": element_type, + } + else: + metric_path = ee_config_descriptor["metric-path"] + target_port = ee_config_descriptor["metric-port"] + vnfr_id = vnfr_id.replace("-", "") + variables = { + "JOB_NAME": vnfr_id, + "TARGET_IP": target_ip, + "TARGET_PORT": target_port, + "METRIC_PATH": metric_path, + } + job_list = parse_job(job_data, variables) # ensure job_name is using the vnfr_id. Adding the metadata nsr_id for job in job_list: @@ -7327,7 +7659,7 @@ class NsLcm(LcmBase): not isinstance(job.get("job_name"), str) or vnfr_id not in job["job_name"] ): - job["job_name"] = vnfr_id + "_" + str(randint(1, 10000)) + job["job_name"] = vnfr_id + "_" + str(SystemRandom().randint(1, 10000)) job["nsr_id"] = nsr_id job["vnfr_id"] = vnfr_id return job_list @@ -7401,7 +7733,7 @@ class NsLcm(LcmBase): action_id, nslcmop_id, start_deploy, - self.timeout_operate, + self.timeout.operate, None, "start_stop_rebuild", ) @@ -7419,28 +7751,6 @@ class NsLcm(LcmBase): ) return "FAILED", "Error in operate VNF {}".format(exc) - def get_vca_cloud_and_credentials(self, vim_account_id: str) -> (str, str): - """ - Get VCA Cloud and VCA Cloud Credentials for the VIM account - - :param: vim_account_id: VIM Account ID - - :return: (cloud_name, cloud_credential) - """ - config = VimAccountDB.get_vim_account_with_id(vim_account_id).get("config", {}) - return config.get("vca_cloud"), config.get("vca_cloud_credential") - - def get_vca_k8s_cloud_and_credentials(self, vim_account_id: str) -> (str, str): - """ - Get VCA K8s Cloud and VCA K8s Cloud Credentials for the VIM account - - :param: vim_account_id: VIM Account ID - - :return: (cloud_name, cloud_credential) - """ - config = VimAccountDB.get_vim_account_with_id(vim_account_id).get("config", {}) - return config.get("vca_k8s_cloud"), config.get("vca_k8s_cloud_credential") - async def migrate(self, nsr_id, nslcmop_id): """ Migrate VNFs and VDUs instances in a NS @@ -7493,7 +7803,7 @@ class NsLcm(LcmBase): action_id, nslcmop_id, start_deploy, - self.timeout_migrate, + self.timeout.migrate, operation="migrate", ) except (ROclient.ROClientException, DbException, LcmException) as e: @@ -7536,7 +7846,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "migrated", msg, loop=self.loop) + await self.msg.aiowrite("ns", "migrated", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -7600,17 +7910,12 @@ class NsLcm(LcmBase): self.update_db_2("nsrs", nsr_id, db_nsr_update) step = "Sending heal order to VIM" - task_ro = asyncio.ensure_future( - self.heal_RO( - logging_text=logging_text, - nsr_id=nsr_id, - db_nslcmop=db_nslcmop, - stage=stage, - ) + await self.heal_RO( + logging_text=logging_text, + nsr_id=nsr_id, + db_nslcmop=db_nslcmop, + stage=stage, ) - self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "heal_RO", task_ro) - tasks_dict_info[task_ro] = "Healing at VIM" - # VCA tasks # read from db: nsd stage[1] = "Getting nsd={} from db.".format(db_nsr["nsd-id"]) @@ -7665,9 +7970,9 @@ class NsLcm(LcmBase): for target_vdu in target_vdu_list: deploy_params_vdu = target_vdu # Set run-day1 vnf level value if not vdu level value exists - if not deploy_params_vdu.get("run-day1") and target_vnf[ - "additionalParams" - ].get("run-day1"): + if not deploy_params_vdu.get("run-day1") and target_vnf.get( + "additionalParams", {} + ).get("run-day1"): deploy_params_vdu["run-day1"] = target_vnf[ "additionalParams" ].get("run-day1") @@ -7766,28 +8071,48 @@ class NsLcm(LcmBase): exc_info=True, ) finally: - if tasks_dict_info: - stage[1] = "Waiting for healing pending tasks." - self.logger.debug(logging_text + stage[1]) - exc = await self._wait_for_tasks( + error_list = list() + if exc: + error_list.append(str(exc)) + try: + if tasks_dict_info: + stage[1] = "Waiting for healing pending tasks." + self.logger.debug(logging_text + stage[1]) + exc = await self._wait_for_tasks( + logging_text, + tasks_dict_info, + self.timeout.ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) + except asyncio.CancelledError: + error_list.append("Cancelled") + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( logging_text, tasks_dict_info, - self.timeout_ns_deploy, + self.timeout.ns_deploy, stage, nslcmop_id, nsr_id=nsr_id, ) - if exc: + if error_list: + error_detail = "; ".join(error_list) db_nslcmop_update[ "detailed-status" - ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + ] = error_description_nslcmop = "FAILED {}: {}".format( + step, error_detail + ) nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status db_nsr_update["config-status"] = old_config_status db_nsr_update[ "detailed-status" - ] = "FAILED healing nslcmop={} {}: {}".format(nslcmop_id, step, exc) + ] = "FAILED healing nslcmop={} {}: {}".format( + nslcmop_id, step, error_detail + ) for task, task_name in tasks_dict_info.items(): if not task.done() or task.cancelled() or task.exception(): if task_name.startswith(self.task_name_deploy_vca): @@ -7827,7 +8152,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "healed", msg, loop=self.loop) + await self.msg.aiowrite("ns", "healed", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -7865,7 +8190,7 @@ class NsLcm(LcmBase): if ns_params and ns_params.get("timeout_ns_heal"): timeout_ns_heal = ns_params["timeout_ns_heal"] else: - timeout_ns_heal = self.timeout.get("ns_heal", self.timeout_ns_heal) + timeout_ns_heal = self.timeout.ns_heal db_vims = {} @@ -7987,10 +8312,7 @@ class NsLcm(LcmBase): vca_type = "native_charm" elif ee_item.get("helm-chart"): vca_name = ee_item["helm-chart"] - if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2": - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" else: self.logger.debug( logging_text + "skipping non juju neither charm configuration" @@ -8116,7 +8438,6 @@ class NsLcm(LcmBase): } step = "" try: - element_type = "NS" element_under_configuration = nsr_id @@ -8323,7 +8644,7 @@ class NsLcm(LcmBase): # if SSH access is required, then get execution environment SSH public # if native charm we have waited already to VM be UP - if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"): + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"): pub_key = None user = None # self.logger.debug("get ssh key block") @@ -8352,7 +8673,7 @@ class NsLcm(LcmBase): # n2vc_redesign STEP 5.1 # wait for RO (ip-address) Insert pub_key into VM # IMPORTANT: We need do wait for RO to complete healing operation. - await self._wait_heal_ro(nsr_id, self.timeout_ns_heal) + await self._wait_heal_ro(nsr_id, self.timeout.ns_heal) if vnfr_id: if kdu_name: rw_mgmt_ip = await self.wait_kdu_up( @@ -8509,7 +8830,7 @@ class NsLcm(LcmBase): self.logger.debug("Wait Heal RO > {}".format(operational_status_ro)) if operational_status_ro != "healing": break - await asyncio.sleep(15, loop=self.loop) + await asyncio.sleep(15) else: # timeout_ns_deploy raise NgRoException("Timeout waiting ns to deploy") @@ -8531,6 +8852,10 @@ class NsLcm(LcmBase): db_nslcmop = None db_nslcmop_update = {} nslcmop_operation_state = None + old_db_update = {} + q_filter = {} + old_vdu_index = None + old_flavor_id = None db_nsr_update = {} target = {} exc = None @@ -8554,6 +8879,59 @@ class NsLcm(LcmBase): ) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) operationParams = db_nslcmop.get("operationParams") + # Update the VNFRS and NSRS with the requested flavour detail, So that ro tasks can function properly + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + db_flavor = db_nsr.get("flavor") + db_flavor_index = str(len(db_flavor)) + change_vnf_flavor_data = operationParams["changeVnfFlavorData"] + flavor_dict = change_vnf_flavor_data["additionalParams"] + count_index = flavor_dict["vduCountIndex"] + vdu_id_ref = flavor_dict["vduid"] + flavor_dict_update = { + "id": db_flavor_index, + "memory-mb": flavor_dict["virtualMemory"], + "name": f"{vdu_id_ref}-{count_index}-flv", + "storage-gb": flavor_dict["sizeOfStorage"], + "vcpu-count": flavor_dict["numVirtualCpu"], + } + db_flavor.append(flavor_dict_update) + db_update = {} + db_update["flavor"] = db_flavor + ns_q_filter = { + "_id": nsr_id, + } + self.db.set_one( + "nsrs", + q_filter=ns_q_filter, + update_dict=db_update, + fail_on_empty=True, + ) + db_vnfr = self.db.get_one( + "vnfrs", {"_id": change_vnf_flavor_data["vnfInstanceId"]} + ) + for vdu_index, vdur in enumerate(db_vnfr.get("vdur", ())): + if ( + vdur.get("count-index") == count_index + and vdur.get("vdu-id-ref") == vdu_id_ref + ): + old_flavor_id = vdur.get("ns-flavor-id", 0) + old_vdu_index = vdu_index + filter_text = { + "_id": change_vnf_flavor_data["vnfInstanceId"], + "vdur.count-index": count_index, + "vdur.vdu-id-ref": vdu_id_ref, + } + q_filter.update(filter_text) + db_update = {} + db_update[ + "vdur.{}.ns-flavor-id".format(vdu_index) + ] = db_flavor_index + self.db.set_one( + "vnfrs", + q_filter=q_filter, + update_dict=db_update, + fail_on_empty=True, + ) target = {} target.update(operationParams) desc = await self.RO.vertical_scale(nsr_id, target) @@ -8564,10 +8942,15 @@ class NsLcm(LcmBase): action_id, nslcmop_id, start_deploy, - self.timeout_verticalscale, + self.timeout.verticalscale, operation="verticalscale", ) - except (ROclient.ROClientException, DbException, LcmException) as e: + except ( + NgRoException, + ROclient.ROClientException, + DbException, + LcmException, + ) as e: self.logger.error("Exit Exception {}".format(e)) exc = e except asyncio.CancelledError: @@ -8588,6 +8971,9 @@ class NsLcm(LcmBase): if exc: db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) nslcmop_operation_state = "FAILED" + old_db_update[ + "vdur.{}.ns-flavor-id".format(old_vdu_index) + ] = old_flavor_id else: nslcmop_operation_state = "COMPLETED" db_nslcmop_update["detailed-status"] = "Done" @@ -8600,6 +8986,16 @@ class NsLcm(LcmBase): operation_state=nslcmop_operation_state, other_update=db_nslcmop_update, ) + if old_vdu_index and old_db_update != {}: + self.logger.critical( + "Reverting Old Flavor -- : {}".format(old_db_update) + ) + self.db.set_one( + "vnfrs", + q_filter=q_filter, + update_dict=old_db_update, + fail_on_empty=True, + ) if nslcmop_operation_state: try: msg = { @@ -8607,7 +9003,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "verticalscaled", msg, loop=self.loop) + await self.msg.aiowrite("ns", "verticalscaled", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e)