X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_lcm%2Fns.py;h=e00f0d274b53dbdc70ec2012805e667f394eab90;hb=3ff2325f97dda222d286d0d8ceb720de8b1a537c;hp=5d960c5da2adf90c5a61654437cfb39e1897d871;hpb=411af2e5804302a45c911556a60fe702227fa799;p=osm%2FLCM.git diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index 5d960c5..e00f0d2 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -61,6 +61,7 @@ from osm_lcm.lcm_utils import ( check_juju_bundle_existence, get_charm_artifact_path, get_ee_id_parts, + vld_to_ro_ip_profile, ) from osm_lcm.data_utils.nsd import ( get_ns_configuration_relation_list, @@ -97,7 +98,6 @@ from osm_lcm.data_utils.vnfr import ( from osm_lcm.data_utils.dict_utils import parse_yaml_strings from osm_lcm.data_utils.database.vim_account import VimAccountDB from n2vc.definitions import RelationEndpoint -from n2vc.k8s_helm_conn import K8sHelmConnector from n2vc.k8s_helm3_conn import K8sHelm3Connector from n2vc.k8s_juju_conn import K8sJujuConnector @@ -123,7 +123,7 @@ from copy import copy, deepcopy from time import time from uuid import uuid4 -from random import randint +from random import SystemRandom __author__ = "Alfonso Tierno " @@ -132,9 +132,18 @@ class NsLcm(LcmBase): SUBOPERATION_STATUS_NOT_FOUND = -1 SUBOPERATION_STATUS_NEW = -2 SUBOPERATION_STATUS_SKIP = -3 + EE_TLS_NAME = "ee-tls" task_name_deploy_vca = "Deploying VCA" - - def __init__(self, msg, lcm_tasks, config: LcmCfg, loop): + rel_operation_types = { + "GE": ">=", + "LE": "<=", + "GT": ">", + "LT": "<", + "EQ": "==", + "NE": "!=", + } + + def __init__(self, msg, lcm_tasks, config: LcmCfg): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -144,7 +153,6 @@ class NsLcm(LcmBase): self.db = Database().instance.db self.fs = Filesystem().instance.fs - self.loop = loop self.lcm_tasks = lcm_tasks self.timeout = config.timeout self.ro_config = config.RO @@ -153,7 +161,6 @@ class NsLcm(LcmBase): # create N2VC connector self.n2vc = N2VCJujuConnector( log=self.logger, - loop=self.loop, on_update_db=self._on_update_n2vc_db, fs=self.fs, db=self.db, @@ -161,20 +168,10 @@ class NsLcm(LcmBase): self.conn_helm_ee = LCMHelmConn( log=self.logger, - loop=self.loop, vca_config=self.vca_config, on_update_db=self._on_update_n2vc_db, ) - self.k8sclusterhelm2 = K8sHelmConnector( - kubectl_command=self.vca_config.kubectlpath, - helm_command=self.vca_config.helmpath, - log=self.logger, - on_update_db=None, - fs=self.fs, - db=self.db, - ) - self.k8sclusterhelm3 = K8sHelm3Connector( kubectl_command=self.vca_config.kubectlpath, helm_command=self.vca_config.helm3path, @@ -188,14 +185,12 @@ class NsLcm(LcmBase): kubectl_command=self.vca_config.kubectlpath, juju_command=self.vca_config.jujupath, log=self.logger, - loop=self.loop, on_update_db=self._on_update_k8s_db, fs=self.fs, db=self.db, ) self.k8scluster_map = { - "helm-chart": self.k8sclusterhelm2, "helm-chart-v3": self.k8sclusterhelm3, "chart": self.k8sclusterhelm3, "juju-bundle": self.k8sclusterjuju, @@ -211,7 +206,7 @@ class NsLcm(LcmBase): } # create RO client - self.RO = NgRoClient(self.loop, **self.ro_config.to_dict()) + self.RO = NgRoClient(**self.ro_config.to_dict()) self.op_status_map = { "instantiation": self.RO.status, @@ -245,7 +240,6 @@ class NsLcm(LcmBase): return None def _on_update_ro_db(self, nsrs_id, ro_descriptor): - # self.logger.debug('_on_update_ro_db(nsrs_id={}'.format(nsrs_id)) try: @@ -263,7 +257,6 @@ class NsLcm(LcmBase): ) async def _on_update_n2vc_db(self, table, filter, path, updated_data, vca_id=None): - # remove last dot from path (if exists) if path.endswith("."): path = path[:-1] @@ -271,7 +264,6 @@ class NsLcm(LcmBase): # self.logger.debug('_on_update_n2vc_db(table={}, filter={}, path={}, updated_data={}' # .format(table, filter, path, updated_data)) try: - nsr_id = filter.get("_id") # read ns record from database @@ -532,7 +524,6 @@ class NsLcm(LcmBase): return wim_account def scale_vnfr(self, db_vnfr, vdu_create=None, vdu_delete=None, mark_delete=False): - db_vdu_push_list = [] template_vdur = [] db_update = {"_admin.modified": time()} @@ -820,7 +811,6 @@ class NsLcm(LcmBase): start_deploy, timeout_ns_deploy, ): - db_vims = {} def get_vim_account(vim_account_id): @@ -836,9 +826,9 @@ class NsLcm(LcmBase): target_vim, target_vld, vld_params, target_sdn ): if vld_params.get("ip-profile"): - target_vld["vim_info"][target_vim]["ip_profile"] = vld_params[ - "ip-profile" - ] + target_vld["vim_info"][target_vim]["ip_profile"] = vld_to_ro_ip_profile( + vld_params["ip-profile"] + ) if vld_params.get("provider-network"): target_vld["vim_info"][target_vim]["provider_network"] = vld_params[ "provider-network" @@ -903,7 +893,7 @@ class NsLcm(LcmBase): ) vdur = next((vdur for vdur in target_vnf.get("vdur", ())), None) if not vdur: - return + continue for a_index, a_vld in enumerate(target["ns"]["vld"]): target_vld = find_in_list( get_iterable(vdur, "interfaces"), @@ -915,7 +905,6 @@ class NsLcm(LcmBase): lambda v_vld: v_vld["name"] in (a_vld["name"], a_vld["id"]), ) if target_vld: - if vnf_params.get("vimAccountId") not in a_vld.get( "vim_info", {} ): @@ -972,6 +961,10 @@ class NsLcm(LcmBase): image["vim_info"] = {} for flavor in target["flavor"]: flavor["vim_info"] = {} + if db_nsr.get("shared-volumes"): + target["shared-volumes"] = deepcopy(db_nsr["shared-volumes"]) + for shared_volumes in target["shared-volumes"]: + shared_volumes["vim_info"] = {} if db_nsr.get("affinity-or-anti-affinity-group"): target["affinity-or-anti-affinity-group"] = deepcopy( db_nsr["affinity-or-anti-affinity-group"] @@ -1014,16 +1007,16 @@ class NsLcm(LcmBase): # check if this network needs SDN assist if vld.get("pci-interfaces"): db_vim = get_vim_account(ns_params["vimAccountId"]) - sdnc_id = db_vim["config"].get("sdn-controller") - if sdnc_id: - sdn_vld = "nsrs:{}:vld.{}".format(nsr_id, vld["id"]) - target_sdn = "sdn:{}".format(sdnc_id) - target_vld["vim_info"][target_sdn] = { - "sdn": True, - "target_vim": target_vim, - "vlds": [sdn_vld], - "type": vld.get("type"), - } + if vim_config := db_vim.get("config"): + if sdnc_id := vim_config.get("sdn-controller"): + sdn_vld = "nsrs:{}:vld.{}".format(nsr_id, vld["id"]) + target_sdn = "sdn:{}".format(sdnc_id) + target_vld["vim_info"][target_sdn] = { + "sdn": True, + "target_vim": target_vim, + "vlds": [sdn_vld], + "type": vld.get("type"), + } nsd_vnf_profiles = get_vnf_profiles(nsd) for nsd_vnf_profile in nsd_vnf_profiles: @@ -1050,27 +1043,9 @@ class NsLcm(LcmBase): and nsd_vlp.get("virtual-link-protocol-data") and nsd_vlp["virtual-link-protocol-data"].get("l3-protocol-data") ): - ip_profile_source_data = nsd_vlp["virtual-link-protocol-data"][ + vld_params["ip-profile"] = nsd_vlp["virtual-link-protocol-data"][ "l3-protocol-data" ] - ip_profile_dest_data = {} - if "ip-version" in ip_profile_source_data: - ip_profile_dest_data["ip-version"] = ip_profile_source_data[ - "ip-version" - ] - if "cidr" in ip_profile_source_data: - ip_profile_dest_data["subnet-address"] = ip_profile_source_data[ - "cidr" - ] - if "gateway-ip" in ip_profile_source_data: - ip_profile_dest_data["gateway-address"] = ip_profile_source_data[ - "gateway-ip" - ] - if "dhcp-enabled" in ip_profile_source_data: - ip_profile_dest_data["dhcp-params"] = { - "enabled": ip_profile_source_data["dhcp-enabled"] - } - vld_params["ip-profile"] = ip_profile_dest_data # update vld_params with instantiation params vld_instantiation_params = find_in_list( @@ -1136,28 +1111,9 @@ class NsLcm(LcmBase): and vnfd_vlp.get("virtual-link-protocol-data") and vnfd_vlp["virtual-link-protocol-data"].get("l3-protocol-data") ): - ip_profile_source_data = vnfd_vlp["virtual-link-protocol-data"][ + vld_params["ip-profile"] = vnfd_vlp["virtual-link-protocol-data"][ "l3-protocol-data" ] - ip_profile_dest_data = {} - if "ip-version" in ip_profile_source_data: - ip_profile_dest_data["ip-version"] = ip_profile_source_data[ - "ip-version" - ] - if "cidr" in ip_profile_source_data: - ip_profile_dest_data["subnet-address"] = ip_profile_source_data[ - "cidr" - ] - if "gateway-ip" in ip_profile_source_data: - ip_profile_dest_data[ - "gateway-address" - ] = ip_profile_source_data["gateway-ip"] - if "dhcp-enabled" in ip_profile_source_data: - ip_profile_dest_data["dhcp-params"] = { - "enabled": ip_profile_source_data["dhcp-enabled"] - } - - vld_params["ip-profile"] = ip_profile_dest_data # update vld_params with instantiation params if vnf_params: vld_instantiation_params = find_in_list( @@ -1279,6 +1235,15 @@ class NsLcm(LcmBase): if target_vim not in ns_ags["vim_info"]: ns_ags["vim_info"][target_vim] = {} + # shared-volumes + if vdur.get("shared-volumes-id"): + for sv_id in vdur["shared-volumes-id"]: + ns_sv = find_in_list( + target["shared-volumes"], lambda sv: sv_id in sv["id"] + ) + if ns_sv: + ns_sv["vim_info"][target_vim] = {} + vdur["vim_info"] = {target_vim: {}} # instantiation parameters if vnf_params: @@ -1292,6 +1257,9 @@ class NsLcm(LcmBase): vdu_instantiation_params, vdud ) vdur["additionalParams"]["OSM"]["vdu_volumes"] = vdu_volumes + vdur["additionalParams"]["OSM"][ + "vim_flavor_id" + ] = vdu_instantiation_params.get("vim-flavor-id") vdur_list.append(vdur) target_vnf["vdur"] = vdur_list target["vnf"].append(target_vnf) @@ -1357,7 +1325,7 @@ class NsLcm(LcmBase): db_nsr_update["detailed-status"] = " ".join(stage) self.update_db_2("nsrs", nsr_id, db_nsr_update) self._write_op_status(nslcmop_id, stage) - await asyncio.sleep(15, loop=self.loop) + await asyncio.sleep(15) else: # timeout_ns_deploy raise NgRoException("Timeout waiting ns to deploy") @@ -1543,7 +1511,7 @@ class NsLcm(LcmBase): "target KDU={} is in error state".format(kdu_name) ) - await asyncio.sleep(10, loop=self.loop) + await asyncio.sleep(10) nb_tries += 1 raise LcmException("Timeout waiting KDU={} instantiated".format(kdu_name)) @@ -1568,14 +1536,13 @@ class NsLcm(LcmBase): ro_retries = 0 while True: - ro_retries += 1 if ro_retries >= 360: # 1 hour raise LcmException( "Not found _admin.deployed.RO.nsr_id for nsr_id: {}".format(nsr_id) ) - await asyncio.sleep(10, loop=self.loop) + await asyncio.sleep(10) # get ip address if not target_vdu_id: @@ -1753,7 +1720,6 @@ class NsLcm(LcmBase): } step = "" try: - element_type = "NS" element_under_configuration = nsr_id @@ -1836,8 +1802,7 @@ class NsLcm(LcmBase): vca_id = self.get_vca_id(db_vnfr, db_nsr) # create or register execution environment in VCA - if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm", "helm-v3"): - + if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm-v3"): self._write_configuration_status( nsr_id=nsr_id, vca_index=vca_index, @@ -1859,11 +1824,11 @@ class NsLcm(LcmBase): db_dict=db_dict, vca_id=vca_id, ) - elif vca_type == "helm" or vca_type == "helm-v3": + elif vca_type == "helm-v3": ee_id, credentials = await self.vca_map[ vca_type ].create_execution_environment( - namespace=namespace, + namespace=nsr_id, reuse_ee_id=ee_id, db_dict=db_dict, config=osm_config, @@ -2005,7 +1970,7 @@ class NsLcm(LcmBase): # if SSH access is required, then get execution environment SSH public # if native charm we have waited already to VM be UP - if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"): + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"): pub_key = None user = None # self.logger.debug("get ssh key block") @@ -2147,7 +2112,7 @@ class NsLcm(LcmBase): # TODO register in database that primitive is done # STEP 7 Configure metrics - if vca_type == "helm" or vca_type == "helm-v3": + if vca_type == "helm-v3": # TODO: review for those cases where the helm chart is a reference and # is not part of the NF package prometheus_jobs = await self.extract_prometheus_scrape_jobs( @@ -2157,6 +2122,7 @@ class NsLcm(LcmBase): vnfr_id=vnfr_id, nsr_id=nsr_id, target_ip=rw_mgmt_ip, + element_type=element_type, vnf_member_index=db_vnfr.get("member-vnf-index-ref", ""), vdu_id=vdu_id, vdu_index=vdu_index, @@ -2297,7 +2263,6 @@ class NsLcm(LcmBase): element_type: str = None, other_update: dict = None, ): - # self.logger.debug('_write_configuration_status(): vca_index={}, status={}' # .format(vca_index, status)) @@ -2338,9 +2303,7 @@ class NsLcm(LcmBase): self.logger.debug( logging_text + "Invoke and wait for placement optimization" ) - await self.msg.aiowrite( - "pla", "get_placement", {"nslcmopId": nslcmop_id}, loop=self.loop - ) + await self.msg.aiowrite("pla", "get_placement", {"nslcmopId": nslcmop_id}) db_poll_interval = 5 wait = db_poll_interval * 10 pla_result = None @@ -2369,6 +2332,299 @@ class NsLcm(LcmBase): vnfr["vim-account-id"] = pla_vnf["vimAccountId"] return modified + def _gather_vnfr_healing_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + df = vnfd.get("df", [{}])[0] + # Checking for auto-healing configuration + if "healing-aspect" in df: + healing_aspects = df["healing-aspect"] + for healing in healing_aspects: + for healing_policy in healing.get("healing-policy", ()): + vdu_id = healing_policy["vdu-id"] + vdur = next( + (vdur for vdur in vnfr["vdur"] if vdu_id == vdur["vdu-id-ref"]), + {}, + ) + if not vdur: + continue + metric_name = "vm_status" + vdu_name = vdur.get("name") + vnf_member_index = vnfr["member-vnf-index-ref"] + uuid = str(uuid4()) + name = f"healing_{uuid}" + action = healing_policy + # action_on_recovery = healing.get("action-on-recovery") + # cooldown_time = healing.get("cooldown-time") + # day1 = healing.get("day1") + alert = { + "uuid": uuid, + "name": name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_name": vdu_name, + }, + "alarm_status": "ok", + "action_type": "healing", + "action": action, + } + alerts.append(alert) + return alerts + + def _gather_vnfr_scaling_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + df = vnfd.get("df", [{}])[0] + # Checking for auto-scaling configuration + if "scaling-aspect" in df: + scaling_aspects = df["scaling-aspect"] + all_vnfd_monitoring_params = {} + for ivld in vnfd.get("int-virtual-link-desc", ()): + for mp in ivld.get("monitoring-parameters", ()): + all_vnfd_monitoring_params[mp.get("id")] = mp + for vdu in vnfd.get("vdu", ()): + for mp in vdu.get("monitoring-parameter", ()): + all_vnfd_monitoring_params[mp.get("id")] = mp + for df in vnfd.get("df", ()): + for mp in df.get("monitoring-parameter", ()): + all_vnfd_monitoring_params[mp.get("id")] = mp + for scaling_aspect in scaling_aspects: + scaling_group_name = scaling_aspect.get("name", "") + # Get monitored VDUs + all_monitored_vdus = set() + for delta in scaling_aspect.get("aspect-delta-details", {}).get( + "deltas", () + ): + for vdu_delta in delta.get("vdu-delta", ()): + all_monitored_vdus.add(vdu_delta.get("id")) + monitored_vdurs = list( + filter( + lambda vdur: vdur["vdu-id-ref"] in all_monitored_vdus, + vnfr["vdur"], + ) + ) + if not monitored_vdurs: + self.logger.error( + "Scaling criteria is referring to a vnf-monitoring-param that does not contain a reference to a vdu or vnf metric" + ) + continue + for scaling_policy in scaling_aspect.get("scaling-policy", ()): + if scaling_policy["scaling-type"] != "automatic": + continue + threshold_time = scaling_policy.get("threshold-time", "1") + cooldown_time = scaling_policy.get("cooldown-time", "0") + for scaling_criteria in scaling_policy["scaling-criteria"]: + monitoring_param_ref = scaling_criteria.get( + "vnf-monitoring-param-ref" + ) + vnf_monitoring_param = all_vnfd_monitoring_params[ + monitoring_param_ref + ] + for vdur in monitored_vdurs: + vdu_id = vdur["vdu-id-ref"] + metric_name = vnf_monitoring_param.get("performance-metric") + metric_name = f"osm_{metric_name}" + vnf_member_index = vnfr["member-vnf-index-ref"] + scalein_threshold = scaling_criteria.get( + "scale-in-threshold" + ) + scaleout_threshold = scaling_criteria.get( + "scale-out-threshold" + ) + # Looking for min/max-number-of-instances + instances_min_number = 1 + instances_max_number = 1 + vdu_profile = df["vdu-profile"] + if vdu_profile: + profile = next( + item for item in vdu_profile if item["id"] == vdu_id + ) + instances_min_number = profile.get( + "min-number-of-instances", 1 + ) + instances_max_number = profile.get( + "max-number-of-instances", 1 + ) + + if scalein_threshold: + uuid = str(uuid4()) + name = f"scalein_{uuid}" + operation = scaling_criteria[ + "scale-in-relational-operation" + ] + rel_operator = self.rel_operation_types.get( + operation, "<=" + ) + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"(count ({metric_selector}) > {instances_min_number}) and (avg({metric_selector}) {rel_operator} {scalein_threshold})" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + } + prom_cfg = { + "alert": name, + "expr": expression, + "for": str(threshold_time) + "m", + "labels": labels, + } + action = scaling_policy + action = { + "scaling-group": scaling_group_name, + "cooldown-time": cooldown_time, + } + alert = { + "uuid": uuid, + "name": name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "scale_in", + "action": action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + + if scaleout_threshold: + uuid = str(uuid4()) + name = f"scaleout_{uuid}" + operation = scaling_criteria[ + "scale-out-relational-operation" + ] + rel_operator = self.rel_operation_types.get( + operation, "<=" + ) + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"(count ({metric_selector}) < {instances_max_number}) and (avg({metric_selector}) {rel_operator} {scaleout_threshold})" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + } + prom_cfg = { + "alert": name, + "expr": expression, + "for": str(threshold_time) + "m", + "labels": labels, + } + action = scaling_policy + action = { + "scaling-group": scaling_group_name, + "cooldown-time": cooldown_time, + } + alert = { + "uuid": uuid, + "name": name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "scale_out", + "action": action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + return alerts + + def _gather_vnfr_alarm_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + vnf_member_index = vnfr["member-vnf-index-ref"] + + # Checking for VNF alarm configuration + for vdur in vnfr["vdur"]: + vdu_id = vdur["vdu-id-ref"] + vdu = next(filter(lambda vdu: vdu["id"] == vdu_id, vnfd["vdu"])) + if "alarm" in vdu: + # Get VDU monitoring params, since alerts are based on them + vdu_monitoring_params = {} + for mp in vdu.get("monitoring-parameter", []): + vdu_monitoring_params[mp.get("id")] = mp + if not vdu_monitoring_params: + self.logger.error( + "VDU alarm refers to a VDU monitoring param, but there are no VDU monitoring params in the VDU" + ) + continue + # Get alarms in the VDU + alarm_descriptors = vdu["alarm"] + # Create VDU alarms for each alarm in the VDU + for alarm_descriptor in alarm_descriptors: + # Check that the VDU alarm refers to a proper monitoring param + alarm_monitoring_param = alarm_descriptor.get( + "vnf-monitoring-param-ref", "" + ) + vdu_specific_monitoring_param = vdu_monitoring_params.get( + alarm_monitoring_param, {} + ) + if not vdu_specific_monitoring_param: + self.logger.error( + "VDU alarm refers to a VDU monitoring param not present in the VDU" + ) + continue + metric_name = vdu_specific_monitoring_param.get( + "performance-metric" + ) + if not metric_name: + self.logger.error( + "VDU alarm refers to a VDU monitoring param that has no associated performance-metric" + ) + continue + # Set params of the alarm to be created in Prometheus + metric_name = f"osm_{metric_name}" + metric_threshold = alarm_descriptor.get("value") + uuid = str(uuid4()) + alert_name = f"vdu_alarm_{uuid}" + operation = alarm_descriptor["operation"] + rel_operator = self.rel_operation_types.get(operation, "<=") + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"{metric_selector} {rel_operator} {metric_threshold}" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + "vdu_name": "{{ $labels.vdu_name }}", + } + prom_cfg = { + "alert": alert_name, + "expr": expression, + "for": "1m", # default value. Ideally, this should be related to an IM param, but there is not such param + "labels": labels, + } + alarm_action = dict() + for action_type in ["ok", "insufficient-data", "alarm"]: + if ( + "actions" in alarm_descriptor + and action_type in alarm_descriptor["actions"] + ): + alarm_action[action_type] = alarm_descriptor["actions"][ + action_type + ] + alert = { + "uuid": uuid, + "name": alert_name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "vdu_alarm", + "action": alarm_action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + return alerts + def update_nsrs_with_pla_result(self, params): try: nslcmop_id = deep_get(params, ("placement", "nslcmopId")) @@ -2578,13 +2834,16 @@ class NsLcm(LcmBase): # create namespace and certificate if any helm based EE is present in the NS if check_helm_ee_in_ns(db_vnfds): - # TODO: create EE namespace + await self.vca_map["helm-v3"].setup_ns_namespace( + name=nsr_id, + ) # create TLS certificates await self.vca_map["helm-v3"].create_tls_certificate( - secret_name="ee-tls-{}".format(nsr_id), + secret_name=self.EE_TLS_NAME, dns_prefix="*", nsr_id=nsr_id, usage="server auth", + namespace=nsr_id, ) nsi_id = None # TODO put nsi_id when this nsr belongs to a NSI @@ -2720,6 +2979,65 @@ class NsLcm(LcmBase): stage=stage, ) + # Check if each vnf has exporter for metric collection if so update prometheus job records + if "exporters-endpoints" in vnfd.get("df")[0]: + exporter_config = vnfd.get("df")[0].get("exporters-endpoints") + self.logger.debug("exporter config :{}".format(exporter_config)) + artifact_path = "{}/{}/{}".format( + base_folder["folder"], + base_folder["pkg-dir"], + "exporter-endpoint", + ) + ee_id = None + ee_config_descriptor = exporter_config + vnfr_id = db_vnfr["id"] + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro( + logging_text, + nsr_id, + vnfr_id, + vdu_id=None, + vdu_index=None, + user=None, + pub_key=None, + ) + self.logger.debug("rw_mgmt_ip:{}".format(rw_mgmt_ip)) + self.logger.debug("Artifact_path:{}".format(artifact_path)) + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + vdu_id_for_prom = None + vdu_index_for_prom = None + for x in get_iterable(db_vnfr, "vdur"): + vdu_id_for_prom = x.get("vdu-id-ref") + vdu_index_for_prom = x.get("count-index") + prometheus_jobs = await self.extract_prometheus_scrape_jobs( + ee_id=ee_id, + artifact_path=artifact_path, + ee_config_descriptor=ee_config_descriptor, + vnfr_id=vnfr_id, + nsr_id=nsr_id, + target_ip=rw_mgmt_ip, + element_type="VDU", + vdu_id=vdu_id_for_prom, + vdu_index=vdu_index_for_prom, + ) + + self.logger.debug("Prometheus job:{}".format(prometheus_jobs)) + if prometheus_jobs: + db_nsr_update["_admin.deployed.prometheus_jobs"] = prometheus_jobs + self.update_db_2( + "nsrs", + nsr_id, + db_nsr_update, + ) + + for job in prometheus_jobs: + self.db.set_one( + "prometheus_jobs", + {"job_name": job["job_name"]}, + job, + upsert=True, + fail_on_empty=False, + ) + # Check if this NS has a charm configuration descriptor_config = nsd.get("ns-configuration") if descriptor_config and descriptor_config.get("juju"): @@ -2802,7 +3120,15 @@ class NsLcm(LcmBase): stage[1] = stage[2] = "" except asyncio.CancelledError: error_list.append("Cancelled") - # TODO cancel all tasks + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( + logging_text, + tasks_dict_info, + timeout_ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) except Exception as exc: error_list.append(str(exc)) @@ -2843,7 +3169,27 @@ class NsLcm(LcmBase): db_nsr_update["detailed-status"] = "Done" db_nslcmop_update["detailed-status"] = "Done" nslcmop_operation_state = "COMPLETED" - + # Gather auto-healing and auto-scaling alerts for each vnfr + healing_alerts = [] + scaling_alerts = [] + for vnfr in self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}): + vnfd = next( + (sub for sub in db_vnfds if sub["_id"] == vnfr["vnfd-id"]), None + ) + healing_alerts = self._gather_vnfr_healing_alerts(vnfr, vnfd) + for alert in healing_alerts: + self.logger.info(f"Storing healing alert in MongoDB: {alert}") + self.db.create("alerts", alert) + + scaling_alerts = self._gather_vnfr_scaling_alerts(vnfr, vnfd) + for alert in scaling_alerts: + self.logger.info(f"Storing scaling alert in MongoDB: {alert}") + self.db.create("alerts", alert) + + alarm_alerts = self._gather_vnfr_alarm_alerts(vnfr, vnfd) + for alert in alarm_alerts: + self.logger.info(f"Storing VNF alarm alert in MongoDB: {alert}") + self.db.create("alerts", alert) if db_nsr: self._write_ns_status( nsr_id=nsr_id, @@ -2871,8 +3217,13 @@ class NsLcm(LcmBase): "nsr_id": nsr_id, "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, + "startTime": db_nslcmop["startTime"], + "links": db_nslcmop["links"], + "operationParams": { + "nsInstanceId": nsr_id, + "nsdId": db_nsr["nsd-id"], + }, }, - loop=self.loop, ) except Exception as e: self.logger.error( @@ -2999,6 +3350,9 @@ class NsLcm(LcmBase): cached_vnfds: Dict[str, Any], ) -> List[Relation]: relations = [] + if vca.target_element == "ns": + self.logger.debug("VCA is a NS charm, not a VNF.") + return relations vnf_profile = get_vnf_profile(nsd, vca.vnf_profile_id) vnf_profile_id = vnf_profile["id"] vnfd_id = vnf_profile["vnfd-id"] @@ -3184,7 +3538,6 @@ class NsLcm(LcmBase): vca_index: int, timeout: int = 3600, ) -> bool: - # steps: # 1. find all relations for this VCA # 2. wait for other peers related @@ -3262,7 +3615,6 @@ class NsLcm(LcmBase): timeout: int = 600, vca_id: str = None, ): - try: k8sclustertype = k8s_instance_info["k8scluster-type"] # Instantiate kdu @@ -3439,9 +3791,11 @@ class NsLcm(LcmBase): vnfr_data.get("_id"), {"kdur.{}.status".format(kdu_index): "ERROR"}, ) - except Exception: + except Exception as error: # ignore to keep original exception - pass + self.logger.warning( + f"An exception occurred while updating DB: {str(error)}" + ) # reraise original error raise @@ -3460,7 +3814,6 @@ class NsLcm(LcmBase): k8scluster_id_2_uuic = { "helm-chart-v3": {}, - "helm-chart": {}, "juju-bundle": {}, } @@ -3559,11 +3912,6 @@ class NsLcm(LcmBase): # Default version: helm3, if helm-version is v2 assign v2 k8sclustertype = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8sclustertype = "helm-chart" elif kdur.get("juju-bundle"): kdumodel = kdur["juju-bundle"] k8sclustertype = "juju-bundle" @@ -3601,8 +3949,8 @@ class NsLcm(LcmBase): kdumodel = self.fs.path + filename except (asyncio.TimeoutError, asyncio.CancelledError): raise - except Exception: # it is not a file - pass + except Exception as e: # it is not a file + self.logger.warning(f"An exception occurred: {str(e)}") k8s_cluster_id = kdur["k8s-cluster"]["id"] step = "Synchronize repos for k8s cluster '{}'".format( @@ -3784,10 +4132,7 @@ class NsLcm(LcmBase): vca_type = "native_charm" elif ee_item.get("helm-chart"): vca_name = ee_item["helm-chart"] - if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2": - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" else: self.logger.debug( logging_text + "skipping non juju neither charm configuration" @@ -4117,7 +4462,7 @@ class NsLcm(LcmBase): # TODO vdu_index_count for vca in vca_deployed_list: if vca["member-vnf-index"] == vnf_index and vca["vdu_id"] == vdu_id: - return vca["ee_id"] + return vca.get("ee_id") async def destroy_N2VC( self, @@ -4344,9 +4689,7 @@ class NsLcm(LcmBase): ) and vca.get("needed_terminate") # For helm we must destroy_ee. Also for native_charm, as juju_model cannot be deleted if there are # pending native charms - destroy_ee = ( - True if vca_type in ("helm", "helm-v3", "native_charm") else False - ) + destroy_ee = True if vca_type in ("helm-v3", "native_charm") else False # self.logger.debug(logging_text + "vca_index: {}, ee_id: {}, vca_type: {} destroy_ee: {}".format( # vca_index, vca.get("ee_id"), vca_type, destroy_ee)) task = asyncio.ensure_future( @@ -4399,9 +4742,12 @@ class NsLcm(LcmBase): # Delete Namespace and Certificates if necessary if check_helm_ee_in_ns(list(db_vnfds_from_member_index.values())): await self.vca_map["helm-v3"].delete_tls_certificate( - certificate_name=db_nslcmop["nsInstanceId"], + namespace=db_nslcmop["nsInstanceId"], + certificate_name=self.EE_TLS_NAME, + ) + await self.vca_map["helm-v3"].delete_namespace( + namespace=db_nslcmop["nsInstanceId"], ) - # TODO: Delete namespace # Delete from k8scluster stage[1] = "Deleting KDUs." @@ -4483,7 +4829,14 @@ class NsLcm(LcmBase): stage[1] = stage[2] = "" except asyncio.CancelledError: error_list.append("Cancelled") - # TODO cancell all tasks + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( + logging_text, + tasks_dict_info, + timeout_ns_terminate, + stage, + nslcmop_id, + ) except Exception as exc: error_list.append(str(exc)) # update status at database @@ -4558,12 +4911,13 @@ class NsLcm(LcmBase): "operationState": nslcmop_operation_state, "autoremove": autoremove, }, - loop=self.loop, ) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) ) + self.logger.debug(f"Deleting alerts: ns_id={nsr_id}") + self.db.del_list("alerts", {"tags.ns_id": nsr_id}) self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_terminate") @@ -4645,6 +4999,11 @@ class NsLcm(LcmBase): self._write_op_status(nslcmop_id, stage) return error_detail_list + async def _cancel_pending_tasks(self, logging_text, created_tasks_info): + for task, name in created_tasks_info.items(): + self.logger.debug(logging_text + "Cancelling task: " + name) + task.cancel() + @staticmethod def _map_primitive_params(primitive_desc, params, instantiation_params): """ @@ -4814,7 +5173,7 @@ class NsLcm(LcmBase): ) ) # wait and retry - await asyncio.sleep(retries_interval, loop=self.loop) + await asyncio.sleep(retries_interval) else: if isinstance(e, asyncio.TimeoutError): e = N2VCException( @@ -5009,7 +5368,7 @@ class NsLcm(LcmBase): kdu_action = ( True if primitive_name in actions - and kdu["k8scluster-type"] not in ("helm-chart", "helm-chart-v3") + and kdu["k8scluster-type"] != "helm-chart-v3" else False ) @@ -5054,9 +5413,10 @@ class NsLcm(LcmBase): del desc_params["kdu_model"] else: kdu_model = kdu.get("kdu-model") - parts = kdu_model.split(sep=":") - if len(parts) == 2: - kdu_model = parts[0] + if kdu_model.count("/") < 2: # helm chart is not embedded + parts = kdu_model.split(sep=":") + if len(parts) == 2: + kdu_model = parts[0] if desc_params.get("kdu_atomic_upgrade"): atomic_upgrade = desc_params.get( "kdu_atomic_upgrade" @@ -5225,7 +5585,6 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, }, - loop=self.loop, ) except Exception as e: self.logger.error( @@ -5517,7 +5876,6 @@ class NsLcm(LcmBase): raise except Exception as e: - self.logger.debug("Error upgrading charm {}".format(path)) return "FAILED", "Error upgrading charm {}: {}".format(path, e) @@ -5581,7 +5939,6 @@ class NsLcm(LcmBase): nsr_deployed = db_nsr["_admin"].get("deployed") if update_type == "CHANGE_VNFPKG": - # Get the input parameters given through update request vnf_instance_id = db_nslcmop["operationParams"][ "changeVnfPackageData" @@ -5621,7 +5978,6 @@ class NsLcm(LcmBase): step = "Checking if revision has changed in VNFD" if current_vnf_revision != latest_vnfd_revision: - change_type = "policy_updated" # There is new revision of VNFD, update operation is required @@ -5658,7 +6014,6 @@ class NsLcm(LcmBase): step = "Getting descriptor config" if current_vnfd.get("kdu"): - search_key = "kdu_name" else: search_key = "vnfd_id" @@ -5679,7 +6034,6 @@ class NsLcm(LcmBase): # There could be several charm used in the same VNF for ee_item in ee_list: if ee_item.get("juju"): - step = "Getting charm name" charm_name = ee_item["juju"].get("charm") @@ -5704,13 +6058,7 @@ class NsLcm(LcmBase): # add chart to list and all parameters step = "Getting helm chart name" chart_name = ee_item.get("helm-chart") - if ( - ee_item.get("helm-version") - and ee_item.get("helm-version") == "v2" - ): - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" step = "Setting Helm chart artifact paths" helm_artifacts.append( @@ -5741,7 +6089,6 @@ class NsLcm(LcmBase): if find_software_version(current_vnfd) != find_software_version( latest_vnfd ): - step = "Checking if existing VNF has charm" for current_charm_path, target_charm_path in list( charm_artifact_paths @@ -5783,10 +6130,8 @@ class NsLcm(LcmBase): current_charm_path, target_charm_path ) ): - step = "Checking whether VNF uses juju bundle" if check_juju_bundle_existence(current_vnfd): - raise LcmException( "Charm upgrade is not supported for the instance which" " uses juju-bundle: {}".format( @@ -6064,7 +6409,7 @@ class NsLcm(LcmBase): and member_vnf_index ): msg.update({"vnf_member_index": member_vnf_index}) - await self.msg.aiowrite("ns", change_type, msg, loop=self.loop) + await self.msg.aiowrite("ns", change_type, msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -6280,11 +6625,6 @@ class NsLcm(LcmBase): if kdur.get("helm-chart"): k8s_cluster_type = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8s_cluster_type = "helm-chart" elif kdur.get("juju-bundle"): k8s_cluster_type = "juju-bundle" else: @@ -6413,11 +6753,6 @@ class NsLcm(LcmBase): if kdur.get("helm-chart"): k8s_cluster_type = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8s_cluster_type = "helm-chart" elif kdur.get("juju-bundle"): k8s_cluster_type = "juju-bundle" else: @@ -7033,16 +7368,31 @@ class NsLcm(LcmBase): exc_info=True, ) finally: + error_list = list() + if exc: + error_list.append(str(exc)) self._write_ns_status( nsr_id=nsr_id, ns_state=None, current_operation="IDLE", current_operation_id=None, ) - if tasks_dict_info: - stage[1] = "Waiting for instantiate pending tasks." - self.logger.debug(logging_text + stage[1]) - exc = await self._wait_for_tasks( + try: + if tasks_dict_info: + stage[1] = "Waiting for instantiate pending tasks." + self.logger.debug(logging_text + stage[1]) + exc = await self._wait_for_tasks( + logging_text, + tasks_dict_info, + self.timeout.ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) + except asyncio.CancelledError: + error_list.append("Cancelled") + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( logging_text, tasks_dict_info, self.timeout.ns_deploy, @@ -7050,10 +7400,13 @@ class NsLcm(LcmBase): nslcmop_id, nsr_id=nsr_id, ) - if exc: + if error_list: + error_detail = "; ".join(error_list) db_nslcmop_update[ "detailed-status" - ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + ] = error_description_nslcmop = "FAILED {}: {}".format( + step, error_detail + ) nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status @@ -7067,7 +7420,7 @@ class NsLcm(LcmBase): db_nsr_update[ "detailed-status" ] = "FAILED scaling nslcmop={} {}: {}".format( - nslcmop_id, step, exc + nslcmop_id, step, error_detail ) else: error_description_nslcmop = None @@ -7097,7 +7450,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "scaled", msg, loop=self.loop) + await self.msg.aiowrite("ns", "scaled", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -7270,6 +7623,7 @@ class NsLcm(LcmBase): vnfr_id: str, nsr_id: str, target_ip: str, + element_type: str, vnf_member_index: str = "", vdu_id: str = "", vdu_index: int = None, @@ -7286,6 +7640,7 @@ class NsLcm(LcmBase): vnfr_id (str): VNFR ID where this EE applies nsr_id (str): NSR ID where this EE applies target_ip (str): VDU/KDU instance IP address + element_type (str): NS or VNF or VDU or KDU vnf_member_index (str, optional): VNF index where this EE applies. Defaults to "". vdu_id (str, optional): VDU ID where this EE applies. Defaults to "". vdu_index (int, optional): VDU index where this EE applies. Defaults to None. @@ -7298,7 +7653,11 @@ class NsLcm(LcmBase): Returns: _type_: Prometheus jobs """ - self.logger.debug(f"KDU: {kdu_name}; KDU INDEX: {kdu_index}") + # default the vdur and kdur names to an empty string, to avoid any later + # problem with Prometheus when the element type is not VDU or KDU + vdur_name = "" + kdur_name = "" + # look if exist a file called 'prometheus*.j2' and artifact_content = self.fs.dir_ls(artifact_path) job_file = next( @@ -7311,70 +7670,86 @@ class NsLcm(LcmBase): ) if not job_file: return + self.logger.debug("Artifact path{}".format(artifact_path)) + self.logger.debug("job file{}".format(job_file)) with self.fs.file_open((artifact_path, job_file), "r") as f: job_data = f.read() - vdur_name = "" - kdur_name = "" - for r in range(360): - db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) - if vdu_id and vdu_index is not None: - vdur = next( - ( - x - for x in get_iterable(db_vnfr, "vdur") - if ( - x.get("vdu-id-ref") == vdu_id - and x.get("count-index") == vdu_index - ) - ), - {}, - ) - if vdur.get("name"): - vdur_name = vdur.get("name") - break - if kdu_name and kdu_index is not None: - kdur = next( - ( - x - for x in get_iterable(db_vnfr, "kdur") - if ( - x.get("kdu-name") == kdu_name - and x.get("count-index") == kdu_index - ) - ), - {}, - ) - if kdur.get("name"): - kdur_name = kdur.get("name") - break + # obtain the VDUR or KDUR, if the element type is VDU or KDU + if element_type in ("VDU", "KDU"): + for _ in range(360): + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + if vdu_id and vdu_index is not None: + vdur = next( + ( + x + for x in get_iterable(db_vnfr, "vdur") + if ( + x.get("vdu-id-ref") == vdu_id + and x.get("count-index") == vdu_index + ) + ), + {}, + ) + if vdur.get("name"): + vdur_name = vdur.get("name") + break + if kdu_name and kdu_index is not None: + kdur = next( + ( + x + for x in get_iterable(db_vnfr, "kdur") + if ( + x.get("kdu-name") == kdu_name + and x.get("count-index") == kdu_index + ) + ), + {}, + ) + if kdur.get("name"): + kdur_name = kdur.get("name") + break - await asyncio.sleep(10, loop=self.loop) + await asyncio.sleep(10) + else: + if vdu_id and vdu_index is not None: + raise LcmException( + f"Timeout waiting VDU with name={vdu_id} and index={vdu_index} to be intantiated" + ) + if kdu_name and kdu_index is not None: + raise LcmException( + f"Timeout waiting KDU with name={kdu_name} and index={kdu_index} to be intantiated" + ) + + if ee_id is not None: + _, namespace, helm_id = get_ee_id_parts( + ee_id + ) # get namespace and EE gRPC service name + host_name = f'{helm_id}-{ee_config_descriptor["metric-service"]}.{namespace}.svc' # svc_name.namespace.svc + host_port = "80" + vnfr_id = vnfr_id.replace("-", "") + variables = { + "JOB_NAME": vnfr_id, + "TARGET_IP": target_ip, + "EXPORTER_POD_IP": host_name, + "EXPORTER_POD_PORT": host_port, + "NSR_ID": nsr_id, + "VNF_MEMBER_INDEX": vnf_member_index, + "VDUR_NAME": vdur_name, + "KDUR_NAME": kdur_name, + "ELEMENT_TYPE": element_type, + } else: - if vdu_id and vdu_index is not None: - raise LcmException( - f"Timeout waiting VDU with name={vdu_id} and index={vdu_index} to be intantiated" - ) - if kdu_name and kdu_index is not None: - raise LcmException( - f"Timeout waiting KDU with name={kdu_name} and index={kdu_index} to be intantiated" - ) - - # TODO get_service - _, _, service = ee_id.partition(".") # remove prefix "namespace." - host_name = "{}-{}".format(service, ee_config_descriptor["metric-service"]) - host_port = "80" - vnfr_id = vnfr_id.replace("-", "") - variables = { - "JOB_NAME": vnfr_id, - "TARGET_IP": target_ip, - "EXPORTER_POD_IP": host_name, - "EXPORTER_POD_PORT": host_port, - "NSR_ID": nsr_id, - "VNF_MEMBER_INDEX": vnf_member_index, - "VDUR_NAME": vdur_name, - "KDUR_NAME": kdur_name, - } + metric_path = ee_config_descriptor["metric-path"] + target_port = ee_config_descriptor["metric-port"] + vnfr_id = vnfr_id.replace("-", "") + variables = { + "JOB_NAME": vnfr_id, + "TARGET_IP": target_ip, + "TARGET_PORT": target_port, + "METRIC_PATH": metric_path, + } + job_list = parse_job(job_data, variables) # ensure job_name is using the vnfr_id. Adding the metadata nsr_id for job in job_list: @@ -7382,7 +7757,7 @@ class NsLcm(LcmBase): not isinstance(job.get("job_name"), str) or vnfr_id not in job["job_name"] ): - job["job_name"] = vnfr_id + "_" + str(randint(1, 10000)) + job["job_name"] = vnfr_id + "_" + str(SystemRandom().randint(1, 10000)) job["nsr_id"] = nsr_id job["vnfr_id"] = vnfr_id return job_list @@ -7591,7 +7966,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "migrated", msg, loop=self.loop) + await self.msg.aiowrite("ns", "migrated", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -7715,9 +8090,9 @@ class NsLcm(LcmBase): for target_vdu in target_vdu_list: deploy_params_vdu = target_vdu # Set run-day1 vnf level value if not vdu level value exists - if not deploy_params_vdu.get("run-day1") and target_vnf[ - "additionalParams" - ].get("run-day1"): + if not deploy_params_vdu.get("run-day1") and target_vnf.get( + "additionalParams", {} + ).get("run-day1"): deploy_params_vdu["run-day1"] = target_vnf[ "additionalParams" ].get("run-day1") @@ -7816,10 +8191,25 @@ class NsLcm(LcmBase): exc_info=True, ) finally: - if tasks_dict_info: - stage[1] = "Waiting for healing pending tasks." - self.logger.debug(logging_text + stage[1]) - exc = await self._wait_for_tasks( + error_list = list() + if exc: + error_list.append(str(exc)) + try: + if tasks_dict_info: + stage[1] = "Waiting for healing pending tasks." + self.logger.debug(logging_text + stage[1]) + exc = await self._wait_for_tasks( + logging_text, + tasks_dict_info, + self.timeout.ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) + except asyncio.CancelledError: + error_list.append("Cancelled") + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( logging_text, tasks_dict_info, self.timeout.ns_deploy, @@ -7827,17 +8217,22 @@ class NsLcm(LcmBase): nslcmop_id, nsr_id=nsr_id, ) - if exc: + if error_list: + error_detail = "; ".join(error_list) db_nslcmop_update[ "detailed-status" - ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + ] = error_description_nslcmop = "FAILED {}: {}".format( + step, error_detail + ) nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status db_nsr_update["config-status"] = old_config_status db_nsr_update[ "detailed-status" - ] = "FAILED healing nslcmop={} {}: {}".format(nslcmop_id, step, exc) + ] = "FAILED healing nslcmop={} {}: {}".format( + nslcmop_id, step, error_detail + ) for task, task_name in tasks_dict_info.items(): if not task.done() or task.cancelled() or task.exception(): if task_name.startswith(self.task_name_deploy_vca): @@ -7877,7 +8272,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "healed", msg, loop=self.loop) + await self.msg.aiowrite("ns", "healed", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e) @@ -8037,10 +8432,7 @@ class NsLcm(LcmBase): vca_type = "native_charm" elif ee_item.get("helm-chart"): vca_name = ee_item["helm-chart"] - if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2": - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" else: self.logger.debug( logging_text + "skipping non juju neither charm configuration" @@ -8166,7 +8558,6 @@ class NsLcm(LcmBase): } step = "" try: - element_type = "NS" element_under_configuration = nsr_id @@ -8373,7 +8764,7 @@ class NsLcm(LcmBase): # if SSH access is required, then get execution environment SSH public # if native charm we have waited already to VM be UP - if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"): + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"): pub_key = None user = None # self.logger.debug("get ssh key block") @@ -8559,7 +8950,7 @@ class NsLcm(LcmBase): self.logger.debug("Wait Heal RO > {}".format(operational_status_ro)) if operational_status_ro != "healing": break - await asyncio.sleep(15, loop=self.loop) + await asyncio.sleep(15) else: # timeout_ns_deploy raise NgRoException("Timeout waiting ns to deploy") @@ -8657,7 +9048,7 @@ class NsLcm(LcmBase): "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, } - await self.msg.aiowrite("ns", "verticalscaled", msg, loop=self.loop) + await self.msg.aiowrite("ns", "verticalscaled", msg) except Exception as e: self.logger.error( logging_text + "kafka_write notification Exception {}".format(e)