X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_lcm%2Fns.py;h=1504fb106c65f4a393c88f8c325858dbbba0d921;hb=72d90d9217ee99b8e05cfe345f59db28f6d54d93;hp=9ceb60984657cf02bd1e52c182062bdd5304cfe4;hpb=6316219125d503815baf8a869fbacc8c252876ad;p=osm%2FLCM.git diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index 9ceb609..1504fb1 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -23,6 +23,7 @@ import yaml import logging import logging.handlers import traceback +import ipaddress import json from jinja2 import ( Environment, @@ -53,7 +54,6 @@ from osm_lcm.data_utils.vca import ( from osm_lcm.ng_ro import NgRoClient, NgRoException from osm_lcm.lcm_utils import ( LcmException, - LcmExceptionNoMgmtIP, LcmBase, deep_get, get_iterable, @@ -98,7 +98,6 @@ from osm_lcm.data_utils.vnfr import ( from osm_lcm.data_utils.dict_utils import parse_yaml_strings from osm_lcm.data_utils.database.vim_account import VimAccountDB from n2vc.definitions import RelationEndpoint -from n2vc.k8s_helm_conn import K8sHelmConnector from n2vc.k8s_helm3_conn import K8sHelm3Connector from n2vc.k8s_juju_conn import K8sJujuConnector @@ -133,7 +132,16 @@ class NsLcm(LcmBase): SUBOPERATION_STATUS_NOT_FOUND = -1 SUBOPERATION_STATUS_NEW = -2 SUBOPERATION_STATUS_SKIP = -3 + EE_TLS_NAME = "ee-tls" task_name_deploy_vca = "Deploying VCA" + rel_operation_types = { + "GE": ">=", + "LE": "<=", + "GT": ">", + "LT": "<", + "EQ": "==", + "NE": "!=", + } def __init__(self, msg, lcm_tasks, config: LcmCfg): """ @@ -164,15 +172,6 @@ class NsLcm(LcmBase): on_update_db=self._on_update_n2vc_db, ) - self.k8sclusterhelm2 = K8sHelmConnector( - kubectl_command=self.vca_config.kubectlpath, - helm_command=self.vca_config.helmpath, - log=self.logger, - on_update_db=None, - fs=self.fs, - db=self.db, - ) - self.k8sclusterhelm3 = K8sHelm3Connector( kubectl_command=self.vca_config.kubectlpath, helm_command=self.vca_config.helm3path, @@ -192,7 +191,6 @@ class NsLcm(LcmBase): ) self.k8scluster_map = { - "helm-chart": self.k8sclusterhelm2, "helm-chart-v3": self.k8sclusterhelm3, "chart": self.k8sclusterhelm3, "juju-bundle": self.k8sclusterjuju, @@ -224,6 +222,18 @@ class NsLcm(LcmBase): if not isinstance(ip_mac, str): return ip_mac try: + next_ipv6 = None + next_ipv4 = None + dual_ip = ip_mac.split(";") + if len(dual_ip) == 2: + for ip in dual_ip: + if ipaddress.ip_address(ip).version == 6: + ipv6 = ipaddress.IPv6Address(ip) + next_ipv6 = str(ipaddress.IPv6Address(int(ipv6) + 1)) + elif ipaddress.ip_address(ip).version == 4: + ipv4 = ipaddress.IPv4Address(ip) + next_ipv4 = str(ipaddress.IPv4Address(int(ipv4) + 1)) + return [next_ipv4, next_ipv6] # try with ipv4 look for last dot i = ip_mac.rfind(".") if i > 0: @@ -241,23 +251,6 @@ class NsLcm(LcmBase): pass return None - def _on_update_ro_db(self, nsrs_id, ro_descriptor): - # self.logger.debug('_on_update_ro_db(nsrs_id={}'.format(nsrs_id)) - - try: - # TODO filter RO descriptor fields... - - # write to database - db_dict = dict() - # db_dict['deploymentStatus'] = yaml.dump(ro_descriptor, default_flow_style=False, indent=2) - db_dict["deploymentStatus"] = ro_descriptor - self.update_db_2("nsrs", nsrs_id, db_dict) - - except Exception as e: - self.logger.warn( - "Cannot write database RO deployment for ns={} -> {}".format(nsrs_id, e) - ) - async def _on_update_n2vc_db(self, table, filter, path, updated_data, vca_id=None): # remove last dot from path (if exists) if path.endswith("."): @@ -456,32 +449,6 @@ class NsLcm(LcmBase): additional_params = vdur.get("additionalParams") return parse_yaml_strings(additional_params) - def vnfd2RO(self, vnfd, new_id=None, additionalParams=None, nsrId=None): - """ - Converts creates a new vnfd descriptor for RO base on input OSM IM vnfd - :param vnfd: input vnfd - :param new_id: overrides vnf id if provided - :param additionalParams: Instantiation params for VNFs provided - :param nsrId: Id of the NSR - :return: copy of vnfd - """ - vnfd_RO = deepcopy(vnfd) - # remove unused by RO configuration, monitoring, scaling and internal keys - vnfd_RO.pop("_id", None) - vnfd_RO.pop("_admin", None) - vnfd_RO.pop("monitoring-param", None) - vnfd_RO.pop("scaling-group-descriptor", None) - vnfd_RO.pop("kdu", None) - vnfd_RO.pop("k8s-cluster", None) - if new_id: - vnfd_RO["id"] = new_id - - # parse cloud-init or cloud-init-file with the provided variables using Jinja2 - for vdu in get_iterable(vnfd_RO, "vdu"): - vdu.pop("cloud-init-file", None) - vdu.pop("cloud-init", None) - return vnfd_RO - @staticmethod def ip_profile_2_RO(ip_profile): RO_ip_profile = deepcopy(ip_profile) @@ -500,31 +467,6 @@ class NsLcm(LcmBase): RO_ip_profile["dhcp"] = RO_ip_profile.pop("dhcp-params") return RO_ip_profile - def _get_ro_vim_id_for_vim_account(self, vim_account): - db_vim = self.db.get_one("vim_accounts", {"_id": vim_account}) - if db_vim["_admin"]["operationalState"] != "ENABLED": - raise LcmException( - "VIM={} is not available. operationalState={}".format( - vim_account, db_vim["_admin"]["operationalState"] - ) - ) - RO_vim_id = db_vim["_admin"]["deployed"]["RO"] - return RO_vim_id - - def get_ro_wim_id_for_wim_account(self, wim_account): - if isinstance(wim_account, str): - db_wim = self.db.get_one("wim_accounts", {"_id": wim_account}) - if db_wim["_admin"]["operationalState"] != "ENABLED": - raise LcmException( - "WIM={} is not available. operationalState={}".format( - wim_account, db_wim["_admin"]["operationalState"] - ) - ) - RO_wim_id = db_wim["_admin"]["deployed"]["RO-account"] - return RO_wim_id - else: - return wim_account - def scale_vnfr(self, db_vnfr, vdu_create=None, vdu_delete=None, mark_delete=False): db_vdu_push_list = [] template_vdur = [] @@ -677,103 +619,6 @@ class NsLcm(LcmBase): except DbException as e: self.logger.error("Cannot update vnf. {}".format(e)) - def ns_update_vnfr(self, db_vnfrs, nsr_desc_RO): - """ - Updates database vnfr with the RO info, e.g. ip_address, vim_id... Descriptor db_vnfrs is also updated - :param db_vnfrs: dictionary with member-vnf-index: vnfr-content - :param nsr_desc_RO: nsr descriptor from RO - :return: Nothing, LcmException is raised on errors - """ - for vnf_index, db_vnfr in db_vnfrs.items(): - for vnf_RO in nsr_desc_RO["vnfs"]: - if vnf_RO["member_vnf_index"] != vnf_index: - continue - vnfr_update = {} - if vnf_RO.get("ip_address"): - db_vnfr["ip-address"] = vnfr_update["ip-address"] = vnf_RO[ - "ip_address" - ].split(";")[0] - elif not db_vnfr.get("ip-address"): - if db_vnfr.get("vdur"): # if not VDUs, there is not ip_address - raise LcmExceptionNoMgmtIP( - "ns member_vnf_index '{}' has no IP address".format( - vnf_index - ) - ) - - for vdu_index, vdur in enumerate(get_iterable(db_vnfr, "vdur")): - vdur_RO_count_index = 0 - if vdur.get("pdu-type"): - continue - for vdur_RO in get_iterable(vnf_RO, "vms"): - if vdur["vdu-id-ref"] != vdur_RO["vdu_osm_id"]: - continue - if vdur["count-index"] != vdur_RO_count_index: - vdur_RO_count_index += 1 - continue - vdur["vim-id"] = vdur_RO.get("vim_vm_id") - if vdur_RO.get("ip_address"): - vdur["ip-address"] = vdur_RO["ip_address"].split(";")[0] - else: - vdur["ip-address"] = None - vdur["vdu-id-ref"] = vdur_RO.get("vdu_osm_id") - vdur["name"] = vdur_RO.get("vim_name") - vdur["status"] = vdur_RO.get("status") - vdur["status-detailed"] = vdur_RO.get("error_msg") - for ifacer in get_iterable(vdur, "interfaces"): - for interface_RO in get_iterable(vdur_RO, "interfaces"): - if ifacer["name"] == interface_RO.get("internal_name"): - ifacer["ip-address"] = interface_RO.get( - "ip_address" - ) - ifacer["mac-address"] = interface_RO.get( - "mac_address" - ) - break - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} vdur={} interface={} " - "from VIM info".format( - vnf_index, vdur["vdu-id-ref"], ifacer["name"] - ) - ) - vnfr_update["vdur.{}".format(vdu_index)] = vdur - break - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} vdur={} count_index={} from " - "VIM info".format( - vnf_index, vdur["vdu-id-ref"], vdur["count-index"] - ) - ) - - for vld_index, vld in enumerate(get_iterable(db_vnfr, "vld")): - for net_RO in get_iterable(nsr_desc_RO, "nets"): - if vld["id"] != net_RO.get("vnf_net_osm_id"): - continue - vld["vim-id"] = net_RO.get("vim_net_id") - vld["name"] = net_RO.get("vim_name") - vld["status"] = net_RO.get("status") - vld["status-detailed"] = net_RO.get("error_msg") - vnfr_update["vld.{}".format(vld_index)] = vld - break - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} vld={} from VIM info".format( - vnf_index, vld["id"] - ) - ) - - self.update_db_2("vnfrs", db_vnfr["_id"], vnfr_update) - break - - else: - raise LcmException( - "ns_update_vnfr: Not found member_vnf_index={} from VIM info".format( - vnf_index - ) - ) - def _get_ns_config_info(self, nsr_id): """ Generates a mapping between vnf,vdu elements and the N2VC id @@ -895,7 +740,7 @@ class NsLcm(LcmBase): ) vdur = next((vdur for vdur in target_vnf.get("vdur", ())), None) if not vdur: - return + continue for a_index, a_vld in enumerate(target["ns"]["vld"]): target_vld = find_in_list( get_iterable(vdur, "interfaces"), @@ -1804,7 +1649,7 @@ class NsLcm(LcmBase): vca_id = self.get_vca_id(db_vnfr, db_nsr) # create or register execution environment in VCA - if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm", "helm-v3"): + if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm-v3"): self._write_configuration_status( nsr_id=nsr_id, vca_index=vca_index, @@ -1826,11 +1671,11 @@ class NsLcm(LcmBase): db_dict=db_dict, vca_id=vca_id, ) - elif vca_type == "helm" or vca_type == "helm-v3": + elif vca_type == "helm-v3": ee_id, credentials = await self.vca_map[ vca_type ].create_execution_environment( - namespace=namespace, + namespace=nsr_id, reuse_ee_id=ee_id, db_dict=db_dict, config=osm_config, @@ -1972,7 +1817,7 @@ class NsLcm(LcmBase): # if SSH access is required, then get execution environment SSH public # if native charm we have waited already to VM be UP - if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"): + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"): pub_key = None user = None # self.logger.debug("get ssh key block") @@ -2114,7 +1959,7 @@ class NsLcm(LcmBase): # TODO register in database that primitive is done # STEP 7 Configure metrics - if vca_type == "helm" or vca_type == "helm-v3": + if vca_type == "helm-v3": # TODO: review for those cases where the helm chart is a reference and # is not part of the NF package prometheus_jobs = await self.extract_prometheus_scrape_jobs( @@ -2381,14 +2226,6 @@ class NsLcm(LcmBase): df = vnfd.get("df", [{}])[0] # Checking for auto-scaling configuration if "scaling-aspect" in df: - rel_operation_types = { - "GE": ">=", - "LE": "<=", - "GT": ">", - "LT": "<", - "EQ": "==", - "NE": "!=", - } scaling_aspects = df["scaling-aspect"] all_vnfd_monitoring_params = {} for ivld in vnfd.get("int-virtual-link-desc", ()): @@ -2464,7 +2301,9 @@ class NsLcm(LcmBase): operation = scaling_criteria[ "scale-in-relational-operation" ] - rel_operator = rel_operation_types.get(operation, "<=") + rel_operator = self.rel_operation_types.get( + operation, "<=" + ) metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' expression = f"(count ({metric_selector}) > {instances_min_number}) and (avg({metric_selector}) {rel_operator} {scalein_threshold})" labels = { @@ -2505,7 +2344,9 @@ class NsLcm(LcmBase): operation = scaling_criteria[ "scale-out-relational-operation" ] - rel_operator = rel_operation_types.get(operation, "<=") + rel_operator = self.rel_operation_types.get( + operation, "<=" + ) metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' expression = f"(count ({metric_selector}) < {instances_max_number}) and (avg({metric_selector}) {rel_operator} {scaleout_threshold})" labels = { @@ -2541,6 +2382,96 @@ class NsLcm(LcmBase): alerts.append(alert) return alerts + def _gather_vnfr_alarm_alerts(self, vnfr, vnfd): + alerts = [] + nsr_id = vnfr["nsr-id-ref"] + vnf_member_index = vnfr["member-vnf-index-ref"] + + # Checking for VNF alarm configuration + for vdur in vnfr["vdur"]: + vdu_id = vdur["vdu-id-ref"] + vdu = next(filter(lambda vdu: vdu["id"] == vdu_id, vnfd["vdu"])) + if "alarm" in vdu: + # Get VDU monitoring params, since alerts are based on them + vdu_monitoring_params = {} + for mp in vdu.get("monitoring-parameter", []): + vdu_monitoring_params[mp.get("id")] = mp + if not vdu_monitoring_params: + self.logger.error( + "VDU alarm refers to a VDU monitoring param, but there are no VDU monitoring params in the VDU" + ) + continue + # Get alarms in the VDU + alarm_descriptors = vdu["alarm"] + # Create VDU alarms for each alarm in the VDU + for alarm_descriptor in alarm_descriptors: + # Check that the VDU alarm refers to a proper monitoring param + alarm_monitoring_param = alarm_descriptor.get( + "vnf-monitoring-param-ref", "" + ) + vdu_specific_monitoring_param = vdu_monitoring_params.get( + alarm_monitoring_param, {} + ) + if not vdu_specific_monitoring_param: + self.logger.error( + "VDU alarm refers to a VDU monitoring param not present in the VDU" + ) + continue + metric_name = vdu_specific_monitoring_param.get( + "performance-metric" + ) + if not metric_name: + self.logger.error( + "VDU alarm refers to a VDU monitoring param that has no associated performance-metric" + ) + continue + # Set params of the alarm to be created in Prometheus + metric_name = f"osm_{metric_name}" + metric_threshold = alarm_descriptor.get("value") + uuid = str(uuid4()) + alert_name = f"vdu_alarm_{uuid}" + operation = alarm_descriptor["operation"] + rel_operator = self.rel_operation_types.get(operation, "<=") + metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}' + expression = f"{metric_selector} {rel_operator} {metric_threshold}" + labels = { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + "vdu_name": "{{ $labels.vdu_name }}", + } + prom_cfg = { + "alert": alert_name, + "expr": expression, + "for": "1m", # default value. Ideally, this should be related to an IM param, but there is not such param + "labels": labels, + } + alarm_action = dict() + for action_type in ["ok", "insufficient-data", "alarm"]: + if ( + "actions" in alarm_descriptor + and action_type in alarm_descriptor["actions"] + ): + alarm_action[action_type] = alarm_descriptor["actions"][ + action_type + ] + alert = { + "uuid": uuid, + "name": alert_name, + "metric": metric_name, + "tags": { + "ns_id": nsr_id, + "vnf_member_index": vnf_member_index, + "vdu_id": vdu_id, + }, + "alarm_status": "ok", + "action_type": "vdu_alarm", + "action": alarm_action, + "prometheus_config": prom_cfg, + } + alerts.append(alert) + return alerts + def update_nsrs_with_pla_result(self, params): try: nslcmop_id = deep_get(params, ("placement", "nslcmopId")) @@ -2750,13 +2681,16 @@ class NsLcm(LcmBase): # create namespace and certificate if any helm based EE is present in the NS if check_helm_ee_in_ns(db_vnfds): - # TODO: create EE namespace + await self.vca_map["helm-v3"].setup_ns_namespace( + name=nsr_id, + ) # create TLS certificates await self.vca_map["helm-v3"].create_tls_certificate( - secret_name="ee-tls-{}".format(nsr_id), + secret_name=self.EE_TLS_NAME, dns_prefix="*", nsr_id=nsr_id, usage="server auth", + namespace=nsr_id, ) nsi_id = None # TODO put nsi_id when this nsr belongs to a NSI @@ -3033,7 +2967,15 @@ class NsLcm(LcmBase): stage[1] = stage[2] = "" except asyncio.CancelledError: error_list.append("Cancelled") - # TODO cancel all tasks + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( + logging_text, + tasks_dict_info, + timeout_ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) except Exception as exc: error_list.append(str(exc)) @@ -3091,6 +3033,10 @@ class NsLcm(LcmBase): self.logger.info(f"Storing scaling alert in MongoDB: {alert}") self.db.create("alerts", alert) + alarm_alerts = self._gather_vnfr_alarm_alerts(vnfr, vnfd) + for alert in alarm_alerts: + self.logger.info(f"Storing VNF alarm alert in MongoDB: {alert}") + self.db.create("alerts", alert) if db_nsr: self._write_ns_status( nsr_id=nsr_id, @@ -3118,6 +3064,12 @@ class NsLcm(LcmBase): "nsr_id": nsr_id, "nslcmop_id": nslcmop_id, "operationState": nslcmop_operation_state, + "startTime": db_nslcmop["startTime"], + "links": db_nslcmop["links"], + "operationParams": { + "nsInstanceId": nsr_id, + "nsdId": db_nsr["nsd-id"], + }, }, ) except Exception as e: @@ -3686,9 +3638,11 @@ class NsLcm(LcmBase): vnfr_data.get("_id"), {"kdur.{}.status".format(kdu_index): "ERROR"}, ) - except Exception: + except Exception as error: # ignore to keep original exception - pass + self.logger.warning( + f"An exception occurred while updating DB: {str(error)}" + ) # reraise original error raise @@ -3707,7 +3661,6 @@ class NsLcm(LcmBase): k8scluster_id_2_uuic = { "helm-chart-v3": {}, - "helm-chart": {}, "juju-bundle": {}, } @@ -3806,11 +3759,6 @@ class NsLcm(LcmBase): # Default version: helm3, if helm-version is v2 assign v2 k8sclustertype = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8sclustertype = "helm-chart" elif kdur.get("juju-bundle"): kdumodel = kdur["juju-bundle"] k8sclustertype = "juju-bundle" @@ -3848,8 +3796,8 @@ class NsLcm(LcmBase): kdumodel = self.fs.path + filename except (asyncio.TimeoutError, asyncio.CancelledError): raise - except Exception: # it is not a file - pass + except Exception as e: # it is not a file + self.logger.warning(f"An exception occurred: {str(e)}") k8s_cluster_id = kdur["k8s-cluster"]["id"] step = "Synchronize repos for k8s cluster '{}'".format( @@ -4031,10 +3979,7 @@ class NsLcm(LcmBase): vca_type = "native_charm" elif ee_item.get("helm-chart"): vca_name = ee_item["helm-chart"] - if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2": - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" else: self.logger.debug( logging_text + "skipping non juju neither charm configuration" @@ -4130,41 +4075,6 @@ class NsLcm(LcmBase): member_vnf_index or "", vdu_id or "" ) - @staticmethod - def _create_nslcmop(nsr_id, operation, params): - """ - Creates a ns-lcm-opp content to be stored at database. - :param nsr_id: internal id of the instance - :param operation: instantiate, terminate, scale, action, ... - :param params: user parameters for the operation - :return: dictionary following SOL005 format - """ - # Raise exception if invalid arguments - if not (nsr_id and operation and params): - raise LcmException( - "Parameters 'nsr_id', 'operation' and 'params' needed to create primitive not provided" - ) - now = time() - _id = str(uuid4()) - nslcmop = { - "id": _id, - "_id": _id, - # COMPLETED,PARTIALLY_COMPLETED,FAILED_TEMP,FAILED,ROLLING_BACK,ROLLED_BACK - "operationState": "PROCESSING", - "statusEnteredTime": now, - "nsInstanceId": nsr_id, - "lcmOperationType": operation, - "startTime": now, - "isAutomaticInvocation": False, - "operationParams": params, - "isCancelPending": False, - "links": { - "self": "/osm/nslcm/v1/ns_lcm_op_occs/" + _id, - "nsInstance": "/osm/nslcm/v1/ns_instances/" + nsr_id, - }, - } - return nslcmop - def _format_additional_params(self, params): params = params or {} for key, value in params.items(): @@ -4360,12 +4270,6 @@ class NsLcm(LcmBase): # Function to return execution_environment id - def _get_ee_id(self, vnf_index, vdu_id, vca_deployed_list): - # TODO vdu_index_count - for vca in vca_deployed_list: - if vca["member-vnf-index"] == vnf_index and vca["vdu_id"] == vdu_id: - return vca.get("ee_id") - async def destroy_N2VC( self, logging_text, @@ -4591,9 +4495,7 @@ class NsLcm(LcmBase): ) and vca.get("needed_terminate") # For helm we must destroy_ee. Also for native_charm, as juju_model cannot be deleted if there are # pending native charms - destroy_ee = ( - True if vca_type in ("helm", "helm-v3", "native_charm") else False - ) + destroy_ee = True if vca_type in ("helm-v3", "native_charm") else False # self.logger.debug(logging_text + "vca_index: {}, ee_id: {}, vca_type: {} destroy_ee: {}".format( # vca_index, vca.get("ee_id"), vca_type, destroy_ee)) task = asyncio.ensure_future( @@ -4646,9 +4548,12 @@ class NsLcm(LcmBase): # Delete Namespace and Certificates if necessary if check_helm_ee_in_ns(list(db_vnfds_from_member_index.values())): await self.vca_map["helm-v3"].delete_tls_certificate( - certificate_name=db_nslcmop["nsInstanceId"], + namespace=db_nslcmop["nsInstanceId"], + certificate_name=self.EE_TLS_NAME, + ) + await self.vca_map["helm-v3"].delete_namespace( + namespace=db_nslcmop["nsInstanceId"], ) - # TODO: Delete namespace # Delete from k8scluster stage[1] = "Deleting KDUs." @@ -4730,7 +4635,14 @@ class NsLcm(LcmBase): stage[1] = stage[2] = "" except asyncio.CancelledError: error_list.append("Cancelled") - # TODO cancell all tasks + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( + logging_text, + tasks_dict_info, + timeout_ns_terminate, + stage, + nslcmop_id, + ) except Exception as exc: error_list.append(str(exc)) # update status at database @@ -4893,6 +4805,11 @@ class NsLcm(LcmBase): self._write_op_status(nslcmop_id, stage) return error_detail_list + async def _cancel_pending_tasks(self, logging_text, created_tasks_info): + for task, name in created_tasks_info.items(): + self.logger.debug(logging_text + "Cancelling task: " + name) + task.cancel() + @staticmethod def _map_primitive_params(primitive_desc, params, instantiation_params): """ @@ -5257,7 +5174,7 @@ class NsLcm(LcmBase): kdu_action = ( True if primitive_name in actions - and kdu["k8scluster-type"] not in ("helm-chart", "helm-chart-v3") + and kdu["k8scluster-type"] != "helm-chart-v3" else False ) @@ -5947,13 +5864,7 @@ class NsLcm(LcmBase): # add chart to list and all parameters step = "Getting helm chart name" chart_name = ee_item.get("helm-chart") - if ( - ee_item.get("helm-version") - and ee_item.get("helm-version") == "v2" - ): - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" step = "Setting Helm chart artifact paths" helm_artifacts.append( @@ -6005,7 +5916,25 @@ class NsLcm(LcmBase): if result == "FAILED": nslcmop_operation_state = result error_description_nslcmop = detailed_status + old_operational_status = "failed" db_nslcmop_update["detailed-status"] = detailed_status + db_nsr_update["detailed-status"] = detailed_status + scaling_aspect = get_scaling_aspect(latest_vnfd) + scaling_group_desc = db_nsr.get("_admin").get( + "scaling-group", None + ) + if scaling_group_desc: + for aspect in scaling_aspect: + scaling_group_id = aspect.get("id") + for scale_index, scaling_group in enumerate( + scaling_group_desc + ): + if scaling_group.get("name") == scaling_group_id: + db_nsr_update[ + "_admin.scaling-group.{}.nb-scale-op".format( + scale_index + ) + ] = 0 self.logger.debug( logging_text + " step {} Done with result {} {}".format( @@ -6334,6 +6263,7 @@ class NsLcm(LcmBase): old_operational_status = "" old_config_status = "" nsi_id = None + prom_job_name = "" try: # wait for any previous tasks in process step = "Waiting for previous operations to terminate" @@ -6444,6 +6374,15 @@ class NsLcm(LcmBase): vdud = get_vdu(db_vnfd, vdu_delta["id"]) # vdu_index also provides the number of instance of the targeted vdu vdu_count = vdu_index = get_vdur_index(db_vnfr, vdu_delta) + if vdu_index <= len(db_vnfr["vdur"]): + vdu_name_id = db_vnfr["vdur"][vdu_index - 1]["vdu-name"] + prom_job_name = ( + db_vnfr["_id"] + vdu_name_id + str(vdu_index - 1) + ) + prom_job_name = prom_job_name.replace("_", "") + prom_job_name = prom_job_name.replace("-", "") + else: + prom_job_name = None cloud_init_text = self._get_vdu_cloud_init_content( vdud, db_vnfd ) @@ -6520,11 +6459,6 @@ class NsLcm(LcmBase): if kdur.get("helm-chart"): k8s_cluster_type = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8s_cluster_type = "helm-chart" elif kdur.get("juju-bundle"): k8s_cluster_type = "juju-bundle" else: @@ -6653,11 +6587,6 @@ class NsLcm(LcmBase): if kdur.get("helm-chart"): k8s_cluster_type = "helm-chart-v3" self.logger.debug("kdur: {}".format(kdur)) - if ( - kdur.get("helm-version") - and kdur.get("helm-version") == "v2" - ): - k8s_cluster_type = "helm-chart" elif kdur.get("juju-bundle"): k8s_cluster_type = "juju-bundle" else: @@ -7242,7 +7171,69 @@ class NsLcm(LcmBase): db_nsr_update["config-status"] = old_config_status scale_process = None # POST-SCALE END + # Check if each vnf has exporter for metric collection if so update prometheus job records + if scaling_type == "SCALE_OUT": + if "exporters-endpoints" in db_vnfd.get("df")[0]: + vnfr_id = db_vnfr["id"] + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + exporter_config = db_vnfd.get("df")[0].get("exporters-endpoints") + self.logger.debug("exporter config :{}".format(exporter_config)) + artifact_path = "{}/{}/{}".format( + base_folder["folder"], + base_folder["pkg-dir"], + "exporter-endpoint", + ) + ee_id = None + ee_config_descriptor = exporter_config + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro( + logging_text, + nsr_id, + vnfr_id, + vdu_id=db_vnfr["vdur"][-1]["vdu-id-ref"], + vdu_index=db_vnfr["vdur"][-1]["count-index"], + user=None, + pub_key=None, + ) + self.logger.debug("rw_mgmt_ip:{}".format(rw_mgmt_ip)) + self.logger.debug("Artifact_path:{}".format(artifact_path)) + vdu_id_for_prom = None + vdu_index_for_prom = None + for x in get_iterable(db_vnfr, "vdur"): + vdu_id_for_prom = x.get("vdu-id-ref") + vdu_index_for_prom = x.get("count-index") + vnfr_id = vnfr_id + vdu_id + str(vdu_index) + vnfr_id = vnfr_id.replace("_", "") + prometheus_jobs = await self.extract_prometheus_scrape_jobs( + ee_id=ee_id, + artifact_path=artifact_path, + ee_config_descriptor=ee_config_descriptor, + vnfr_id=vnfr_id, + nsr_id=nsr_id, + target_ip=rw_mgmt_ip, + element_type="VDU", + vdu_id=vdu_id_for_prom, + vdu_index=vdu_index_for_prom, + ) + self.logger.debug("Prometheus job:{}".format(prometheus_jobs)) + if prometheus_jobs: + db_nsr_update[ + "_admin.deployed.prometheus_jobs" + ] = prometheus_jobs + self.update_db_2( + "nsrs", + nsr_id, + db_nsr_update, + ) + + for job in prometheus_jobs: + self.db.set_one( + "prometheus_jobs", + {"job_name": ""}, + job, + upsert=True, + fail_on_empty=False, + ) db_nsr_update[ "detailed-status" ] = "" # "scaled {} {}".format(scaling_group, scaling_type) @@ -7273,16 +7264,31 @@ class NsLcm(LcmBase): exc_info=True, ) finally: + error_list = list() + if exc: + error_list.append(str(exc)) self._write_ns_status( nsr_id=nsr_id, ns_state=None, current_operation="IDLE", current_operation_id=None, ) - if tasks_dict_info: - stage[1] = "Waiting for instantiate pending tasks." - self.logger.debug(logging_text + stage[1]) - exc = await self._wait_for_tasks( + try: + if tasks_dict_info: + stage[1] = "Waiting for instantiate pending tasks." + self.logger.debug(logging_text + stage[1]) + exc = await self._wait_for_tasks( + logging_text, + tasks_dict_info, + self.timeout.ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) + except asyncio.CancelledError: + error_list.append("Cancelled") + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( logging_text, tasks_dict_info, self.timeout.ns_deploy, @@ -7290,10 +7296,13 @@ class NsLcm(LcmBase): nslcmop_id, nsr_id=nsr_id, ) - if exc: + if error_list: + error_detail = "; ".join(error_list) db_nslcmop_update[ "detailed-status" - ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + ] = error_description_nslcmop = "FAILED {}: {}".format( + step, error_detail + ) nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status @@ -7307,12 +7316,18 @@ class NsLcm(LcmBase): db_nsr_update[ "detailed-status" ] = "FAILED scaling nslcmop={} {}: {}".format( - nslcmop_id, step, exc + nslcmop_id, step, error_detail ) else: error_description_nslcmop = None nslcmop_operation_state = "COMPLETED" db_nslcmop_update["detailed-status"] = "Done" + if scaling_type == "SCALE_IN" and prom_job_name is not None: + self.db.del_one( + "prometheus_jobs", + {"job_name": prom_job_name}, + fail_on_empty=False, + ) self._write_op_status( op_id=nslcmop_id, @@ -7608,10 +7623,11 @@ class NsLcm(LcmBase): f"Timeout waiting KDU with name={kdu_name} and index={kdu_index} to be intantiated" ) - # TODO get_service if ee_id is not None: - _, _, service = ee_id.partition(".") # remove prefix "namespace." - host_name = "{}-{}".format(service, ee_config_descriptor["metric-service"]) + _, namespace, helm_id = get_ee_id_parts( + ee_id + ) # get namespace and EE gRPC service name + host_name = f'{helm_id}-{ee_config_descriptor["metric-service"]}.{namespace}.svc' # svc_name.namespace.svc host_port = "80" vnfr_id = vnfr_id.replace("-", "") variables = { @@ -7735,28 +7751,6 @@ class NsLcm(LcmBase): ) return "FAILED", "Error in operate VNF {}".format(exc) - def get_vca_cloud_and_credentials(self, vim_account_id: str) -> (str, str): - """ - Get VCA Cloud and VCA Cloud Credentials for the VIM account - - :param: vim_account_id: VIM Account ID - - :return: (cloud_name, cloud_credential) - """ - config = VimAccountDB.get_vim_account_with_id(vim_account_id).get("config", {}) - return config.get("vca_cloud"), config.get("vca_cloud_credential") - - def get_vca_k8s_cloud_and_credentials(self, vim_account_id: str) -> (str, str): - """ - Get VCA K8s Cloud and VCA K8s Cloud Credentials for the VIM account - - :param: vim_account_id: VIM Account ID - - :return: (cloud_name, cloud_credential) - """ - config = VimAccountDB.get_vim_account_with_id(vim_account_id).get("config", {}) - return config.get("vca_k8s_cloud"), config.get("vca_k8s_cloud_credential") - async def migrate(self, nsr_id, nslcmop_id): """ Migrate VNFs and VDUs instances in a NS @@ -8077,10 +8071,25 @@ class NsLcm(LcmBase): exc_info=True, ) finally: - if tasks_dict_info: - stage[1] = "Waiting for healing pending tasks." - self.logger.debug(logging_text + stage[1]) - exc = await self._wait_for_tasks( + error_list = list() + if exc: + error_list.append(str(exc)) + try: + if tasks_dict_info: + stage[1] = "Waiting for healing pending tasks." + self.logger.debug(logging_text + stage[1]) + exc = await self._wait_for_tasks( + logging_text, + tasks_dict_info, + self.timeout.ns_deploy, + stage, + nslcmop_id, + nsr_id=nsr_id, + ) + except asyncio.CancelledError: + error_list.append("Cancelled") + await self._cancel_pending_tasks(logging_text, tasks_dict_info) + await self._wait_for_tasks( logging_text, tasks_dict_info, self.timeout.ns_deploy, @@ -8088,17 +8097,22 @@ class NsLcm(LcmBase): nslcmop_id, nsr_id=nsr_id, ) - if exc: + if error_list: + error_detail = "; ".join(error_list) db_nslcmop_update[ "detailed-status" - ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + ] = error_description_nslcmop = "FAILED {}: {}".format( + step, error_detail + ) nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status db_nsr_update["config-status"] = old_config_status db_nsr_update[ "detailed-status" - ] = "FAILED healing nslcmop={} {}: {}".format(nslcmop_id, step, exc) + ] = "FAILED healing nslcmop={} {}: {}".format( + nslcmop_id, step, error_detail + ) for task, task_name in tasks_dict_info.items(): if not task.done() or task.cancelled() or task.exception(): if task_name.startswith(self.task_name_deploy_vca): @@ -8298,10 +8312,7 @@ class NsLcm(LcmBase): vca_type = "native_charm" elif ee_item.get("helm-chart"): vca_name = ee_item["helm-chart"] - if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2": - vca_type = "helm" - else: - vca_type = "helm-v3" + vca_type = "helm-v3" else: self.logger.debug( logging_text + "skipping non juju neither charm configuration" @@ -8633,7 +8644,7 @@ class NsLcm(LcmBase): # if SSH access is required, then get execution environment SSH public # if native charm we have waited already to VM be UP - if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"): + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"): pub_key = None user = None # self.logger.debug("get ssh key block") @@ -8841,6 +8852,10 @@ class NsLcm(LcmBase): db_nslcmop = None db_nslcmop_update = {} nslcmop_operation_state = None + old_db_update = {} + q_filter = {} + old_vdu_index = None + old_flavor_id = None db_nsr_update = {} target = {} exc = None @@ -8864,6 +8879,59 @@ class NsLcm(LcmBase): ) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) operationParams = db_nslcmop.get("operationParams") + # Update the VNFRS and NSRS with the requested flavour detail, So that ro tasks can function properly + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + db_flavor = db_nsr.get("flavor") + db_flavor_index = str(len(db_flavor)) + change_vnf_flavor_data = operationParams["changeVnfFlavorData"] + flavor_dict = change_vnf_flavor_data["additionalParams"] + count_index = flavor_dict["vduCountIndex"] + vdu_id_ref = flavor_dict["vduid"] + flavor_dict_update = { + "id": db_flavor_index, + "memory-mb": flavor_dict["virtualMemory"], + "name": f"{vdu_id_ref}-{count_index}-flv", + "storage-gb": flavor_dict["sizeOfStorage"], + "vcpu-count": flavor_dict["numVirtualCpu"], + } + db_flavor.append(flavor_dict_update) + db_update = {} + db_update["flavor"] = db_flavor + ns_q_filter = { + "_id": nsr_id, + } + self.db.set_one( + "nsrs", + q_filter=ns_q_filter, + update_dict=db_update, + fail_on_empty=True, + ) + db_vnfr = self.db.get_one( + "vnfrs", {"_id": change_vnf_flavor_data["vnfInstanceId"]} + ) + for vdu_index, vdur in enumerate(db_vnfr.get("vdur", ())): + if ( + vdur.get("count-index") == count_index + and vdur.get("vdu-id-ref") == vdu_id_ref + ): + old_flavor_id = vdur.get("ns-flavor-id", 0) + old_vdu_index = vdu_index + filter_text = { + "_id": change_vnf_flavor_data["vnfInstanceId"], + "vdur.count-index": count_index, + "vdur.vdu-id-ref": vdu_id_ref, + } + q_filter.update(filter_text) + db_update = {} + db_update[ + "vdur.{}.ns-flavor-id".format(vdu_index) + ] = db_flavor_index + self.db.set_one( + "vnfrs", + q_filter=q_filter, + update_dict=db_update, + fail_on_empty=True, + ) target = {} target.update(operationParams) desc = await self.RO.vertical_scale(nsr_id, target) @@ -8877,7 +8945,12 @@ class NsLcm(LcmBase): self.timeout.verticalscale, operation="verticalscale", ) - except (ROclient.ROClientException, DbException, LcmException) as e: + except ( + NgRoException, + ROclient.ROClientException, + DbException, + LcmException, + ) as e: self.logger.error("Exit Exception {}".format(e)) exc = e except asyncio.CancelledError: @@ -8898,6 +8971,9 @@ class NsLcm(LcmBase): if exc: db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) nslcmop_operation_state = "FAILED" + old_db_update[ + "vdur.{}.ns-flavor-id".format(old_vdu_index) + ] = old_flavor_id else: nslcmop_operation_state = "COMPLETED" db_nslcmop_update["detailed-status"] = "Done" @@ -8910,6 +8986,16 @@ class NsLcm(LcmBase): operation_state=nslcmop_operation_state, other_update=db_nslcmop_update, ) + if old_vdu_index and old_db_update != {}: + self.logger.critical( + "Reverting Old Flavor -- : {}".format(old_db_update) + ) + self.db.set_one( + "vnfrs", + q_filter=q_filter, + update_dict=old_db_update, + fail_on_empty=True, + ) if nslcmop_operation_state: try: msg = {