from osm_lcm.data_utils.dict_utils import parse_yaml_strings
from osm_lcm.data_utils.database.vim_account import VimAccountDB
from n2vc.definitions import RelationEndpoint
-from n2vc.k8s_helm_conn import K8sHelmConnector
from n2vc.k8s_helm3_conn import K8sHelm3Connector
from n2vc.k8s_juju_conn import K8sJujuConnector
SUBOPERATION_STATUS_NOT_FOUND = -1
SUBOPERATION_STATUS_NEW = -2
SUBOPERATION_STATUS_SKIP = -3
+ EE_TLS_NAME = "ee-tls"
task_name_deploy_vca = "Deploying VCA"
+ # Maps the relational-operation codes found in descriptors (scaling criteria
+ # and VDU alarms) to the comparison operator that is embedded in the alert
+ # expression. Code that looks a value up here falls back to "<=" when the
+ # code is not one of these keys.
+ rel_operation_types = {
+ "GE": ">=",
+ "LE": "<=",
+ "GT": ">",
+ "LT": "<",
+ "EQ": "==",
+ "NE": "!=",
+ }
def __init__(self, msg, lcm_tasks, config: LcmCfg):
"""
on_update_db=self._on_update_n2vc_db,
)
- self.k8sclusterhelm2 = K8sHelmConnector(
- kubectl_command=self.vca_config.kubectlpath,
- helm_command=self.vca_config.helmpath,
- log=self.logger,
- on_update_db=None,
- fs=self.fs,
- db=self.db,
- )
-
self.k8sclusterhelm3 = K8sHelm3Connector(
kubectl_command=self.vca_config.kubectlpath,
helm_command=self.vca_config.helm3path,
)
self.k8scluster_map = {
- "helm-chart": self.k8sclusterhelm2,
"helm-chart-v3": self.k8sclusterhelm3,
"chart": self.k8sclusterhelm3,
"juju-bundle": self.k8sclusterjuju,
)
vdur = next((vdur for vdur in target_vnf.get("vdur", ())), None)
if not vdur:
- return
+ continue
for a_index, a_vld in enumerate(target["ns"]["vld"]):
target_vld = find_in_list(
get_iterable(vdur, "interfaces"),
vca_id = self.get_vca_id(db_vnfr, db_nsr)
# create or register execution environment in VCA
- if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm", "helm-v3"):
+ if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm-v3"):
self._write_configuration_status(
nsr_id=nsr_id,
vca_index=vca_index,
db_dict=db_dict,
vca_id=vca_id,
)
- elif vca_type == "helm" or vca_type == "helm-v3":
+ elif vca_type == "helm-v3":
ee_id, credentials = await self.vca_map[
vca_type
].create_execution_environment(
- namespace=namespace,
+ namespace=nsr_id,
reuse_ee_id=ee_id,
db_dict=db_dict,
config=osm_config,
# if SSH access is required, then get execution environment SSH public
# if native charm we have waited already to VM be UP
- if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"):
+ if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"):
pub_key = None
user = None
# self.logger.debug("get ssh key block")
# TODO register in database that primitive is done
# STEP 7 Configure metrics
- if vca_type == "helm" or vca_type == "helm-v3":
+ if vca_type == "helm-v3":
# TODO: review for those cases where the helm chart is a reference and
# is not part of the NF package
prometheus_jobs = await self.extract_prometheus_scrape_jobs(
df = vnfd.get("df", [{}])[0]
# Checking for auto-scaling configuration
if "scaling-aspect" in df:
- rel_operation_types = {
- "GE": ">=",
- "LE": "<=",
- "GT": ">",
- "LT": "<",
- "EQ": "==",
- "NE": "!=",
- }
scaling_aspects = df["scaling-aspect"]
all_vnfd_monitoring_params = {}
for ivld in vnfd.get("int-virtual-link-desc", ()):
operation = scaling_criteria[
"scale-in-relational-operation"
]
- rel_operator = rel_operation_types.get(operation, "<=")
+ rel_operator = self.rel_operation_types.get(
+ operation, "<="
+ )
metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}'
expression = f"(count ({metric_selector}) > {instances_min_number}) and (avg({metric_selector}) {rel_operator} {scalein_threshold})"
labels = {
operation = scaling_criteria[
"scale-out-relational-operation"
]
- rel_operator = rel_operation_types.get(operation, "<=")
+ rel_operator = self.rel_operation_types.get(
+ operation, "<="
+ )
metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}'
expression = f"(count ({metric_selector}) < {instances_max_number}) and (avg({metric_selector}) {rel_operator} {scaleout_threshold})"
labels = {
alerts.append(alert)
return alerts
+ def _gather_vnfr_alarm_alerts(self, vnfr, vnfd):
+ # Build the list of alert records for the alarms declared on the VDUs of a VNF.
+ #
+ # For each entry in vnfr["vdur"], the VNFD vdu whose "id" equals the
+ # vdur's "vdu-id-ref" is looked up. If that vdu declares "alarm" entries,
+ # one alert dict is produced per alarm that can be resolved to a
+ # performance metric. Alarms that cannot be resolved are skipped after
+ # logging an error; nothing is raised for them.
+ #
+ # :param vnfr: VNF record dict; "nsr-id-ref", "member-vnf-index-ref" and
+ #     "vdur" are read with direct indexing, so a missing key raises KeyError.
+ # :param vnfd: VNF descriptor dict; vnfd["vdu"] is read with direct indexing.
+ # :return: list of alert dicts (possibly empty), each carrying a fresh uuid,
+ #     the alert name, metric, tags, the configured actions and the
+ #     "prometheus_config" rule.
+ alerts = []
+ nsr_id = vnfr["nsr-id-ref"]
+ vnf_member_index = vnfr["member-vnf-index-ref"]
+
+ # Checking for VNF alarm configuration
+ for vdur in vnfr["vdur"]:
+ vdu_id = vdur["vdu-id-ref"]
+ # NOTE(review): next() is called without a default, so a vdur whose
+ # "vdu-id-ref" matches no vdu in the VNFD raises StopIteration here.
+ vdu = next(filter(lambda vdu: vdu["id"] == vdu_id, vnfd["vdu"]))
+ if "alarm" in vdu:
+ # Get VDU monitoring params, since alerts are based on them
+ # (indexed by their "id" so alarms can reference them directly)
+ vdu_monitoring_params = {}
+ for mp in vdu.get("monitoring-parameter", []):
+ vdu_monitoring_params[mp.get("id")] = mp
+ if not vdu_monitoring_params:
+ self.logger.error(
+ "VDU alarm refers to a VDU monitoring param, but there are no VDU monitoring params in the VDU"
+ )
+ # Skip this vdur entirely: none of its alarms can be resolved
+ continue
+ # Get alarms in the VDU
+ alarm_descriptors = vdu["alarm"]
+ # Create VDU alarms for each alarm in the VDU
+ for alarm_descriptor in alarm_descriptors:
+ # Check that the VDU alarm refers to a proper monitoring param
+ alarm_monitoring_param = alarm_descriptor.get(
+ "vnf-monitoring-param-ref", ""
+ )
+ vdu_specific_monitoring_param = vdu_monitoring_params.get(
+ alarm_monitoring_param, {}
+ )
+ if not vdu_specific_monitoring_param:
+ self.logger.error(
+ "VDU alarm refers to a VDU monitoring param not present in the VDU"
+ )
+ continue
+ metric_name = vdu_specific_monitoring_param.get(
+ "performance-metric"
+ )
+ if not metric_name:
+ self.logger.error(
+ "VDU alarm refers to a VDU monitoring param that has no associated performance-metric"
+ )
+ continue
+ # Set params of the alarm to be created in Prometheus
+ # The metric is referenced with an "osm_" prefix in the expression
+ metric_name = f"osm_{metric_name}"
+ # NOTE(review): "value" is read with .get(); if it is absent the
+ # expression below is rendered with the text "None" - confirm that
+ # the descriptor always provides it.
+ metric_threshold = alarm_descriptor.get("value")
+ uuid = str(uuid4())
+ alert_name = f"vdu_alarm_{uuid}"
+ # "operation" is mandatory here (KeyError if missing); codes that are
+ # not keys of rel_operation_types fall back to "<="
+ operation = alarm_descriptor["operation"]
+ rel_operator = self.rel_operation_types.get(operation, "<=")
+ metric_selector = f'{metric_name}{{ns_id="{nsr_id}", vnf_member_index="{vnf_member_index}", vdu_id="{vdu_id}"}}'
+ expression = f"{metric_selector} {rel_operator} {metric_threshold}"
+ labels = {
+ "ns_id": nsr_id,
+ "vnf_member_index": vnf_member_index,
+ "vdu_id": vdu_id,
+ # Stored as a literal template placeholder, not resolved here;
+ # presumably expanded when the rule is evaluated - TODO confirm
+ "vdu_name": "{{ $labels.vdu_name }}",
+ }
+ prom_cfg = {
+ "alert": alert_name,
+ "expr": expression,
+ "for": "1m", # default value. Ideally, this should be related to an IM param, but there is not such param
+ "labels": labels,
+ }
+ # Copy only the action types that the alarm descriptor actually defines
+ alarm_action = dict()
+ for action_type in ["ok", "insufficient-data", "alarm"]:
+ if (
+ "actions" in alarm_descriptor
+ and action_type in alarm_descriptor["actions"]
+ ):
+ alarm_action[action_type] = alarm_descriptor["actions"][
+ action_type
+ ]
+ # Every alert record is created with status "ok"
+ alert = {
+ "uuid": uuid,
+ "name": alert_name,
+ "metric": metric_name,
+ "tags": {
+ "ns_id": nsr_id,
+ "vnf_member_index": vnf_member_index,
+ "vdu_id": vdu_id,
+ },
+ "alarm_status": "ok",
+ "action_type": "vdu_alarm",
+ "action": alarm_action,
+ "prometheus_config": prom_cfg,
+ }
+ alerts.append(alert)
+ return alerts
+
def update_nsrs_with_pla_result(self, params):
try:
nslcmop_id = deep_get(params, ("placement", "nslcmopId"))
# create namespace and certificate if any helm based EE is present in the NS
if check_helm_ee_in_ns(db_vnfds):
- # TODO: create EE namespace
+ await self.vca_map["helm-v3"].setup_ns_namespace(
+ name=nsr_id,
+ )
# create TLS certificates
await self.vca_map["helm-v3"].create_tls_certificate(
- secret_name="ee-tls-{}".format(nsr_id),
+ secret_name=self.EE_TLS_NAME,
dns_prefix="*",
nsr_id=nsr_id,
usage="server auth",
+ namespace=nsr_id,
)
nsi_id = None # TODO put nsi_id when this nsr belongs to a NSI
stage[1] = stage[2] = ""
except asyncio.CancelledError:
error_list.append("Cancelled")
- # TODO cancel all tasks
+ await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+ await self._wait_for_tasks(
+ logging_text,
+ tasks_dict_info,
+ timeout_ns_deploy,
+ stage,
+ nslcmop_id,
+ nsr_id=nsr_id,
+ )
except Exception as exc:
error_list.append(str(exc))
self.logger.info(f"Storing scaling alert in MongoDB: {alert}")
self.db.create("alerts", alert)
+ alarm_alerts = self._gather_vnfr_alarm_alerts(vnfr, vnfd)
+ for alert in alarm_alerts:
+ self.logger.info(f"Storing VNF alarm alert in MongoDB: {alert}")
+ self.db.create("alerts", alert)
if db_nsr:
self._write_ns_status(
nsr_id=nsr_id,
"nsr_id": nsr_id,
"nslcmop_id": nslcmop_id,
"operationState": nslcmop_operation_state,
+ "startTime": db_nslcmop["startTime"],
+ "links": db_nslcmop["links"],
+ "operationParams": {
+ "nsInstanceId": nsr_id,
+ "nsdId": db_nsr["nsd-id"],
+ },
},
)
except Exception as e:
vnfr_data.get("_id"),
{"kdur.{}.status".format(kdu_index): "ERROR"},
)
- except Exception:
+ except Exception as error:
# ignore to keep original exception
- pass
+ self.logger.warning(
+ f"An exception occurred while updating DB: {str(error)}"
+ )
# reraise original error
raise
k8scluster_id_2_uuic = {
"helm-chart-v3": {},
- "helm-chart": {},
"juju-bundle": {},
}
# Default version: helm3, if helm-version is v2 assign v2
k8sclustertype = "helm-chart-v3"
self.logger.debug("kdur: {}".format(kdur))
- if (
- kdur.get("helm-version")
- and kdur.get("helm-version") == "v2"
- ):
- k8sclustertype = "helm-chart"
elif kdur.get("juju-bundle"):
kdumodel = kdur["juju-bundle"]
k8sclustertype = "juju-bundle"
kdumodel = self.fs.path + filename
except (asyncio.TimeoutError, asyncio.CancelledError):
raise
- except Exception: # it is not a file
- pass
+ except Exception as e: # it is not a file
+ self.logger.warning(f"An exception occurred: {str(e)}")
k8s_cluster_id = kdur["k8s-cluster"]["id"]
step = "Synchronize repos for k8s cluster '{}'".format(
vca_type = "native_charm"
elif ee_item.get("helm-chart"):
vca_name = ee_item["helm-chart"]
- if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2":
- vca_type = "helm"
- else:
- vca_type = "helm-v3"
+ vca_type = "helm-v3"
else:
self.logger.debug(
logging_text + "skipping non juju neither charm configuration"
) and vca.get("needed_terminate")
# For helm we must destroy_ee. Also for native_charm, as juju_model cannot be deleted if there are
# pending native charms
- destroy_ee = (
- True if vca_type in ("helm", "helm-v3", "native_charm") else False
- )
+ destroy_ee = True if vca_type in ("helm-v3", "native_charm") else False
# self.logger.debug(logging_text + "vca_index: {}, ee_id: {}, vca_type: {} destroy_ee: {}".format(
# vca_index, vca.get("ee_id"), vca_type, destroy_ee))
task = asyncio.ensure_future(
# Delete Namespace and Certificates if necessary
if check_helm_ee_in_ns(list(db_vnfds_from_member_index.values())):
await self.vca_map["helm-v3"].delete_tls_certificate(
- certificate_name=db_nslcmop["nsInstanceId"],
+ namespace=db_nslcmop["nsInstanceId"],
+ certificate_name=self.EE_TLS_NAME,
+ )
+ await self.vca_map["helm-v3"].delete_namespace(
+ namespace=db_nslcmop["nsInstanceId"],
)
- # TODO: Delete namespace
# Delete from k8scluster
stage[1] = "Deleting KDUs."
stage[1] = stage[2] = ""
except asyncio.CancelledError:
error_list.append("Cancelled")
- # TODO cancell all tasks
+ await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+ await self._wait_for_tasks(
+ logging_text,
+ tasks_dict_info,
+ timeout_ns_terminate,
+ stage,
+ nslcmop_id,
+ )
except Exception as exc:
error_list.append(str(exc))
# update status at database
self._write_op_status(nslcmop_id, stage)
return error_detail_list
+ async def _cancel_pending_tasks(self, logging_text, created_tasks_info):
+ # Request cancellation of every task tracked in created_tasks_info.
+ #
+ # :param logging_text: prefix prepended to each debug log line
+ # :param created_tasks_info: dict whose keys are the task objects and whose
+ #     values are their names (names are concatenated to the log text, so
+ #     they must be str)
+ #
+ # Declared async although nothing is awaited in the body: it only calls
+ # cancel() on each task and returns without waiting for the tasks to
+ # finish. Assuming these are asyncio tasks, cancel() merely schedules the
+ # cancellation, so a caller that needs them finished has to wait afterwards.
+ for task, name in created_tasks_info.items():
+ self.logger.debug(logging_text + "Cancelling task: " + name)
+ task.cancel()
+
@staticmethod
def _map_primitive_params(primitive_desc, params, instantiation_params):
"""
kdu_action = (
True
if primitive_name in actions
- and kdu["k8scluster-type"] not in ("helm-chart", "helm-chart-v3")
+ and kdu["k8scluster-type"] != "helm-chart-v3"
else False
)
# add chart to list and all parameters
step = "Getting helm chart name"
chart_name = ee_item.get("helm-chart")
- if (
- ee_item.get("helm-version")
- and ee_item.get("helm-version") == "v2"
- ):
- vca_type = "helm"
- else:
- vca_type = "helm-v3"
+ vca_type = "helm-v3"
step = "Setting Helm chart artifact paths"
helm_artifacts.append(
if kdur.get("helm-chart"):
k8s_cluster_type = "helm-chart-v3"
self.logger.debug("kdur: {}".format(kdur))
- if (
- kdur.get("helm-version")
- and kdur.get("helm-version") == "v2"
- ):
- k8s_cluster_type = "helm-chart"
elif kdur.get("juju-bundle"):
k8s_cluster_type = "juju-bundle"
else:
if kdur.get("helm-chart"):
k8s_cluster_type = "helm-chart-v3"
self.logger.debug("kdur: {}".format(kdur))
- if (
- kdur.get("helm-version")
- and kdur.get("helm-version") == "v2"
- ):
- k8s_cluster_type = "helm-chart"
elif kdur.get("juju-bundle"):
k8s_cluster_type = "juju-bundle"
else:
exc_info=True,
)
finally:
+ error_list = list()
+ if exc:
+ error_list.append(str(exc))
self._write_ns_status(
nsr_id=nsr_id,
ns_state=None,
current_operation="IDLE",
current_operation_id=None,
)
- if tasks_dict_info:
- stage[1] = "Waiting for instantiate pending tasks."
- self.logger.debug(logging_text + stage[1])
- exc = await self._wait_for_tasks(
+ try:
+ if tasks_dict_info:
+ stage[1] = "Waiting for instantiate pending tasks."
+ self.logger.debug(logging_text + stage[1])
+ exc = await self._wait_for_tasks(
+ logging_text,
+ tasks_dict_info,
+ self.timeout.ns_deploy,
+ stage,
+ nslcmop_id,
+ nsr_id=nsr_id,
+ )
+ except asyncio.CancelledError:
+ error_list.append("Cancelled")
+ await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+ await self._wait_for_tasks(
logging_text,
tasks_dict_info,
self.timeout.ns_deploy,
nslcmop_id,
nsr_id=nsr_id,
)
- if exc:
+ if error_list:
+ error_detail = "; ".join(error_list)
db_nslcmop_update[
"detailed-status"
- ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc)
+ ] = error_description_nslcmop = "FAILED {}: {}".format(
+ step, error_detail
+ )
nslcmop_operation_state = "FAILED"
if db_nsr:
db_nsr_update["operational-status"] = old_operational_status
db_nsr_update[
"detailed-status"
] = "FAILED scaling nslcmop={} {}: {}".format(
- nslcmop_id, step, exc
+ nslcmop_id, step, error_detail
)
else:
error_description_nslcmop = None
f"Timeout waiting KDU with name={kdu_name} and index={kdu_index} to be intantiated"
)
- # TODO get_service
if ee_id is not None:
- _, _, service = ee_id.partition(".") # remove prefix "namespace."
- host_name = "{}-{}".format(service, ee_config_descriptor["metric-service"])
+ _, namespace, helm_id = get_ee_id_parts(
+ ee_id
+ ) # get namespace and EE gRPC service name
+ host_name = f'{helm_id}-{ee_config_descriptor["metric-service"]}.{namespace}.svc' # svc_name.namespace.svc
host_port = "80"
vnfr_id = vnfr_id.replace("-", "")
variables = {
exc_info=True,
)
finally:
- if tasks_dict_info:
- stage[1] = "Waiting for healing pending tasks."
- self.logger.debug(logging_text + stage[1])
- exc = await self._wait_for_tasks(
+ error_list = list()
+ if exc:
+ error_list.append(str(exc))
+ try:
+ if tasks_dict_info:
+ stage[1] = "Waiting for healing pending tasks."
+ self.logger.debug(logging_text + stage[1])
+ exc = await self._wait_for_tasks(
+ logging_text,
+ tasks_dict_info,
+ self.timeout.ns_deploy,
+ stage,
+ nslcmop_id,
+ nsr_id=nsr_id,
+ )
+ except asyncio.CancelledError:
+ error_list.append("Cancelled")
+ await self._cancel_pending_tasks(logging_text, tasks_dict_info)
+ await self._wait_for_tasks(
logging_text,
tasks_dict_info,
self.timeout.ns_deploy,
nslcmop_id,
nsr_id=nsr_id,
)
- if exc:
+ if error_list:
+ error_detail = "; ".join(error_list)
db_nslcmop_update[
"detailed-status"
- ] = error_description_nslcmop = "FAILED {}: {}".format(step, exc)
+ ] = error_description_nslcmop = "FAILED {}: {}".format(
+ step, error_detail
+ )
nslcmop_operation_state = "FAILED"
if db_nsr:
db_nsr_update["operational-status"] = old_operational_status
db_nsr_update["config-status"] = old_config_status
db_nsr_update[
"detailed-status"
- ] = "FAILED healing nslcmop={} {}: {}".format(nslcmop_id, step, exc)
+ ] = "FAILED healing nslcmop={} {}: {}".format(
+ nslcmop_id, step, error_detail
+ )
for task, task_name in tasks_dict_info.items():
if not task.done() or task.cancelled() or task.exception():
if task_name.startswith(self.task_name_deploy_vca):
vca_type = "native_charm"
elif ee_item.get("helm-chart"):
vca_name = ee_item["helm-chart"]
- if ee_item.get("helm-version") and ee_item.get("helm-version") == "v2":
- vca_type = "helm"
- else:
- vca_type = "helm-v3"
+ vca_type = "helm-v3"
else:
self.logger.debug(
logging_text + "skipping non juju neither charm configuration"
# if SSH access is required, then get execution environment SSH public
# if native charm we have waited already to VM be UP
- if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm", "helm-v3"):
+ if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm-v3"):
pub_key = None
user = None
# self.logger.debug("get ssh key block")