From 6500339f6d90776325acc9973f0c8243b62b9439 Mon Sep 17 00:00:00 2001 From: garciaale Date: Wed, 21 Oct 2020 15:21:37 -0300 Subject: [PATCH] Updates branch sol006 with master Change-Id: Ic562cbb8273139da0ff20389eec14bfca8c6f5ac Signed-off-by: garciaale --- .gitignore-common | 24 +- Dockerfile | 2 +- Dockerfile.fromdeb | 1 + Dockerfile.local | 49 +- Makefile | 4 +- debian/python3-osm-lcm.postinst | 13 +- devops-stages/stage-build.sh | 3 +- devops-stages/stage-test.sh | 2 +- nose2.cfg | 31 + osm_lcm/ROclient.py | 312 ++- osm_lcm/__init__.py | 11 +- osm_lcm/frontend_grpc.py | 73 + osm_lcm/frontend_pb2.py | 250 ++ osm_lcm/lcm.cfg | 18 + osm_lcm/lcm.py | 170 +- osm_lcm/lcm_hc.py | 48 + osm_lcm/lcm_helm_conn.py | 502 ++++ osm_lcm/lcm_utils.py | 56 +- osm_lcm/netslice.py | 81 +- osm_lcm/ng_ro.py | 189 ++ osm_lcm/ns.py | 3775 +++++++++++++++++--------- osm_lcm/prometheus.py | 220 ++ osm_lcm/tests/test_db_descriptors.py | 105 +- osm_lcm/tests/test_lcm_helm_conn.py | 103 + osm_lcm/tests/test_ns.py | 279 +- osm_lcm/tests/test_prometheus.py | 130 + osm_lcm/vim_sdn.py | 472 ++-- requirements.txt | 5 +- setup.py | 12 +- stdeb.cfg | 21 +- tox.ini | 30 +- 31 files changed, 5088 insertions(+), 1903 deletions(-) create mode 100644 nose2.cfg create mode 100644 osm_lcm/frontend_grpc.py create mode 100644 osm_lcm/frontend_pb2.py create mode 100644 osm_lcm/lcm_hc.py create mode 100644 osm_lcm/lcm_helm_conn.py create mode 100644 osm_lcm/ng_ro.py create mode 100644 osm_lcm/prometheus.py create mode 100644 osm_lcm/tests/test_lcm_helm_conn.py create mode 100644 osm_lcm/tests/test_prometheus.py diff --git a/.gitignore-common b/.gitignore-common index 4a339f3..2e84215 100644 --- a/.gitignore-common +++ b/.gitignore-common @@ -18,33 +18,43 @@ *.pyc *.pyo -#auto-ignore +# auto-ignore .gitignore -#logs +# logs logs -#pycharm +# pycharm .idea +venv -#eclipse +# tox +.tox + +# eclipse .project .pydevproject .settings -#local stuff files that end in ".local" or folders called "local" +# local stuff files that end in ".local" or folders called "local" local osm_lcm/local osm_lcm/test/local -#local stuff files that end in ".temp" or folders called "temp" +# local stuff files that end in ".temp" or folders called "temp" *.temp osm_lcm/temp osm_lcm/test/temp -#distribution and package generation +# distribution and package generation build dist *.egg-info .eggs +# Unit test / coverage reports +.coverage +cover +coverage.xml +nosetests.xml + diff --git a/Dockerfile b/Dockerfile index b0e81bb..79c1a50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ # Use Dockerfile.local for running osm/LCM in a docker container from source -FROM ubuntu:16.04 +FROM ubuntu:18.04 RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get --yes install git tox make debhelper wget \ diff --git a/Dockerfile.fromdeb b/Dockerfile.fromdeb index d49995a..95b0ff3 100644 --- a/Dockerfile.fromdeb +++ b/Dockerfile.fromdeb @@ -63,6 +63,7 @@ ENV OSMLCM_VCA_HOST vca ENV OSMLCM_VCA_PORT: 17070 ENV OSMLCM_VCA_USER: admin ENV OSMLCM_VCA_SECRET: secret +ENV OSMLCM_VCA_CLOUD: localhost # database ENV OSMLCM_DATABASE_DRIVER mongo diff --git a/Dockerfile.local b/Dockerfile.local index 2043879..40994c0 100644 --- a/Dockerfile.local +++ b/Dockerfile.local @@ -13,12 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM ubuntu:16.04 +FROM ubuntu:18.04 # Set the working directory to /app WORKDIR /app/LCM -RUN apt-get update && apt-get install -y curl xz-utils \ +RUN apt-get update && apt-get install -y curl xz-utils gnupg2 \ && apt-get update && apt-get install -y apt-transport-https \ && curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ && echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list \ @@ -29,14 +29,14 @@ RUN curl https://get.helm.sh/helm-v2.15.2-linux-amd64.tar.gz --output helm-v2.15 && mv linux-amd64/helm /usr/local/bin/helm \ && rm -r linux-amd64/ -RUN curl -L https://launchpad.net/juju/2.7/2.7.0/+download/juju-2.7.0-k8s.tar.xz --output juju-2.7.0-k8s.tar.xz \ - && tar -xvf juju-2.7.0-k8s.tar.xz \ +RUN curl -L https://launchpad.net/juju/2.7/2.7.6/+download/juju-2.7.6-k8s.tar.xz --output juju-2.7.6-k8s.tar.xz \ + && tar -xvf juju-2.7.6-k8s.tar.xz \ && mv juju /usr/local/bin/juju RUN apt-get update && apt-get install -y git tox python3 \ - python3-pip python3-aiohttp python3-jinja2 \ + python3-pip python3-jinja2 \ && python3 -m pip install pip --upgrade \ - && python3 -m pip install -U 'PyYAML==3.*' 'aiohttp==0.20.2' flake8 + && python3 -m pip install -U 'PyYAML' 'aiohttp>=2.3.10' flake8 RUN git -C /app clone https://osm.etsi.org/gerrit/osm/N2VC.git \ && python3 -m pip install -e /app/N2VC \ @@ -49,6 +49,8 @@ RUN git -C /app clone https://osm.etsi.org/gerrit/osm/common.git \ && python3 -m pip install -e /app/common # python3-pymongo python3-yaml pycrypto aiokafka +RUN python3 -m pip install grpcio-tools grpclib + RUN mkdir -p /app/storage/kafka && mkdir -p /app/log @@ -69,11 +71,19 @@ ENV OSMLCM_RO_TENANT osm # VCA ENV OSMLCM_VCA_HOST vca -ENV OSMLCM_VCA_PORT: 17070 -ENV OSMLCM_VCA_USER: admin -ENV OSMLCM_VCA_SECRET: secret -# ENV OSMLCM_VCA_PUBKEY: pubkey -# ENV OSMLCM_VCA_CACERT: cacert +ENV OSMLCM_VCA_PORT 17070 +ENV OSMLCM_VCA_USER admin +ENV OSMLCM_VCA_SECRET secret +ENV OSMLCM_VCA_CLOUD: localhost +# ENV OSMLCM_VCA_ENABLEOSUPGRADE false +# ENV OSMLCM_VCA_APTMIRROR http://archive.ubuntu.com/ubuntu/ +# ENV OSMLCM_VCA_PUBKEY pubkey +# ENV OSMLCM_VCA_CACERT cacert + +# VCA - k8s +ENV OSMLCM_VCA_HELMPATH /usr/local/bin/helm +ENV OSMLCM_VCA_KUBECTLPATH /usr/bin/kubectl +ENV OSMLCM_VCA_JUJUPATH /usr/local/bin/juju # database ENV OSMLCM_DATABASE_DRIVER mongo @@ -94,20 +104,25 @@ ENV OSMLCM_MESSAGE_DRIVER kafka ENV OSMLCM_MESSAGE_HOST kafka ENV OSMLCM_MESSAGE_PORT 9092 -# k8s -ENV OSMLCM_VCA_HELMPATH /usr/local/bin/helm -ENV OSMLCM_VCA_KUBECTLPATH /usr/bin/kubectl -ENV OSMLCM_VCA_JUJUPATH /usr/local/bin/juju +# tsdb +# ENV OSMLCM_TSDB_DRIVER prometheus +# ENV OSMLCM_TSDB_PATH /etc/prometheus +# ENV OSMLCM_TSDB_URI http://prometheus:9090/ # logs # ENV OSMLCM_GLOBAL_LOGFILE /app/log/lcm.log ENV OSMLCM_GLOBAL_LOGLEVEL DEBUG +# timeouts +# ENV OSMLCM_TIMEOUT_NS_DEPLOY 7200 +# ENV OSMLCM_TIMEOUT_NSI_DEPLOY 7200 + # Copy the current directory contents into the container at /app/LCM ADD . /app/LCM # Run app.py when the container launches CMD python3 -m osm_lcm.lcm -# HEALTHCHECK --interval=120s --timeout=15s --retries=1 \ -# CMD python3 -m osm_lcm.lcm --health-check || exit 1 +# HEALTHCHECK --start-period=120s --interval=30s --timeout=30s --retries=1 \ +# CMD python3 -m osm_lcm.lcm_hc || exit 1 + diff --git a/Makefile b/Makefile index 0ee3ddd..cdbb183 100644 --- a/Makefile +++ b/Makefile @@ -20,8 +20,8 @@ clean: package: python3 setup.py --command-packages=stdeb.command sdist_dsc - # cp debian/python3-osm-lcm.postinst deb_dist/osm-lcm*/debian - cd deb_dist/osm-lcm*/debian && echo "osm-common python3-osm-common" > py3dist-overrides + cp debian/python3-osm-lcm.postinst deb_dist/osm-lcm*/debian + # cd deb_dist/osm-lcm*/debian && echo "osm-common python3-osm-common" > py3dist-overrides # cd deb_dist/osm-lcm*/debian && echo "pip3 python3-pip" >> py3dist-overrides cd deb_dist/osm-lcm*/ && dpkg-buildpackage -rfakeroot -uc -us diff --git a/debian/python3-osm-lcm.postinst b/debian/python3-osm-lcm.postinst index 7ee6c6c..8a7082c 100755 --- a/debian/python3-osm-lcm.postinst +++ b/debian/python3-osm-lcm.postinst @@ -18,12 +18,11 @@ ## echo "POST INSTALL OSM-LCM" -# Currently it is not needed pip3 installation -# echo "Installing python dependencies via pip..." -# pip3 install pip==9.0.3 -# pip3 install --user aiokafka +echo "Installing python dependencies grpcio-tools grpclib via pip..." +echo +python3 -m pip install -U pip +python3 -m pip install grpcio-tools grpclib -#Creation of log folder -mkdir -p /var/log/osm +# Creation of log folder +# mkdir -p /var/log/osm -# systemctl enable osm-lcm.service diff --git a/devops-stages/stage-build.sh b/devops-stages/stage-build.sh index b6d8a45..2db54db 100755 --- a/devops-stages/stage-build.sh +++ b/devops-stages/stage-build.sh @@ -16,6 +16,5 @@ # For the moment it is not needed any post-intall action, so tox instead of Makefile is used -make clean # rm -rf deb_dist -tox -e build # make package +make clean package diff --git a/devops-stages/stage-test.sh b/devops-stages/stage-test.sh index e637200..36e445d 100755 --- a/devops-stages/stage-test.sh +++ b/devops-stages/stage-test.sh @@ -13,5 +13,5 @@ # License for the specific language governing permissions and limitations # under the License. ## - +rm -f nosetests.xml tox # flake8 unittest diff --git a/nose2.cfg b/nose2.cfg new file mode 100644 index 0000000..ec96ace --- /dev/null +++ b/nose2.cfg @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +## +# Copyright ETSI +# This file is part of ETSI OSM +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# For those usages not covered by the Apache License, Version 2.0 please +# contact: osslegalrouting@vmware.com +## + +[unittest] +plugins = nose2.plugins.junitxml + +[junit-xml] +always-on = True +keep_restricted = False +path = nosetests.xml +test_fullname = False + diff --git a/osm_lcm/ROclient.py b/osm_lcm/ROclient.py index 6198527..cf11490 100644 --- a/osm_lcm/ROclient.py +++ b/osm_lcm/ROclient.py @@ -112,14 +112,14 @@ class ROClient: 'wim': ("name", "wim_url"), 'vim_account': (), 'wim_account': (), - 'sdn': ("name", "port", 'ip', 'dpid', 'type'), + 'sdn': ("name", 'type'), } timeout_large = 120 timeout_short = 30 - def __init__(self, loop, endpoint_url, **kwargs): + def __init__(self, loop, uri, **kwargs): self.loop = loop - self.endpoint_url = endpoint_url + self.uri = uri self.username = kwargs.get("username") self.password = kwargs.get("password") @@ -127,7 +127,7 @@ class ROClient: self.tenant = None self.datacenter_id_name = kwargs.get("datacenter") self.datacenter = None - logger_name = kwargs.get('logger_name', 'ROClient') + logger_name = kwargs.get('logger_name', 'lcm.ro') self.logger = logging.getLogger(logger_name) if kwargs.get("loglevel"): self.logger.setLevel(kwargs["loglevel"]) @@ -143,11 +143,11 @@ class ROClient: return self.username elif index == 'password': return self.password - elif index == 'endpoint_url': - return self.endpoint_url + elif index == 'uri': + return self.uri else: raise KeyError("Invalid key '{}'".format(index)) - + def __setitem__(self, index, value): if index == 'tenant': self.tenant_id_name = value @@ -157,14 +157,15 @@ class ROClient: self.username = value elif index == 'password': self.password = value - elif index == 'endpoint_url': - self.endpoint_url = value + elif index == 'uri': + self.uri = value else: raise KeyError("Invalid key '{}'".format(index)) self.tenant = None # force to reload tenant with different credentials self.datacenter = None # force to reload datacenter with different credentials - def _parse(self, descriptor, descriptor_format, response=False): + @staticmethod + def _parse(descriptor, descriptor_format, response=False): if descriptor_format and descriptor_format != "json" and descriptor_format != "yaml": raise ROClientException("'descriptor_format' must be a 'json' or 'yaml' text") if descriptor_format != "json": @@ -186,8 +187,18 @@ class ROClient: if response: raise ROClientException(error_text) raise ROClientException(error_text) - - def _parse_yaml(self, descriptor, response=False): + + @staticmethod + def _parse_error_yaml(descriptor): + json_error = None + try: + json_error = yaml.load(descriptor, Loader=yaml.Loader) + return json_error["error"]["description"] + except Exception: + return str(json_error or descriptor) + + @staticmethod + def _parse_yaml(descriptor, response=False): try: return yaml.load(descriptor, Loader=yaml.Loader) except yaml.YAMLError as exc: @@ -280,45 +291,66 @@ class ROClient: :param ns_descriptor: instance descriptor obtained with self.show("ns", ) :return: status, message: status can be BUILD,ACTIVE,ERROR, message is a text message """ - net_total = 0 - vm_total = 0 - net_done = 0 - vm_done = 0 + error_list = [] + total = {"VMs": 0, "networks": 0, "SDN_networks": 0} + done = {"VMs": 0, "networks": 0, "SDN_networks": 0} - def _get_ref(desc): # return an identification for the network or vm. Try vim_id if exist, if not descriptor id - # for net + def _get_ref(desc): + # return an identification for the network or vm. Try vim_id if exist, if not descriptor id for net if desc.get("vim_net_id"): - return "'vim-id={}'".format(desc["vim_net_id"]) + return "'vim-net-id={}'".format(desc["vim_net_id"]) elif desc.get("ns_net_osm_id"): return "'nsd-vld-id={}'".format(desc["ns_net_osm_id"]) elif desc.get("vnf_net_osm_id"): return "'vnfd-vld-id={}'".format(desc["vnf_net_osm_id"]) # for VM elif desc.get("vim_vm_id"): - return "'vim-id={}'".format(desc["vim_vm_id"]) + return "'vim-vm-id={}'".format(desc["vim_vm_id"]) elif desc.get("vdu_osm_id"): return "'vnfd-vdu-id={}'".format(desc["vdu_osm_id"]) else: return "" - for net in ns_descriptor["nets"]: - net_total += 1 - if net["status"] in ("ERROR", "VIM_ERROR"): - return "ERROR", "VIM network ({}) on error: {}".format(_get_ref(net), net["error_msg"]) - elif net["status"] == "ACTIVE": - net_done += 1 - for vnf in ns_descriptor["vnfs"]: - for vm in vnf["vms"]: - vm_total += 1 - if vm["status"] in ("ERROR", "VIM_ERROR"): - return "ERROR", "VIM VM ({}) on error: {}".format(_get_ref(vm), vm["error_msg"]) - elif vm["status"] == "ACTIVE": - vm_done += 1 - - if net_total == net_done and vm_total == vm_done: - return "ACTIVE", "VMs {}, networks: {}".format(vm_total, net_total) - else: - return "BUILD", "VMs: {}/{}, networks: {}/{}".format(vm_done, vm_total, net_done, net_total) + def _get_sdn_ref(sce_net_id): + # look for the network associated to the SDN network and obtain the identification + net = next((x for x in ns_descriptor["nets"] if x.get("sce_net_id") == sce_net_id), None) + if not sce_net_id or not net: + return "" + return _get_ref(net) + + try: + total["networks"] = len(ns_descriptor["nets"]) + for net in ns_descriptor["nets"]: + if net["status"] in ("ERROR", "VIM_ERROR"): + error_list.append("Error at VIM network {}: {}".format(_get_ref(net), net["error_msg"])) + elif net["status"] == "ACTIVE": + done["networks"] += 1 + + total["SDN_networks"] = len(ns_descriptor["sdn_nets"]) + for sdn_net in ns_descriptor["sdn_nets"]: + if sdn_net["status"] in ("ERROR", "VIM_ERROR", "WIM_ERROR"): + error_list.append("Error at SDN network {}: {}".format(_get_sdn_ref(sdn_net.get("sce_net_id")), + sdn_net["error_msg"])) + elif sdn_net["status"] == "ACTIVE": + done["SDN_networks"] += 1 + + for vnf in ns_descriptor["vnfs"]: + for vm in vnf["vms"]: + total["VMs"] += 1 + if vm["status"] in ("ERROR", "VIM_ERROR"): + error_list.append("Error at VIM VM {}: {}".format(_get_ref(vm), vm["error_msg"])) + elif vm["status"] == "ACTIVE": + done["VMs"] += 1 + if error_list: + # skip errors caused because other dependendent task is on error + return "ERROR", "; ".join([el for el in error_list if "because depends on failed ACTION" not in el]) + if all(total[x] == done[x] for x in total): # DONE == TOTAL for all items + return "ACTIVE", str({x: total[x] for x in total if total[x]}) # print only those which value is not 0 + else: + return "BUILD", str({x: "{}/{}".format(done[x], total[x]) for x in total if total[x]}) + # print done/total for each item if total is not 0 + except Exception as e: + raise ROClientException("Unexpected RO ns descriptor. Wrong version? {}".format(e)) from e @staticmethod def check_action_status(action_descriptor): @@ -414,23 +446,23 @@ class ROClient: tenant_text = "/" + self.tenant item_id = 0 - url = "{}{}/{}".format(self.endpoint_url, tenant_text, item) + url = "{}{}/{}".format(self.uri, tenant_text, item) if self.check_if_uuid(item_id_name): item_id = item_id_name url += "/" + item_id_name elif item_id_name and item_id_name.startswith("'") and item_id_name.endswith("'"): item_id_name = item_id_name[1:-1] self.logger.debug("RO GET %s", url) - with aiohttp.Timeout(self.timeout_short): - async with session.get(url, headers=self.headers_req) as response: - response_text = await response.read() - self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status == 404: # NOT_FOUND - raise ROClientException("No {} found with id '{}'".format(item[:-1], item_id_name), - http_code=404) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) - content = self._parse_yaml(response_text, response=True) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.get(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status == 404: # NOT_FOUND + raise ROClientException("No {} found with id '{}'".format(item[:-1], item_id_name), + http_code=404) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) + content = self._parse_yaml(response_text, response=True) if item_id: return item_id @@ -465,18 +497,18 @@ class ROClient: # check that exist uuid = await self._get_item_uuid(session, item, item_id_name, all_tenants) - url = "{}{}/{}/{}".format(self.endpoint_url, tenant_text, item, uuid) + url = "{}{}/{}/{}".format(self.uri, tenant_text, item, uuid) if extra_item: url += "/" + extra_item if extra_item_id: url += "/" + extra_item_id self.logger.debug("GET %s", url) - with aiohttp.Timeout(self.timeout_short): - async with session.get(url, headers=self.headers_req) as response: - response_text = await response.read() - self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.get(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) return self._parse_yaml(response_text, response=True) @@ -484,7 +516,7 @@ class ROClient: if not self.tenant: self.tenant = await self._get_item_uuid(session, "tenants", self.tenant_id_name, None) return self.tenant - + async def _get_datacenter(self, session): if not self.tenant: await self._get_tenant(session) @@ -527,15 +559,15 @@ class ROClient: else: action = "/{}".format(action) - url = "{}{apiver}{tenant}/{item}{id}{action}".format(self.endpoint_url, apiver=api_version_text, + url = "{}{apiver}{tenant}/{item}{id}{action}".format(self.uri, apiver=api_version_text, tenant=tenant_text, item=item, id=uuid, action=action) self.logger.debug("RO POST %s %s", url, payload_req) - with aiohttp.Timeout(self.timeout_large): - async with session.post(url, headers=self.headers_req, data=payload_req) as response: - response_text = await response.read() - self.logger.debug("POST {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_large) + async with session.post(url, headers=self.headers_req, data=payload_req) as response: + response_text = await response.read() + self.logger.debug("POST {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) return self._parse_yaml(response_text, response=True) @@ -557,14 +589,15 @@ class ROClient: else: uuid = item_id_name - url = "{}{}/{}/{}".format(self.endpoint_url, tenant_text, item, uuid) + url = "{}{}/{}/{}".format(self.uri, tenant_text, item, uuid) self.logger.debug("DELETE %s", url) - with aiohttp.Timeout(self.timeout_short): - async with session.delete(url, headers=self.headers_req) as response: - response_text = await response.read() - self.logger.debug("DELETE {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.delete(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("DELETE {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) + return self._parse_yaml(response_text, response=True) async def _list_item(self, session, item, all_tenants=False, filter_dict=None): @@ -577,19 +610,20 @@ class ROClient: await self._get_tenant(session) tenant_text = "/" + self.tenant - url = "{}{}/{}".format(self.endpoint_url, tenant_text, item) + url = "{}{}/{}".format(self.uri, tenant_text, item) separator = "?" if filter_dict: for k in filter_dict: url += separator + quote(str(k)) + "=" + quote(str(filter_dict[k])) separator = "&" self.logger.debug("RO GET %s", url) - with aiohttp.Timeout(self.timeout_short): - async with session.get(url, headers=self.headers_req) as response: - response_text = await response.read() - self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.get(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) + return self._parse_yaml(response_text, response=True) async def _edit_item(self, session, item, item_id, descriptor, all_tenants=False): @@ -603,16 +637,17 @@ class ROClient: tenant_text = "/" + self.tenant payload_req = yaml.safe_dump(descriptor) - + # print payload_req - url = "{}{}/{}/{}".format(self.endpoint_url, tenant_text, item, item_id) + url = "{}{}/{}/{}".format(self.uri, tenant_text, item, item_id) self.logger.debug("RO PUT %s %s", url, payload_req) - with aiohttp.Timeout(self.timeout_large): - async with session.put(url, headers=self.headers_req, data=payload_req) as response: - response_text = await response.read() - self.logger.debug("PUT {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_large) + async with session.put(url, headers=self.headers_req, data=payload_req) as response: + response_text = await response.read() + self.logger.debug("PUT {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) + return self._parse_yaml(response_text, response=True) async def get_version(self): @@ -621,21 +656,23 @@ class ROClient: :return: a list with integers ["major", "minor", "release"]. Raises ROClientException on Error, """ try: - with aiohttp.ClientSession(loop=self.loop) as session: - url = "{}/version".format(self.endpoint_url) + response_text = "" + async with aiohttp.ClientSession(loop=self.loop) as session: + url = "{}/version".format(self.uri) self.logger.debug("RO GET %s", url) - with aiohttp.Timeout(self.timeout_short): - async with session.get(url, headers=self.headers_req) as response: - response_text = await response.read() - self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.get(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) + for word in str(response_text).split(" "): if "." in word: version_text, _, _ = word.partition("-") return version_text raise ROClientException("Got invalid version text: '{}'".format(response_text), http_code=500) - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -656,7 +693,7 @@ class ROClient: raise ROClientException("Invalid item {}".format(item)) if item == 'tenant': all_tenants = None - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: content = await self._list_item(session, self.client_to_RO[item], all_tenants=all_tenants, filter_dict=filter_by) if isinstance(content, dict): @@ -667,7 +704,7 @@ class ROClient: else: raise ROClientException("Output not a list neither dict with len equal 1", http_code=500) return content - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -693,11 +730,11 @@ class ROClient: elif item == 'vim_account': all_tenants = False - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: content = await self._get_item(session, self.client_to_RO[item], item_id_name, extra_item=extra_item, extra_item_id=extra_item_id, all_tenants=all_tenants) return remove_envelop(item, content) - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -716,7 +753,7 @@ class ROClient: if item in ('tenant', 'vim', 'wim'): all_tenants = None - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: result = await self._del_item(session, self.client_to_RO[item], item_id_name, all_tenants=all_tenants) # in case of ns delete, get the action_id embeded in text if item == "ns" and result.get("result"): @@ -725,7 +762,7 @@ class ROClient: if action_id: result["action_id"] = action_id return result - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -761,7 +798,7 @@ class ROClient: create_desc = self._create_envelop(item, desc) - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: _all_tenants = all_tenants if item == 'vim': _all_tenants = True @@ -773,7 +810,7 @@ class ROClient: outdata = await self._edit_item(session, self.client_to_RO[item], item_id, create_desc, all_tenants=_all_tenants) return remove_envelop(item, outdata) - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -814,11 +851,11 @@ class ROClient: create_desc = self._create_envelop(item, desc) - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: outdata = await self._create_item(session, self.client_to_RO[item], create_desc, all_tenants=all_tenants) return remove_envelop(item, outdata) - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -863,7 +900,7 @@ class ROClient: # create_desc = self._create_envelop(item, desc) create_desc = desc - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: _all_tenants = all_tenants if item == 'vim': _all_tenants = True @@ -873,7 +910,7 @@ class ROClient: item_id_name=item_id_name, # item_id_name=item_id action=action, all_tenants=_all_tenants) return remove_envelop(item, outdata) - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -913,25 +950,25 @@ class ROClient: format(item)) create_desc = self._create_envelop(item, desc) payload_req = yaml.safe_dump(create_desc) - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: # check that exist item_id = await self._get_item_uuid(session, self.client_to_RO[item], item_id_name, all_tenants=True) await self._get_tenant(session) - url = "{}/{tenant}/{item}/{item_id}".format(self.endpoint_url, tenant=self.tenant, + url = "{}/{tenant}/{item}/{item_id}".format(self.uri, tenant=self.tenant, item=self.client_to_RO[item], item_id=item_id) self.logger.debug("RO POST %s %s", url, payload_req) - with aiohttp.Timeout(self.timeout_large): - async with session.post(url, headers=self.headers_req, data=payload_req) as response: - response_text = await response.read() - self.logger.debug("POST {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + # timeout = aiohttp.ClientTimeout(total=self.timeout_large) + async with session.post(url, headers=self.headers_req, data=payload_req) as response: + response_text = await response.read() + self.logger.debug("POST {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) response_desc = self._parse_yaml(response_text, response=True) desc = remove_envelop(item, response_desc) return desc - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -939,25 +976,26 @@ class ROClient: async def detach(self, item, item_id_name=None): # TODO replace the code with delete_item(vim_account,...) try: - with aiohttp.ClientSession(loop=self.loop) as session: + async with aiohttp.ClientSession(loop=self.loop) as session: # check that exist item_id = await self._get_item_uuid(session, self.client_to_RO[item], item_id_name, all_tenants=False) tenant = await self._get_tenant(session) - url = "{}/{tenant}/{item}/{datacenter}".format(self.endpoint_url, tenant=tenant, + url = "{}/{tenant}/{item}/{datacenter}".format(self.uri, tenant=tenant, item=self.client_to_RO[item], datacenter=item_id) self.logger.debug("RO DELETE %s", url) - with aiohttp.Timeout(self.timeout_large): - async with session.delete(url, headers=self.headers_req) as response: - response_text = await response.read() - self.logger.debug("DELETE {} [{}] {}".format(url, response.status, response_text[:100])) - if response.status >= 300: - raise ROClientException(response_text, http_code=response.status) + + # timeout = aiohttp.ClientTimeout(total=self.timeout_large) + async with session.delete(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("DELETE {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise ROClientException(self._parse_error_yaml(response_text), http_code=response.status) response_desc = self._parse_yaml(response_text, response=True) desc = remove_envelop(item, response_desc) return desc - except aiohttp.errors.ClientOSError as e: + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: raise ROClientException(e, http_code=504) except asyncio.TimeoutError: raise ROClientException("Timeout", http_code=504) @@ -1071,35 +1109,35 @@ class ROClient: datacenter = self.get_datacenter(session) if action == "list": - url = "{}{}/vim/{}/{}".format(self.endpoint_url, tenant_text, datacenter, item) + url = "{}{}/vim/{}/{}".format(self.uri, tenant_text, datacenter, item) self.logger.debug("GET %s", url) mano_response = requests.get(url, headers=self.headers_req) self.logger.debug("RO response: %s", mano_response.text) - content = self._parse_yaml(mano_response.text, response=True) + content = self._parse_yaml(mano_response.text, response=True) if mano_response.status_code == 200: return content else: - raise ROClientException(str(content), http_code=mano_response.status) + raise ROClientException(str(content), http_code=mano_response.status) elif action == "get" or action == "show": - url = "{}{}/vim/{}/{}/{}".format(self.endpoint_url, tenant_text, datacenter, item, uuid) + url = "{}{}/vim/{}/{}/{}".format(self.uri, tenant_text, datacenter, item, uuid) self.logger.debug("GET %s", url) mano_response = requests.get(url, headers=self.headers_req) self.logger.debug("RO response: %s", mano_response.text) - content = self._parse_yaml(mano_response.text, response=True) + content = self._parse_yaml(mano_response.text, response=True) if mano_response.status_code == 200: return content else: - raise ROClientException(str(content), http_code=mano_response.status) + raise ROClientException(str(content), http_code=mano_response.status) elif action == "delete": - url = "{}{}/vim/{}/{}/{}".format(self.endpoint_url, tenant_text, datacenter, item, uuid) + url = "{}{}/vim/{}/{}/{}".format(self.uri, tenant_text, datacenter, item, uuid) self.logger.debug("DELETE %s", url) mano_response = requests.delete(url, headers=self.headers_req) self.logger.debug("RO response: %s", mano_response.text) - content = self._parse_yaml(mano_response.text, response=True) + content = self._parse_yaml(mano_response.text, response=True) if mano_response.status_code == 200: return content else: - raise ROClientException(str(content), http_code=mano_response.status) + raise ROClientException(str(content), http_code=mano_response.status) elif action == "create": if "descriptor" in kwargs: if isinstance(kwargs["descriptor"], str): @@ -1110,7 +1148,7 @@ class ROClient: descriptor = {item[:-1]: {"name": kwargs["name"]}} else: raise ROClientException("Missing descriptor") - + if item[:-1] not in descriptor or len(descriptor) != 1: raise ROClientException("Descriptor must contain only one 'tenant' field") if "name" in kwargs: @@ -1119,7 +1157,7 @@ class ROClient: descriptor[item[:-1]]['description'] = kwargs["description"] payload_req = yaml.safe_dump(descriptor) # print payload_req - url = "{}{}/vim/{}/{}".format(self.endpoint_url, tenant_text, datacenter, item) + url = "{}{}/vim/{}/{}".format(self.uri, tenant_text, datacenter, item) self.logger.debug("RO POST %s %s", url, payload_req) mano_response = requests.post(url, headers=self.headers_req, data=payload_req) self.logger.debug("RO response: %s", mano_response.text) @@ -1151,7 +1189,7 @@ if __name__ == '__main__': tenant_id = None vim_id = False loop = asyncio.get_event_loop() - myClient = ROClient(endpoint_url=RO_URL, loop=loop, loglevel="DEBUG") + myClient = ROClient(uri=RO_URL, loop=loop, loglevel="DEBUG") try: # test tenant content = loop.run_until_complete(myClient.get_list("tenant")) diff --git a/osm_lcm/__init__.py b/osm_lcm/__init__.py index 7100a22..d013101 100644 --- a/osm_lcm/__init__.py +++ b/osm_lcm/__init__.py @@ -13,5 +13,12 @@ ## # version moved to lcm.py. uncomment if LCM is installed as library and installed -version = '7.0.0.post1' -version_date = '2019-12-10' +version = '7.0.1.post18' +version_date = '2020-04-08' + +# Try to get version from package using pkg_resources (available with setuptools) +try: + from pkg_resources import get_distribution + version = get_distribution("osm_lcm").version +except Exception: + pass diff --git a/osm_lcm/frontend_grpc.py b/osm_lcm/frontend_grpc.py new file mode 100644 index 0000000..88308e1 --- /dev/null +++ b/osm_lcm/frontend_grpc.py @@ -0,0 +1,73 @@ +## +# Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +## +# Generated by the Protocol Buffers compiler. DO NOT EDIT! +# source: osm_lcm/frontend.proto +# plugin: grpclib.plugin.main +import abc +import typing + +import grpclib.const +import grpclib.client +if typing.TYPE_CHECKING: + import grpclib.server + +import osm_lcm.frontend_pb2 + + +class FrontendExecutorBase(abc.ABC): + + @abc.abstractmethod + async def RunPrimitive(self, stream: 'grpclib.server.Stream[osm_lcm.frontend_pb2.PrimitiveRequest, osm_lcm.frontend_pb2.PrimitiveReply]') -> None: + pass + + @abc.abstractmethod + async def GetSshKey(self, stream: 'grpclib.server.Stream[osm_lcm.frontend_pb2.SshKeyRequest, osm_lcm.frontend_pb2.SshKeyReply]') -> None: + pass + + def __mapping__(self) -> typing.Dict[str, grpclib.const.Handler]: + return { + '/osm_ee.FrontendExecutor/RunPrimitive': grpclib.const.Handler( + self.RunPrimitive, + grpclib.const.Cardinality.UNARY_STREAM, + osm_lcm.frontend_pb2.PrimitiveRequest, + osm_lcm.frontend_pb2.PrimitiveReply, + ), + '/osm_ee.FrontendExecutor/GetSshKey': grpclib.const.Handler( + self.GetSshKey, + grpclib.const.Cardinality.UNARY_UNARY, + osm_lcm.frontend_pb2.SshKeyRequest, + osm_lcm.frontend_pb2.SshKeyReply, + ), + } + + +class FrontendExecutorStub: + + def __init__(self, channel: grpclib.client.Channel) -> None: + self.RunPrimitive = grpclib.client.UnaryStreamMethod( + channel, + '/osm_ee.FrontendExecutor/RunPrimitive', + osm_lcm.frontend_pb2.PrimitiveRequest, + osm_lcm.frontend_pb2.PrimitiveReply, + ) + self.GetSshKey = grpclib.client.UnaryUnaryMethod( + channel, + '/osm_ee.FrontendExecutor/GetSshKey', + osm_lcm.frontend_pb2.SshKeyRequest, + osm_lcm.frontend_pb2.SshKeyReply, + ) diff --git a/osm_lcm/frontend_pb2.py b/osm_lcm/frontend_pb2.py new file mode 100644 index 0000000..3407e73 --- /dev/null +++ b/osm_lcm/frontend_pb2.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- +## +# Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +## +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: osm_lcm/frontend.proto + +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='osm_lcm/frontend.proto', + package='osm_ee', + syntax='proto3', + serialized_options=b'\n\027com.etsi.osm.lcm.osm_eeB\014GrpcExecutorP\001\242\002\003OEE', + serialized_pb=b'\n\x16osm_lcm/frontend.proto\x12\x06osm_ee\"<\n\x10PrimitiveRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06params\x18\x03 \x01(\t\":\n\x0ePrimitiveReply\x12\x0e\n\x06status\x18\x01 \x01(\t\x12\x18\n\x10\x64\x65tailed_message\x18\x02 \x01(\t\"\x0f\n\rSshKeyRequest\"\x1e\n\x0bSshKeyReply\x12\x0f\n\x07message\x18\x01 \x01(\t2\x93\x01\n\x10\x46rontendExecutor\x12\x44\n\x0cRunPrimitive\x12\x18.osm_ee.PrimitiveRequest\x1a\x16.osm_ee.PrimitiveReply\"\x00\x30\x01\x12\x39\n\tGetSshKey\x12\x15.osm_ee.SshKeyRequest\x1a\x13.osm_ee.SshKeyReply\"\x00\x42/\n\x17\x63om.etsi.osm.lcm.osm_eeB\x0cGrpcExecutorP\x01\xa2\x02\x03OEEb\x06proto3' +) + + + + +_PRIMITIVEREQUEST = _descriptor.Descriptor( + name='PrimitiveRequest', + full_name='osm_ee.PrimitiveRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='id', full_name='osm_ee.PrimitiveRequest.id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='name', full_name='osm_ee.PrimitiveRequest.name', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='params', full_name='osm_ee.PrimitiveRequest.params', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=34, + serialized_end=94, +) + + +_PRIMITIVEREPLY = _descriptor.Descriptor( + name='PrimitiveReply', + full_name='osm_ee.PrimitiveReply', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='status', full_name='osm_ee.PrimitiveReply.status', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='detailed_message', full_name='osm_ee.PrimitiveReply.detailed_message', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=96, + serialized_end=154, +) + + +_SSHKEYREQUEST = _descriptor.Descriptor( + name='SshKeyRequest', + full_name='osm_ee.SshKeyRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=156, + serialized_end=171, +) + + +_SSHKEYREPLY = _descriptor.Descriptor( + name='SshKeyReply', + full_name='osm_ee.SshKeyReply', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='message', full_name='osm_ee.SshKeyReply.message', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=173, + serialized_end=203, +) + +DESCRIPTOR.message_types_by_name['PrimitiveRequest'] = _PRIMITIVEREQUEST +DESCRIPTOR.message_types_by_name['PrimitiveReply'] = _PRIMITIVEREPLY +DESCRIPTOR.message_types_by_name['SshKeyRequest'] = _SSHKEYREQUEST +DESCRIPTOR.message_types_by_name['SshKeyReply'] = _SSHKEYREPLY +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +PrimitiveRequest = _reflection.GeneratedProtocolMessageType('PrimitiveRequest', (_message.Message,), { + 'DESCRIPTOR' : _PRIMITIVEREQUEST, + '__module__' : 'osm_lcm.frontend_pb2' + # @@protoc_insertion_point(class_scope:osm_ee.PrimitiveRequest) + }) +_sym_db.RegisterMessage(PrimitiveRequest) + +PrimitiveReply = _reflection.GeneratedProtocolMessageType('PrimitiveReply', (_message.Message,), { + 'DESCRIPTOR' : _PRIMITIVEREPLY, + '__module__' : 'osm_lcm.frontend_pb2' + # @@protoc_insertion_point(class_scope:osm_ee.PrimitiveReply) + }) +_sym_db.RegisterMessage(PrimitiveReply) + +SshKeyRequest = _reflection.GeneratedProtocolMessageType('SshKeyRequest', (_message.Message,), { + 'DESCRIPTOR' : _SSHKEYREQUEST, + '__module__' : 'osm_lcm.frontend_pb2' + # @@protoc_insertion_point(class_scope:osm_ee.SshKeyRequest) + }) +_sym_db.RegisterMessage(SshKeyRequest) + +SshKeyReply = _reflection.GeneratedProtocolMessageType('SshKeyReply', (_message.Message,), { + 'DESCRIPTOR' : _SSHKEYREPLY, + '__module__' : 'osm_lcm.frontend_pb2' + # @@protoc_insertion_point(class_scope:osm_ee.SshKeyReply) + }) +_sym_db.RegisterMessage(SshKeyReply) + + +DESCRIPTOR._options = None + +_FRONTENDEXECUTOR = _descriptor.ServiceDescriptor( + name='FrontendExecutor', + full_name='osm_ee.FrontendExecutor', + file=DESCRIPTOR, + index=0, + serialized_options=None, + serialized_start=206, + serialized_end=353, + methods=[ + _descriptor.MethodDescriptor( + name='RunPrimitive', + full_name='osm_ee.FrontendExecutor.RunPrimitive', + index=0, + containing_service=None, + input_type=_PRIMITIVEREQUEST, + output_type=_PRIMITIVEREPLY, + serialized_options=None, + ), + _descriptor.MethodDescriptor( + name='GetSshKey', + full_name='osm_ee.FrontendExecutor.GetSshKey', + index=1, + containing_service=None, + input_type=_SSHKEYREQUEST, + output_type=_SSHKEYREPLY, + serialized_options=None, + ), +]) +_sym_db.RegisterServiceDescriptor(_FRONTENDEXECUTOR) + +DESCRIPTOR.services_by_name['FrontendExecutor'] = _FRONTENDEXECUTOR + +# @@protoc_insertion_point(module_scope) diff --git a/osm_lcm/lcm.cfg b/osm_lcm/lcm.cfg index 3bc1ba0..e2e34d4 100644 --- a/osm_lcm/lcm.cfg +++ b/osm_lcm/lcm.cfg @@ -22,6 +22,11 @@ global: # logfile: /app/log # or /var/log/osm/lcm.log # nologging: True # do no log to stdout/stderr +#[timeout] +timeout: + # ns_deploy: 7200 # total deploy timeout for a ns 2 hours + # nsi_deploy: 7200 # total deploy timeout for a nsi 2 hours + #[RO] RO: host: ro # hostname or IP @@ -36,6 +41,11 @@ VCA: port: 17070 user: admin secret: secret + cloud: localhost + k8s_cloud: k8scloud + helmpath: /usr/local/bin/helm + kubectlpath: /usr/bin/kubectl + jujupath: /usr/local/bin/juju # pubkey: pubkey # cacert: cacert # apiproxy: apiproxy @@ -74,3 +84,11 @@ message: # loglevel: DEBUG # logfile: /var/log/osm/lcm-message.log group_id: lcm-server + +tsdb: # time series database + driver: prometheus + # local file to store the configuration + path: /etc/prometheus + uri: http://prometheus:9090/ + # loglevel: DEBUG + # logfile: /var/log/osm/lcm-tsdb.log diff --git a/osm_lcm/lcm.py b/osm_lcm/lcm.py index 67e0534..eaf2558 100644 --- a/osm_lcm/lcm.py +++ b/osm_lcm/lcm.py @@ -29,14 +29,13 @@ import logging.handlers import getopt import sys -from osm_lcm import ns -from osm_lcm import vim_sdn -from osm_lcm import netslice -from osm_lcm import ROclient +from osm_lcm import ns, prometheus, vim_sdn, netslice +from osm_lcm.ng_ro import NgRoException, NgRoClient +from osm_lcm.ROclient import ROClient, ROClientException -from time import time, sleep +from time import time from osm_lcm.lcm_utils import versiontuple, LcmException, TaskRegistry, LcmExceptionExit -from osm_lcm import version as _lcm_version, version_date as lcm_version_date +from osm_lcm import version as lcm_version, version_date as lcm_version_date from osm_common import dbmemory, dbmongo, fslocal, fsmongo, msglocal, msgkafka from osm_common import version as common_version @@ -56,7 +55,6 @@ min_RO_version = "6.0.2" min_n2vc_version = "0.0.2" min_common_version = "0.1.19" -lcm_version = _lcm_version health_check_file = path.expanduser("~") + "/time_last_ping" # TODO find better location for this file @@ -64,6 +62,8 @@ class Lcm: ping_interval_pace = 120 # how many time ping is send once is confirmed all is running ping_interval_boot = 5 # how many time ping is sent when booting + cfg_logger_name = {"message": "lcm.msg", "database": "lcm.db", "storage": "lcm.fs", "tsdb": "lcm.prometheus"} + # ^ contains for each section at lcm.cfg the used logger name def __init__(self, config_file, loop=None): """ @@ -71,7 +71,6 @@ class Lcm: :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', :return: None """ - global lcm_version self.db = None self.msg = None self.msg_admin = None @@ -87,14 +86,19 @@ class Lcm: # load configuration config = self.read_config_file(config_file) self.config = config - self.ro_config = { - "endpoint_url": "http://{}:{}/openmano".format(config["RO"]["host"], config["RO"]["port"]), + self.config["ro_config"] = { + "ng": config["RO"].get("ng", False), + "uri": config["RO"].get("uri"), "tenant": config.get("tenant", "osm"), - "logger_name": "lcm.ROclient", - "loglevel": "ERROR", + "logger_name": "lcm.roclient", + "loglevel": config["RO"].get("loglevel", "ERROR"), } - - self.vca_config = config["VCA"] + if not self.config["ro_config"]["uri"]: + if not self.config["ro_config"]["ng"]: + self.config["ro_config"]["uri"] = "http://{}:{}/openmano".format(config["RO"]["host"], + config["RO"]["port"]) + else: + self.config["ro_config"]["uri"] = "http://{}:{}/ro".format(config["RO"]["host"], config["RO"]["port"]) self.loop = loop or asyncio.get_event_loop() @@ -118,7 +122,7 @@ class Lcm: self.logger.setLevel(config["global"]["loglevel"]) # logging other modules - for k1, logname in {"message": "lcm.msg", "database": "lcm.db", "storage": "lcm.fs"}.items(): + for k1, logname in self.cfg_logger_name.items(): config[k1]["logger_name"] = logname logger_module = logging.getLogger(logname) if config[k1].get("logfile"): @@ -188,30 +192,41 @@ class Lcm: # contains created tasks/futures to be able to cancel self.lcm_tasks = TaskRegistry(self.worker_id, self.db, self.logger) - self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.vca_config, self.loop) - self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, - self.vca_config, self.loop) - self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop) - self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop) - self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop) - self.k8scluster = vim_sdn.K8sClusterLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.vca_config, self.loop) - self.k8srepo = vim_sdn.K8sRepoLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.vca_config, self.loop) + if self.config.get("tsdb") and self.config["tsdb"].get("driver"): + if self.config["tsdb"]["driver"] == "prometheus": + self.prometheus = prometheus.Prometheus(self.config["tsdb"], self.worker_id, self.db, self.loop) + else: + raise LcmException("Invalid configuration param '{}' at '[tsdb]':'driver'".format( + config["tsdb"]["driver"])) + else: + self.prometheus = None + self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop, self.prometheus) + self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop, + self.ns) + self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) + self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) + self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) + self.k8scluster = vim_sdn.K8sClusterLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) + self.k8srepo = vim_sdn.K8sRepoLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) async def check_RO_version(self): tries = 14 last_error = None while True: try: - ro_server = ROclient.ROClient(self.loop, **self.ro_config) + if self.config["ro_config"].get("ng"): + ro_server = NgRoClient(self.loop, **self.config["ro_config"]) + else: + ro_server = ROClient(self.loop, **self.config["ro_config"]) ro_version = await ro_server.get_version() if versiontuple(ro_version) < versiontuple(min_RO_version): raise LcmException("Not compatible osm/RO version '{}'. Needed '{}' or higher".format( ro_version, min_RO_version)) self.logger.info("Connected to RO version {}".format(ro_version)) return - except ROclient.ROClientException as e: + except (ROClientException, NgRoException) as e: tries -= 1 - error_text = "Error while connecting to RO on {}: {}".format(self.ro_config["endpoint_url"], e) + error_text = "Error while connecting to RO on {}: {}".format(self.config["ro_config"]["uri"], e) if tries <= 0: self.logger.critical(error_text) raise LcmException(error_text) @@ -224,7 +239,6 @@ class Lcm: self.logger.debug("Starting/Ending test task: {}".format(param)) async def kafka_ping(self): - global lcm_version self.logger.debug("Task kafka_ping Enter") consecutive_errors = 0 first_start = True @@ -291,6 +305,10 @@ class Lcm: except Exception as e: self.logger.error("Cannot write into '{}' for healthcheck: {}".format(health_check_file, e)) return + elif topic == "pla": + if command == "placement": + self.ns.update_nsrs_with_pla_result(params) + return elif topic == "k8scluster": if command == "create" or command == "created": k8scluster_id = params.get("_id") @@ -315,7 +333,7 @@ class Lcm: self.lcm_tasks.register("k8srepo", k8srepo_id, order_id, "k8srepo_delete", task) return elif topic == "ns": - if command == "instantiate" or command == "instantiated": + if command == "instantiate": # self.logger.debug("Deploying NS {}".format(nsr_id)) nslcmop = params nslcmop_id = nslcmop["_id"] @@ -323,7 +341,7 @@ class Lcm: task = asyncio.ensure_future(self.ns.instantiate(nsr_id, nslcmop_id)) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_instantiate", task) return - elif command == "terminate" or command == "terminated": + elif command == "terminate": # self.logger.debug("Deleting NS {}".format(nsr_id)) nslcmop = params nslcmop_id = nslcmop["_id"] @@ -366,7 +384,7 @@ class Lcm: elif command in ("terminated", "instantiated", "scaled", "actioned"): # "scaled-cooldown-time" return elif topic == "nsi": # netslice LCM processes (instantiate, terminate, etc) - if command == "instantiate" or command == "instantiated": + if command == "instantiate": # self.logger.debug("Instantiating Network Slice {}".format(nsilcmop["netsliceInstanceId"])) nsilcmop = params nsilcmop_id = nsilcmop["_id"] # slice operation id @@ -374,7 +392,7 @@ class Lcm: task = asyncio.ensure_future(self.netslice.instantiate(nsir_id, nsilcmop_id)) self.lcm_tasks.register("nsi", nsir_id, nsilcmop_id, "nsi_instantiate", task) return - elif command == "terminate" or command == "terminated": + elif command == "terminate": # self.logger.debug("Terminating Network Slice NS {}".format(nsilcmop["netsliceInstanceId"])) nsilcmop = params nsilcmop_id = nsilcmop["_id"] # slice operation id @@ -468,10 +486,10 @@ class Lcm: self.first_start = True while self.consecutive_errors < 10: try: - topics = ("ns", "vim_account", "wim_account", "sdn", "nsi", "k8scluster", "k8srepo") + topics = ("ns", "vim_account", "wim_account", "sdn", "nsi", "k8scluster", "k8srepo", "pla") topics_admin = ("admin", ) await asyncio.gather( - self.msg.aioread(topics, self.loop, self.kafka_read_callback), + self.msg.aioread(topics, self.loop, self.kafka_read_callback, from_beginning=True), self.msg_admin.aioread(topics_admin, self.loop, self.kafka_read_callback, group_id=False) ) @@ -497,6 +515,10 @@ class Lcm: # check RO version self.loop.run_until_complete(self.check_RO_version()) + # configure tsdb prometheus + if self.prometheus: + self.loop.run_until_complete(self.prometheus.start()) + self.loop.run_until_complete(asyncio.gather( self.kafka_read(), self.kafka_ping() @@ -527,27 +549,51 @@ class Lcm: # the configparser library is not suitable, because it does not admit comments at the end of line, # and not parse integer or boolean try: + # read file as yaml format with open(config_file) as f: conf = yaml.load(f, Loader=yaml.Loader) + # Ensure all sections are not empty + for k in ("global", "timeout", "RO", "VCA", "database", "storage", "message"): + if not conf.get(k): + conf[k] = {} + + # read all environ that starts with OSMLCM_ for k, v in environ.items(): if not k.startswith("OSMLCM_"): continue - k_items = k.lower().split("_") - if len(k_items) < 3: + subject, _, item = k[7:].lower().partition("_") + if not item: continue - if k_items[1] in ("ro", "vca"): + if subject in ("ro", "vca"): # put in capital letter - k_items[1] = k_items[1].upper() - c = conf + subject = subject.upper() try: - for k_item in k_items[1:-1]: - c = c[k_item] - if k_items[-1] == "port": - c[k_items[-1]] = int(v) + if item == "port" or subject == "timeout": + conf[subject][item] = int(v) else: - c[k_items[-1]] = v + conf[subject][item] = v except Exception as e: - self.logger.warn("skipping environ '{}' on exception '{}'".format(k, e)) + self.logger.warning("skipping environ '{}' on exception '{}'".format(k, e)) + + # backward compatibility of VCA parameters + + if 'pubkey' in conf["VCA"]: + conf["VCA"]['public_key'] = conf["VCA"].pop('pubkey') + if 'cacert' in conf["VCA"]: + conf["VCA"]['ca_cert'] = conf["VCA"].pop('cacert') + if 'apiproxy' in conf["VCA"]: + conf["VCA"]['api_proxy'] = conf["VCA"].pop('apiproxy') + + if 'enableosupgrade' in conf["VCA"]: + conf["VCA"]['enable_os_upgrade'] = conf["VCA"].pop('enableosupgrade') + if isinstance(conf["VCA"].get('enable_os_upgrade'), str): + if conf["VCA"]['enable_os_upgrade'].lower() == 'false': + conf["VCA"]['enable_os_upgrade'] = False + elif conf["VCA"]['enable_os_upgrade'].lower() == 'true': + conf["VCA"]['enable_os_upgrade'] = True + + if 'aptmirror' in conf["VCA"]: + conf["VCA"]['apt_mirror'] = conf["VCA"].pop('aptmirror') return conf except Exception as e: @@ -575,18 +621,6 @@ class Lcm: return ''.join(random_choice("0123456789abcdef") for _ in range(12)) -def _get_version(): - """ - Try to get version from package using pkg_resources (available with setuptools) - """ - global lcm_version - try: - from pkg_resources import get_distribution - lcm_version = get_distribution("osm_lcm").version - except Exception: - pass - - def usage(): print("""Usage: {} [options] -c|--config [configuration_file]: loads the configuration file (default: ./lcm.cfg) @@ -597,27 +631,10 @@ def usage(): # --log-socket-port PORT: send logs using this port (default: 9022)") -def health_check(): - retry = 2 - while retry: - retry -= 1 - try: - with open(health_check_file, "r") as f: - last_received_ping = f.read() - - if time() - float(last_received_ping) < Lcm.ping_interval_pace + 10: - exit(0) - except Exception: - pass - if retry: - sleep(6) - exit(1) - - if __name__ == '__main__': try: - print("SYS.PATH='{}'".format(sys.path)) + # print("SYS.PATH='{}'".format(sys.path)) # load parameters and configuration # -h # -c value @@ -634,7 +651,8 @@ if __name__ == '__main__': elif o in ("-c", "--config"): config_file = a elif o == "--health-check": - health_check() + from osm_lcm.lcm_hc import health_check + health_check(health_check_file, Lcm.ping_interval_pace) # elif o == "--log-socket-port": # log_socket_port = a # elif o == "--log-socket-host": @@ -655,8 +673,6 @@ if __name__ == '__main__': else: print("No configuration file 'lcm.cfg' found neither at local folder nor at /etc/osm/", file=sys.stderr) exit(1) - # get version from package and upate global lcm_version - _get_version() lcm = Lcm(config_file) lcm.start() except (LcmException, getopt.GetoptError) as e: diff --git a/osm_lcm/lcm_hc.py b/osm_lcm/lcm_hc.py new file mode 100644 index 0000000..701e5b6 --- /dev/null +++ b/osm_lcm/lcm_hc.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +## +# Copyright 2018 Telefonica S.A. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +## + +from os import path +from time import time, sleep +from sys import stderr + +""" This module is used for helth check. A file called time_last_ping is used +This contains the last time where something is received from kafka +""" + + +def health_check(health_check_file=None, ping_interval_pace=120): + health_check_file = health_check_file or path.expanduser("~") + "/time_last_ping" + retry = 2 + while retry: + retry -= 1 + try: + with open(health_check_file, "r") as f: + last_received_ping = f.read() + + if time() - float(last_received_ping) < 2 * ping_interval_pace: # allow one ping not received every two + exit(0) + except Exception as e: + print(e, file=stderr) + if retry: + sleep(6) + exit(1) + + +if __name__ == '__main__': + health_check() diff --git a/osm_lcm/lcm_helm_conn.py b/osm_lcm/lcm_helm_conn.py new file mode 100644 index 0000000..555613e --- /dev/null +++ b/osm_lcm/lcm_helm_conn.py @@ -0,0 +1,502 @@ +## +# Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +## +import functools +import yaml +import asyncio +import socket +import uuid + +from grpclib.client import Channel + +from osm_lcm.frontend_pb2 import PrimitiveRequest +from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply +from osm_lcm.frontend_grpc import FrontendExecutorStub + +from n2vc.n2vc_conn import N2VCConnector +from n2vc.k8s_helm_conn import K8sHelmConnector +from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException + +from osm_lcm.lcm_utils import deep_get + + +def retryer(max_wait_time=60, delay_time=10): + def wrapper(func): + retry_exceptions = ( + ConnectionRefusedError + ) + + @functools.wraps(func) + async def wrapped(*args, **kwargs): + wait_time = max_wait_time + while wait_time > 0: + try: + return await func(*args, **kwargs) + except retry_exceptions: + wait_time = wait_time - delay_time + await asyncio.sleep(delay_time) + continue + else: + return ConnectionRefusedError + return wrapped + return wrapper + + +class LCMHelmConn(N2VCConnector): + _KUBECTL_OSM_NAMESPACE = "osm" + _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s" + _EE_SERVICE_PORT = 50050 + + # Time beetween retries + _EE_RETRY_DELAY = 10 + # Initial max retry time + _MAX_INITIAL_RETRY_TIME = 300 + # Other retry time + _MAX_RETRY_TIME = 30 + + def __init__(self, + db: object, + fs: object, + log: object = None, + loop: object = None, + url: str = None, + username: str = None, + vca_config: dict = None, + on_update_db=None, ): + """ + Initialize EE helm connector. + """ + + # parent class constructor + N2VCConnector.__init__( + self, + db=db, + fs=fs, + log=log, + loop=loop, + url=url, + username=username, + vca_config=vca_config, + on_update_db=on_update_db, + ) + + self.log.debug("Initialize helm N2VC connector") + + # TODO - Obtain data from configuration + self._ee_service_port = self._EE_SERVICE_PORT + + self._retry_delay = self._EE_RETRY_DELAY + self._max_retry_time = self._MAX_RETRY_TIME + self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME + + # initialize helm connector + self._k8sclusterhelm = K8sHelmConnector( + kubectl_command=self.vca_config.get("kubectlpath"), + helm_command=self.vca_config.get("helmpath"), + fs=self.fs, + log=self.log, + db=self.db, + on_update_db=None, + ) + + self._system_cluster_id = None + self.log.info("Helm N2VC connector initialized") + + # TODO - ¿reuse_ee_id? + async def create_execution_environment(self, + namespace: str, + db_dict: dict, + reuse_ee_id: str = None, + progress_timeout: float = None, + total_timeout: float = None, + config: dict = None, + artifact_path: str = None, + vca_type: str = None) -> (str, dict): + """ + Creates a new helm execution environment deploying the helm-chat indicated in the + attifact_path + :param str namespace: This param is not used, all helm charts are deployed in the osm + system namespace + :param dict db_dict: where to write to database when the status changes. + It contains a dictionary with {collection: str, filter: {}, path: str}, + e.g. {collection: "nsrs", filter: {_id: , path: + "_admin.deployed.VCA.3"} + :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used + :param float progress_timeout: + :param float total_timeout: + :param dict config: General variables to instantiate KDU + :param str artifact_path: path of package content + :param str vca_type: Type of vca, not used as assumed of type helm + :returns str, dict: id of the new execution environment including namespace.helm_id + and credentials object set to None as all credentials should be osm kubernetes .kubeconfig + """ + + self.log.info( + "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, " + "reuse_ee_id: {}".format( + namespace, artifact_path, db_dict, reuse_ee_id) + ) + + # Validate artifact-path is provided + if artifact_path is None or len(artifact_path) == 0: + raise N2VCBadArgumentsException( + message="artifact_path is mandatory", bad_args=["artifact_path"] + ) + + # Validate artifact-path exists + + # remove / in charm path + while artifact_path.find("//") >= 0: + artifact_path = artifact_path.replace("//", "/") + + # check charm path + if self.fs.file_exists(artifact_path): + helm_chart_path = artifact_path + else: + msg = "artifact path does not exist: {}".format(artifact_path) + raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"]) + + if artifact_path.startswith("/"): + full_path = self.fs.path + helm_chart_path + else: + full_path = self.fs.path + "/" + helm_chart_path + + try: + # Call helm conn install + # Obtain system cluster id from database + system_cluster_uuid = self._get_system_cluster_id() + # Add parameter osm if exist to global + if config and config.get("osm"): + if not config.get("global"): + config["global"] = {} + config["global"]["osm"] = config.get("osm") + + self.log.debug("install helm chart: {}".format(full_path)) + helm_id = await self._k8sclusterhelm.install(system_cluster_uuid, kdu_model=full_path, + namespace=self._KUBECTL_OSM_NAMESPACE, + params=config, + db_dict=db_dict, + timeout=progress_timeout) + + ee_id = "{}.{}".format(self._KUBECTL_OSM_NAMESPACE, helm_id) + return ee_id, None + except N2VCException: + raise + except Exception as e: + self.log.error("Error deploying chart ee: {}".format(e), exc_info=True) + raise N2VCException("Error deploying chart ee: {}".format(e)) + + async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict, + progress_timeout: float = None, total_timeout: float = None) -> str: + # nothing to do + pass + + async def install_configuration_sw(self, + ee_id: str, + artifact_path: str, + db_dict: dict, + progress_timeout: float = None, + total_timeout: float = None, + config: dict = None, + num_units: int = 1, + vca_type: str = None + ): + # nothing to do + pass + + async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str): + # nothing to do + pass + + async def remove_relation(self): + # nothing to to + pass + + async def get_status(self, namespace: str, yaml_format: bool = True): + # not used for this connector + pass + + async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None, + total_timeout: float = None) -> str: + """ + Obtains ssh-public key from ee executing GetSShKey method from the ee. + + :param str ee_id: the id of the execution environment returned by + create_execution_environment or register_execution_environment + :param dict db_dict: + :param float progress_timeout: + :param float total_timeout: + :returns: public key of the execution environment + """ + + self.log.info( + "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format( + ee_id, db_dict) + ) + + # check arguments + if ee_id is None or len(ee_id) == 0: + raise N2VCBadArgumentsException( + message="ee_id is mandatory", bad_args=["ee_id"] + ) + + try: + # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes + namespace, helm_id = self._get_ee_id_parts(ee_id) + ip_addr = socket.gethostbyname(helm_id) + + # Obtain ssh_key from the ee, this method will implement retries to allow the ee + # install libraries and start successfully + ssh_key = await self._get_ssh_key(ip_addr) + return ssh_key + except Exception as e: + self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True) + raise N2VCException("Error obtaining ee ssh_ke: {}".format(e)) + + async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None, + progress_timeout: float = None, total_timeout: float = None) -> str: + """ + Execute a primitive in the execution environment + + :param str ee_id: the one returned by create_execution_environment or + register_execution_environment with the format namespace.helm_id + :param str primitive_name: must be one defined in the software. There is one + called 'config', where, for the proxy case, the 'credentials' of VM are + provided + :param dict params_dict: parameters of the action + :param dict db_dict: where to write into database when the status changes. + It contains a dict with + {collection: , filter: {}, path: }, + e.g. {collection: "nslcmops", filter: + {_id: , path: "_admin.VCA"} + It will be used to store information about intermediate notifications + :param float progress_timeout: + :param float total_timeout: + :returns str: primitive result, if ok. It raises exceptions in case of fail + """ + + self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format( + ee_id, primitive_name, params_dict, db_dict + )) + + # check arguments + if ee_id is None or len(ee_id) == 0: + raise N2VCBadArgumentsException( + message="ee_id is mandatory", bad_args=["ee_id"] + ) + if primitive_name is None or len(primitive_name) == 0: + raise N2VCBadArgumentsException( + message="action_name is mandatory", bad_args=["action_name"] + ) + if params_dict is None: + params_dict = dict() + + try: + namespace, helm_id = self._get_ee_id_parts(ee_id) + ip_addr = socket.gethostbyname(helm_id) + except Exception as e: + self.log.error("Error getting ee ip ee: {}".format(e)) + raise N2VCException("Error getting ee ip ee: {}".format(e)) + + if primitive_name == "config": + try: + # Execute config primitive, higher timeout to check the case ee is starting + status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict) + self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format( + ee_id, status, detailed_message)) + if status != "OK": + self.log.error("Error configuring helm ee, status: {}, message: {}".format( + status, detailed_message)) + raise N2VCExecutionException( + message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format( + ee_id, status, detailed_message + ), + primitive_name=primitive_name, + ) + except Exception as e: + self.log.error("Error configuring helm ee: {}".format(e)) + raise N2VCExecutionException( + message="Error configuring helm ee_id: {}, {}".format( + ee_id, e + ), + primitive_name=primitive_name, + ) + return "CONFIG OK" + else: + try: + # Execute primitive + status, detailed_message = await self._execute_primitive(ip_addr, primitive_name, + params_dict, db_dict=db_dict) + self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format( + primitive_name, ee_id, status, detailed_message)) + if status != "OK" and status != "PROCESSING": + self.log.error( + "Execute primitive {} returned not ok status: {}, message: {}".format( + primitive_name, status, detailed_message) + ) + raise N2VCExecutionException( + message="Execute primitive {} returned not ok status: {}, message: {}".format( + primitive_name, status, detailed_message + ), + primitive_name=primitive_name, + ) + except Exception as e: + self.log.error( + "Error executing primitive {}: {}".format(primitive_name, e) + ) + raise N2VCExecutionException( + message="Error executing primitive {} into ee={} : {}".format( + primitive_name, ee_id, e + ), + primitive_name=primitive_name, + ) + return detailed_message + + async def deregister_execution_environments(self): + # nothing to be done + pass + + async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None): + """ + Delete an execution environment + :param str ee_id: id of the execution environment to delete, included namespace.helm_id + :param dict db_dict: where to write into database when the status changes. + It contains a dict with + {collection: , filter: {}, path: }, + e.g. {collection: "nsrs", filter: + {_id: , path: "_admin.deployed.VCA.3"} + :param float total_timeout: + """ + + self.log.info("ee_id: {}".format(ee_id)) + + # check arguments + if ee_id is None: + raise N2VCBadArgumentsException( + message="ee_id is mandatory", bad_args=["ee_id"] + ) + + try: + + # Obtain cluster_uuid + system_cluster_uuid = self._get_system_cluster_id() + + # Get helm_id + namespace, helm_id = self._get_ee_id_parts(ee_id) + + # Uninstall chart + await self._k8sclusterhelm.uninstall(system_cluster_uuid, helm_id) + self.log.info("ee_id: {} deleted".format(ee_id)) + except N2VCException: + raise + except Exception as e: + self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True) + raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e)) + + async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None): + # method not implemented for this connector, execution environments must be deleted individually + pass + + async def install_k8s_proxy_charm( + self, + charm_name: str, + namespace: str, + artifact_path: str, + db_dict: dict, + progress_timeout: float = None, + total_timeout: float = None, + config: dict = None, + ) -> str: + pass + + @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY) + async def _get_ssh_key(self, ip_addr): + channel = Channel(ip_addr, self._ee_service_port) + try: + stub = FrontendExecutorStub(channel) + self.log.debug("get ssh key, ip_addr: {}".format(ip_addr)) + reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest()) + return reply.message + finally: + channel.close() + + @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY) + async def _execute_config_primitive(self, ip_addr, params, db_dict=None): + return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict) + + @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY) + async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None): + return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict) + + async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None): + + channel = Channel(ip_addr, self._ee_service_port) + try: + stub = FrontendExecutorStub(channel) + async with stub.RunPrimitive.open() as stream: + primitive_id = str(uuid.uuid1()) + result = None + self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}". + format(primitive_id, primitive_name, params)) + await stream.send_message( + PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True) + async for reply in stream: + self.log.debug("Received reply: {}".format(reply)) + result = reply + # If db_dict provided write notifs in database + if db_dict: + self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message) + if result: + return reply.status, reply.detailed_message + else: + return "ERROR", "No result received" + finally: + channel.close() + + def _write_op_detailed_status(self, db_dict, status, detailed_message): + + # write ee_id to database: _admin.deployed.VCA.x + try: + the_table = db_dict["collection"] + the_filter = db_dict["filter"] + update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)} + # self.log.debug('Writing ee_id to database: {}'.format(the_path)) + self.db.set_one( + table=the_table, + q_filter=the_filter, + update_dict=update_dict, + fail_on_empty=True, + ) + except asyncio.CancelledError: + raise + except Exception as e: + self.log.error("Error writing detailedStatus to database: {}".format(e)) + + def _get_system_cluster_id(self): + if not self._system_cluster_id: + db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME}) + k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart", "id")) + if not k8s_hc_id: + self.log.error("osm system cluster has not been properly initialized for helm connector, " + "helm-chart id is not defined") + raise N2VCException("osm system cluster has not been properly initialized for helm connector") + self._system_cluster_id = k8s_hc_id + return self._system_cluster_id + + def _get_ee_id_parts(self, ee_id): + namespace, _, helm_id = ee_id.partition('.') + return namespace, helm_id diff --git a/osm_lcm/lcm_utils.py b/osm_lcm/lcm_utils.py index a83e788..dcea3c0 100644 --- a/osm_lcm/lcm_utils.py +++ b/osm_lcm/lcm_utils.py @@ -49,22 +49,50 @@ def versiontuple(v): return tuple(filled) -def deep_get(target_dict, key_list): +def deep_get(target_dict, key_list, default_value=None): """ Get a value from target_dict entering in the nested keys. If keys does not exist, it returns None Example target_dict={a: {b: 5}}; key_list=[a,b] returns 5; both key_list=[a,b,c] and key_list=[f,h] return None :param target_dict: dictionary to be read :param key_list: list of keys to read from target_dict + :param default_value: value to return if key is not present in the nested dictionary :return: The wanted value if exist, None otherwise """ for key in key_list: if not isinstance(target_dict, dict) or key not in target_dict: - return None + return default_value target_dict = target_dict[key] return target_dict -# LcmBase must be listed before TaskRegistry, as it is a dependency. +def get_iterable(in_dict, in_key): + """ + Similar to .get(), but if value is None, False, ..., An empty tuple is returned instead + :param in_dict: a dictionary + :param in_key: the key to look for at in_dict + :return: in_dict[in_var] or () if it is None or not present + """ + if not in_dict.get(in_key): + return () + return in_dict[in_key] + + +def populate_dict(target_dict, key_list, value): + """ + Update target_dict creating nested dictionaries with the key_list. Last key_list item is asigned the value. + Example target_dict={K: J}; key_list=[a,b,c]; target_dict will be {K: J, a: {b: {c: value}}} + :param target_dict: dictionary to be changed + :param key_list: list of keys to insert at target_dict + :param value: + :return: None + """ + for key in key_list[0:-1]: + if key not in target_dict: + target_dict[key] = {} + target_dict = target_dict[key] + target_dict[key_list[-1]] = value + + class LcmBase: def __init__(self, db, msg, fs, logger): @@ -237,12 +265,10 @@ class TaskRegistry(LcmBase): # Input: op_id, example: 'abc123def:3' Output: account_id='abc123def', op_index=3 def _get_account_and_op_HA(self, op_id): if not op_id: - return (None, None) + return None, None account_id, _, op_index = op_id.rpartition(':') - if not account_id: - return (None, None) - if not op_index.isdigit(): - return (None, None) + if not account_id or not op_index.isdigit(): + return None, None return account_id, op_index # Get '_id' for any topic and operation @@ -333,7 +359,7 @@ class TaskRegistry(LcmBase): return True # Try to lock this task - db_table_name = self.topic2dbtable_dict.get(topic) + db_table_name = self.topic2dbtable_dict[topic] q_filter, update_dict = self._get_dbparams_for_lock_HA(topic, op_type, op_id) db_lock_task = self.db.set_one(db_table_name, q_filter=q_filter, @@ -355,7 +381,7 @@ class TaskRegistry(LcmBase): fail_on_empty=False) return True - def register_HA(self, topic, op_type, op_id, operationState, detailed_status): + def unlock_HA(self, topic, op_type, op_id, operationState, detailed_status): """ Register a task, done when finished a VIM/WIM/SDN 'create' operation. :param topic: Can be "vim", "wim", or "sdn" @@ -365,19 +391,21 @@ class TaskRegistry(LcmBase): """ # Backward compatibility - if not self._is_account_type_HA(topic) or (self._is_account_type_HA(topic) and op_id is None): + if not self._is_account_type_HA(topic) or not op_id: return # Get Account ID and Operation Index account_id, op_index = self._get_account_and_op_HA(op_id) - db_table_name = self.topic2dbtable_dict.get(topic) + db_table_name = self.topic2dbtable_dict[topic] # If this is a 'delete' operation, the account may have been deleted (SUCCESS) or may still exist (FAILED) # If the account exist, register the HA task. # Update DB for HA tasks q_filter = {'_id': account_id} update_dict = {'_admin.operations.{}.operationState'.format(op_index): operationState, - '_admin.operations.{}.detailed-status'.format(op_index): detailed_status} + '_admin.operations.{}.detailed-status'.format(op_index): detailed_status, + '_admin.operations.{}.worker'.format(op_index): None, + '_admin.current_operation': None} self.db.set_one(db_table_name, q_filter=q_filter, update_dict=update_dict, @@ -433,7 +461,7 @@ class TaskRegistry(LcmBase): q_filter = {'_id': _id} # NS/NSI if self._is_service_type_HA(topic): - update_dict = {'detailed-status': step} + update_dict = {'detailed-status': step, 'queuePosition': new_num_related_tasks} # VIM/WIM/SDN elif self._is_account_type_HA(topic): _, op_index = self._get_account_and_op_HA(op_id) diff --git a/osm_lcm/netslice.py b/osm_lcm/netslice.py index d284304..6f4e547 100644 --- a/osm_lcm/netslice.py +++ b/osm_lcm/netslice.py @@ -17,9 +17,8 @@ import asyncio import logging import logging.handlers import traceback -from osm_lcm.ns import populate_dict as populate_dict -from osm_lcm import ROclient, ns -from osm_lcm.lcm_utils import LcmException, LcmBase +from osm_lcm import ROclient +from osm_lcm.lcm_utils import LcmException, LcmBase, populate_dict, get_iterable, deep_get from osm_common.dbbase import DbException from time import time from copy import deepcopy @@ -28,23 +27,11 @@ from copy import deepcopy __author__ = "Felipe Vicens, Pol Alemany, Alfonso Tierno" -def get_iterable(in_dict, in_key): - """ - Similar to .get(), but if value is None, False, ..., An empty tuple is returned instead - :param in_dict: a dictionary - :param in_key: the key to look for at in_dict - :return: in_dict[in_var] or () if it is None or not present - """ - if not in_dict.get(in_key): - return () - return in_dict[in_key] - - class NetsliceLcm(LcmBase): - total_deploy_timeout = 2 * 3600 # global timeout for deployment + timeout_nsi_deploy = 2 * 3600 # default global timeout for deployment a nsi - def __init__(self, db, msg, fs, lcm_tasks, ro_config, vca_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop, ns): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -54,8 +41,9 @@ class NetsliceLcm(LcmBase): self.logger = logging.getLogger('lcm.netslice') self.loop = loop self.lcm_tasks = lcm_tasks - self.ns = ns.NsLcm(db, msg, fs, lcm_tasks, ro_config, vca_config, loop) - self.ro_config = ro_config + self.ns = ns + self.ro_config = config["ro_config"] + self.timeout = config["timeout"] super().__init__(db, msg, fs, self.logger) @@ -160,7 +148,7 @@ class NetsliceLcm(LcmBase): break # Creating netslice-vld at RO - RO_nsir = db_nsir["_admin"].get("deployed", {}).get("RO", []) + RO_nsir = deep_get(db_nsir, ("_admin", "deployed", "RO"), []) if vld_id in RO_nsir: db_nsir_update["_admin.deployed.RO"] = RO_nsir @@ -260,7 +248,7 @@ class NetsliceLcm(LcmBase): if nss_cp_item["nss-ref"] == nss["nss-id"]: db_nsds = self.db.get_one("nsds", {"_id": nss["nsdId"]}) # Go for nsd, and search the CP that match with nst:CP to get vld-id-ref - for cp_nsd in db_nsds["connection-point"]: + for cp_nsd in db_nsds.get("connection-point", ()): if cp_nsd["name"] == nss_cp_item["nsd-connection-point-ref"]: if nslcmop.get("operationParams"): if nslcmop["operationParams"].get("nsName") == nss["nsName"]: @@ -286,6 +274,13 @@ class NetsliceLcm(LcmBase): step = "Getting nsilcmop={} from db".format(nsilcmop_id) db_nsilcmop = self.db.get_one("nsilcmops", {"_id": nsilcmop_id}) + start_deploy = time() + nsi_params = db_nsilcmop.get("operationParams") + if nsi_params and nsi_params.get("timeout_nsi_deploy"): + timeout_nsi_deploy = nsi_params["timeout_nsi_deploy"] + else: + timeout_nsi_deploy = self.timeout.get("nsi_deploy", self.timeout_nsi_deploy) + # Empty list to keep track of network service records status in the netslice nsir_admin = db_nsir_admin = db_nsir.get("_admin") @@ -293,19 +288,19 @@ class NetsliceLcm(LcmBase): # Slice status Creating db_nsir_update["detailed-status"] = "creating" db_nsir_update["operational-status"] = "init" - self.update_db_2("nsis", nsir_id, db_nsir_update) + db_nsir_update["_admin.nsiState"] = "INSTANTIATED" - step = "Creating netslice VLDs before NS instantiation" + step = "Instantiating netslice VLDs before NS instantiation" # Creating netslice VLDs networking before NS instantiation - db_nsir_update["detailed-status"] = "Creating netslice-vld at RO" + db_nsir_update["detailed-status"] = step self.update_db_2("nsis", nsir_id, db_nsir_update) db_nsir_update["_admin.deployed.RO"] = db_nsir_admin["deployed"]["RO"] for vld_item in get_iterable(nsir_admin, "netslice-vld"): await netslice_scenario_create(self, vld_item, nsir_id, db_nsir, db_nsir_admin, db_nsir_update) self.update_db_2("nsis", nsir_id, db_nsir_update) - step = "Creating netslice subnets at RO" - db_nsir_update["detailed-status"] = "Creating netslice subnets at RO" + step = "Instantiating netslice subnets" + db_nsir_update["detailed-status"] = step self.update_db_2("nsis", nsir_id, db_nsir_update) db_nsir = self.db.get_one("nsis", {"_id": nsir_id}) @@ -321,9 +316,6 @@ class NetsliceLcm(LcmBase): # self.update_db_2("nsis", nsir_id, db_nsir_update) # Iterate over the network services operation ids to instantiate NSs - # TODO: (future improvement) look another way check the tasks instead of keep asking - # -> https://docs.python.org/3/library/asyncio-task.html#waiting-primitives - # steps: declare ns_tasks, add task when terminate is called, await asyncio.wait(vca_task_list, timeout=300) step = "Instantiating Netslice Subnets" db_nsir = self.db.get_one("nsis", {"_id": nsir_id}) nslcmop_ids = db_nsilcmop["operationParams"].get("nslcmops_ids") @@ -336,17 +328,15 @@ class NetsliceLcm(LcmBase): self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_instantiate", task) # Wait until Network Slice is ready - step = nsir_status_detailed = " Waiting nsi ready. nsi_id={}".format(nsir_id) + step = " Waiting nsi ready." nsrs_detailed_list_old = None self.logger.debug(logging_text + step) - # TODO: substitute while for await (all task to be done or not) - deployment_timeout = 2 * 3600 # Two hours - while deployment_timeout > 0: + # For HA, it is checked from database, as the ns operation may be managed by other LCM worker + while time() <= start_deploy + timeout_nsi_deploy: # Check ns instantiation status nsi_ready = True nsir = self.db.get_one("nsis", {"_id": nsir_id}) - nsir_admin = nsir["_admin"] nsrs_detailed_list = nsir["_admin"]["nsrs-detailed-list"] nsrs_detailed_list_new = [] for nslcmop_item in nslcmop_ids: @@ -356,8 +346,7 @@ class NetsliceLcm(LcmBase): for nss in nsrs_detailed_list: if nss["nsrId"] == nslcmop["nsInstanceId"]: nss.update({"nsrId": nslcmop["nsInstanceId"], "status": nslcmop["operationState"], - "detailed-status": - nsir_status_detailed + "; {}".format(nslcmop.get("detailed-status")), + "detailed-status": nslcmop.get("detailed-status"), "instantiated": True}) nsrs_detailed_list_new.append(nss) if status not in ["COMPLETED", "PARTIALLY_COMPLETED", "FAILED", "FAILED_TEMP"]: @@ -368,18 +357,22 @@ class NetsliceLcm(LcmBase): self.update_db_2("nsis", nsir_id, {"_admin.nsrs-detailed-list": nsrs_detailed_list_new}) if nsi_ready: - step = "Network Slice Instance is ready. nsi_id={}".format(nsir_id) - for items in nsrs_detailed_list: - if "FAILED" in items.values(): - raise LcmException("Error deploying NSI: {}".format(nsir_id)) + error_list = [] + step = "Network Slice Instance instantiated" + for nss in nsrs_detailed_list: + if nss["status"] in ("FAILED", "FAILED_TEMP"): + error_list.append("NS {} {}: {}".format(nss["nsrId"], nss["status"], + nss["detailed-status"])) + if error_list: + step = "instantiating" + raise LcmException("; ".join(error_list)) break # TODO: future improvement due to synchronism -> await asyncio.wait(vca_task_list, timeout=300) await asyncio.sleep(5, loop=self.loop) - deployment_timeout -= 5 - if deployment_timeout <= 0: - raise LcmException("Timeout waiting nsi to be ready. nsi_id={}".format(nsir_id)) + else: # timeout_nsi_deploy reached: + raise LcmException("Timeout waiting nsi to be ready.") db_nsir_update["operational-status"] = "running" db_nsir_update["detailed-status"] = "done" @@ -404,13 +397,13 @@ class NetsliceLcm(LcmBase): if db_nsir: db_nsir_update["detailed-status"] = "ERROR {}: {}".format(step, exc) db_nsir_update["operational-status"] = "failed" + db_nsir_update["config-status"] = "configured" if db_nsilcmop: db_nsilcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) db_nsilcmop_update["operationState"] = nsilcmop_operation_state = "FAILED" db_nsilcmop_update["statusEnteredTime"] = time() try: if db_nsir: - db_nsir_update["_admin.nsiState"] = "INSTANTIATED" db_nsir_update["_admin.nsilcmop"] = None self.update_db_2("nsis", nsir_id, db_nsir_update) if db_nsilcmop: diff --git a/osm_lcm/ng_ro.py b/osm_lcm/ng_ro.py new file mode 100644 index 0000000..6e9f683 --- /dev/null +++ b/osm_lcm/ng_ro.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +## +# Copyright 2020 Telefónica Investigación y Desarrollo, S.A.U. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +## + +""" +asyncio RO python client to interact with New Generation RO server +""" + +import asyncio +import aiohttp +import yaml +import logging + +__author__ = "Alfonso Tierno = 300: + raise NgRoException(response_text, http_code=response.status) + return self._parse_yaml(response_text, response=True) + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: + raise NgRoException(e, http_code=504) + except asyncio.TimeoutError: + raise NgRoException("Timeout", http_code=504) + + async def status(self, nsr_id, action_id): + try: + url = "{}/ns/v1/deploy/{nsr_id}/{action_id}".format(self.endpoint_url, nsr_id=nsr_id, action_id=action_id) + async with aiohttp.ClientSession(loop=self.loop) as session: + self.logger.debug("GET %s", url) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.get(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise NgRoException(response_text, http_code=response.status) + return self._parse_yaml(response_text, response=True) + + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: + raise NgRoException(e, http_code=504) + except asyncio.TimeoutError: + raise NgRoException("Timeout", http_code=504) + + async def delete(self, nsr_id): + try: + url = "{}/ns/v1/deploy/{nsr_id}".format(self.endpoint_url, nsr_id=nsr_id) + async with aiohttp.ClientSession(loop=self.loop) as session: + self.logger.debug("DELETE %s", url) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.delete(url, headers=self.headers_req) as response: + self.logger.debug("DELETE {} [{}]".format(url, response.status)) + if response.status >= 300: + raise NgRoException("Delete {}".format(nsr_id), http_code=response.status) + return + + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: + raise NgRoException(e, http_code=504) + except asyncio.TimeoutError: + raise NgRoException("Timeout", http_code=504) + + async def get_version(self): + """ + Obtain RO server version. + :return: a list with integers ["major", "minor", "release"]. Raises NgRoException on Error, + """ + try: + response_text = "" + async with aiohttp.ClientSession(loop=self.loop) as session: + url = "{}/version".format(self.endpoint_url) + self.logger.debug("RO GET %s", url) + # timeout = aiohttp.ClientTimeout(total=self.timeout_short) + async with session.get(url, headers=self.headers_req) as response: + response_text = await response.read() + self.logger.debug("GET {} [{}] {}".format(url, response.status, response_text[:100])) + if response.status >= 300: + raise NgRoException(response_text, http_code=response.status) + + for word in str(response_text).split(" "): + if "." in word: + version_text, _, _ = word.partition("-") + return version_text + raise NgRoException("Got invalid version text: '{}'".format(response_text), http_code=500) + except (aiohttp.ClientOSError, aiohttp.ClientError) as e: + raise NgRoException(e, http_code=504) + except asyncio.TimeoutError: + raise NgRoException("Timeout", http_code=504) + except Exception as e: + raise NgRoException("Got invalid version text: '{}'; causing exception {}".format(response_text, e), + http_code=500) + + @staticmethod + def _parse_yaml(descriptor, response=False): + try: + return yaml.safe_load(descriptor) + except yaml.YAMLError as exc: + error_pos = "" + if hasattr(exc, 'problem_mark'): + mark = exc.problem_mark + error_pos = " at line:{} column:{}s".format(mark.line + 1, mark.column + 1) + error_text = "yaml format error" + error_pos + if response: + raise NgRoException("reponse with " + error_text) + raise NgRoException(error_text) diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index de1c6a0..a83495a 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -22,10 +22,11 @@ import logging import logging.handlers import traceback import json -from jinja2 import Environment, Template, meta, TemplateError, TemplateNotFound, TemplateSyntaxError +from jinja2 import Environment, TemplateError, TemplateNotFound, StrictUndefined, UndefinedError from osm_lcm import ROclient -from osm_lcm.lcm_utils import LcmException, LcmExceptionNoMgmtIP, LcmBase, deep_get +from osm_lcm.ng_ro import NgRoClient, NgRoException +from osm_lcm.lcm_utils import LcmException, LcmExceptionNoMgmtIP, LcmBase, deep_get, get_iterable, populate_dict from n2vc.k8s_helm_conn import K8sHelmConnector from n2vc.k8s_juju_conn import K8sJujuConnector @@ -33,55 +34,63 @@ from osm_common.dbbase import DbException from osm_common.fsbase import FsException from n2vc.n2vc_juju_conn import N2VCJujuConnector -from n2vc.exceptions import N2VCException +from n2vc.exceptions import N2VCException, N2VCNotFound, K8sException + +from osm_lcm.lcm_helm_conn import LCMHelmConn from copy import copy, deepcopy from http import HTTPStatus from time import time from uuid import uuid4 -__author__ = "Alfonso Tierno" - - -def get_iterable(in_dict, in_key): - """ - Similar to .get(), but if value is None, False, ..., An empty tuple is returned instead - :param in_dict: a dictionary - :param in_key: the key to look for at in_dict - :return: in_dict[in_var] or () if it is None or not present - """ - if not in_dict.get(in_key): - return () - return in_dict[in_key] - - -def populate_dict(target_dict, key_list, value): - """ - Update target_dict creating nested dictionaries with the key_list. Last key_list item is asigned the value. - Example target_dict={K: J}; key_list=[a,b,c]; target_dict will be {K: J, a: {b: {c: value}}} - :param target_dict: dictionary to be changed - :param key_list: list of keys to insert at target_dict - :param value: - :return: None - """ - for key in key_list[0:-1]: - if key not in target_dict: - target_dict[key] = {} - target_dict = target_dict[key] - target_dict[key_list[-1]] = value +from random import randint + +__author__ = "Alfonso Tierno " + + +class N2VCJujuConnectorLCM(N2VCJujuConnector): + + async def create_execution_environment(self, namespace: str, db_dict: dict, reuse_ee_id: str = None, + progress_timeout: float = None, total_timeout: float = None, + config: dict = None, artifact_path: str = None, + vca_type: str = None) -> (str, dict): + # admit two new parameters, artifact_path and vca_type + if vca_type == "k8s_proxy_charm": + ee_id = await self.install_k8s_proxy_charm( + charm_name=artifact_path[artifact_path.rfind("/") + 1:], + namespace=namespace, + artifact_path=artifact_path, + db_dict=db_dict) + return ee_id, None + else: + return await super().create_execution_environment( + namespace=namespace, db_dict=db_dict, reuse_ee_id=reuse_ee_id, + progress_timeout=progress_timeout, total_timeout=total_timeout) + + async def install_configuration_sw(self, ee_id: str, artifact_path: str, db_dict: dict, + progress_timeout: float = None, total_timeout: float = None, + config: dict = None, num_units: int = 1, vca_type: str = "lxc_proxy_charm"): + if vca_type == "k8s_proxy_charm": + return + return await super().install_configuration_sw( + ee_id=ee_id, artifact_path=artifact_path, db_dict=db_dict, progress_timeout=progress_timeout, + total_timeout=total_timeout, config=config, num_units=num_units) class NsLcm(LcmBase): timeout_vca_on_error = 5 * 60 # Time for charm from first time at blocked,error status to mark as failed - total_deploy_timeout = 2 * 3600 # global timeout for deployment + timeout_ns_deploy = 2 * 3600 # default global timeout for deployment a ns + timeout_ns_terminate = 1800 # default global timeout for un deployment a ns timeout_charm_delete = 10 * 60 - timeout_primitive = 10 * 60 # timeout for primitive execution + timeout_primitive = 30 * 60 # timeout for primitive execution + timeout_progress_primitive = 10 * 60 # timeout for some progress in a primitive execution SUBOPERATION_STATUS_NOT_FOUND = -1 SUBOPERATION_STATUS_NEW = -2 SUBOPERATION_STATUS_SKIP = -3 + task_name_deploy_vca = "Deploying VCA" - def __init__(self, db, msg, fs, lcm_tasks, ro_config, vca_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop, prometheus=None): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -96,17 +105,13 @@ class NsLcm(LcmBase): self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config - self.vca_config = vca_config - if 'pubkey' in self.vca_config: - self.vca_config['public_key'] = self.vca_config['pubkey'] - if 'cacert' in self.vca_config: - self.vca_config['ca_cert'] = self.vca_config['cacert'] - if 'apiproxy' in self.vca_config: - self.vca_config['api_proxy'] = self.vca_config['apiproxy'] + self.timeout = config["timeout"] + self.ro_config = config["ro_config"] + self.ng_ro = config["ro_config"].get("ng") + self.vca_config = config["VCA"].copy() # create N2VC connector - self.n2vc = N2VCJujuConnector( + self.n2vc = N2VCJujuConnectorLCM( db=self.db, fs=self.fs, log=self.logger, @@ -114,9 +119,18 @@ class NsLcm(LcmBase): url='{}:{}'.format(self.vca_config['host'], self.vca_config['port']), username=self.vca_config.get('user', None), vca_config=self.vca_config, - on_update_db=self._on_update_n2vc_db, - # ca_cert=self.vca_config.get('cacert'), - # api_proxy=self.vca_config.get('apiproxy'), + on_update_db=self._on_update_n2vc_db + ) + + self.conn_helm_ee = LCMHelmConn( + db=self.db, + fs=self.fs, + log=self.logger, + loop=self.loop, + url=None, + username=None, + vca_config=self.vca_config, + on_update_db=self._on_update_n2vc_db ) self.k8sclusterhelm = K8sHelmConnector( @@ -134,38 +148,200 @@ class NsLcm(LcmBase): fs=self.fs, log=self.logger, db=self.db, + loop=self.loop, on_update_db=None, + vca_config=self.vca_config, ) + self.k8scluster_map = { + "helm-chart": self.k8sclusterhelm, + "chart": self.k8sclusterhelm, + "juju-bundle": self.k8sclusterjuju, + "juju": self.k8sclusterjuju, + } + + self.vca_map = { + "lxc_proxy_charm": self.n2vc, + "native_charm": self.n2vc, + "k8s_proxy_charm": self.n2vc, + "helm": self.conn_helm_ee + } + + self.prometheus = prometheus + # create RO client - self.RO = ROclient.ROClient(self.loop, **self.ro_config) + if self.ng_ro: + self.RO = NgRoClient(self.loop, **self.ro_config) + else: + self.RO = ROclient.ROClient(self.loop, **self.ro_config) - def _on_update_n2vc_db(self, table, filter, path, updated_data): + def _on_update_ro_db(self, nsrs_id, ro_descriptor): - self.logger.debug('_on_update_n2vc_db(table={}, filter={}, path={}, updated_data={}' - .format(table, filter, path, updated_data)) + # self.logger.debug('_on_update_ro_db(nsrs_id={}'.format(nsrs_id)) - return - # write NS status to database - # try: - # # nsrs_id = filter.get('_id') - # # print(nsrs_id) - # # get ns record - # nsr = self.db.get_one(table=table, q_filter=filter) - # # get VCA deployed list - # vca_list = deep_get(target_dict=nsr, key_list=('_admin', 'deployed', 'VCA')) - # # get RO deployed - # # ro_list = deep_get(target_dict=nsr, key_list=('_admin', 'deployed', 'RO')) - # for vca in vca_list: - # # status = vca.get('status') - # # print(status) - # # detailed_status = vca.get('detailed-status') - # # print(detailed_status) - # # for ro in ro_list: - # # print(ro) - # - # except Exception as e: - # self.logger.error('Error writing NS status to db: {}'.format(e)) + try: + # TODO filter RO descriptor fields... + + # write to database + db_dict = dict() + # db_dict['deploymentStatus'] = yaml.dump(ro_descriptor, default_flow_style=False, indent=2) + db_dict['deploymentStatus'] = ro_descriptor + self.update_db_2("nsrs", nsrs_id, db_dict) + + except Exception as e: + self.logger.warn('Cannot write database RO deployment for ns={} -> {}'.format(nsrs_id, e)) + + async def _on_update_n2vc_db(self, table, filter, path, updated_data): + + # remove last dot from path (if exists) + if path.endswith('.'): + path = path[:-1] + + # self.logger.debug('_on_update_n2vc_db(table={}, filter={}, path={}, updated_data={}' + # .format(table, filter, path, updated_data)) + + try: + + nsr_id = filter.get('_id') + + # read ns record from database + nsr = self.db.get_one(table='nsrs', q_filter=filter) + current_ns_status = nsr.get('nsState') + + # get vca status for NS + status_dict = await self.n2vc.get_status(namespace='.' + nsr_id, yaml_format=False) + + # vcaStatus + db_dict = dict() + db_dict['vcaStatus'] = status_dict + + # update configurationStatus for this VCA + try: + vca_index = int(path[path.rfind(".")+1:]) + + vca_list = deep_get(target_dict=nsr, key_list=('_admin', 'deployed', 'VCA')) + vca_status = vca_list[vca_index].get('status') + + configuration_status_list = nsr.get('configurationStatus') + config_status = configuration_status_list[vca_index].get('status') + + if config_status == 'BROKEN' and vca_status != 'failed': + db_dict['configurationStatus'][vca_index] = 'READY' + elif config_status != 'BROKEN' and vca_status == 'failed': + db_dict['configurationStatus'][vca_index] = 'BROKEN' + except Exception as e: + # not update configurationStatus + self.logger.debug('Error updating vca_index (ignore): {}'.format(e)) + + # if nsState = 'READY' check if juju is reporting some error => nsState = 'DEGRADED' + # if nsState = 'DEGRADED' check if all is OK + is_degraded = False + if current_ns_status in ('READY', 'DEGRADED'): + error_description = '' + # check machines + if status_dict.get('machines'): + for machine_id in status_dict.get('machines'): + machine = status_dict.get('machines').get(machine_id) + # check machine agent-status + if machine.get('agent-status'): + s = machine.get('agent-status').get('status') + if s != 'started': + is_degraded = True + error_description += 'machine {} agent-status={} ; '.format(machine_id, s) + # check machine instance status + if machine.get('instance-status'): + s = machine.get('instance-status').get('status') + if s != 'running': + is_degraded = True + error_description += 'machine {} instance-status={} ; '.format(machine_id, s) + # check applications + if status_dict.get('applications'): + for app_id in status_dict.get('applications'): + app = status_dict.get('applications').get(app_id) + # check application status + if app.get('status'): + s = app.get('status').get('status') + if s != 'active': + is_degraded = True + error_description += 'application {} status={} ; '.format(app_id, s) + + if error_description: + db_dict['errorDescription'] = error_description + if current_ns_status == 'READY' and is_degraded: + db_dict['nsState'] = 'DEGRADED' + if current_ns_status == 'DEGRADED' and not is_degraded: + db_dict['nsState'] = 'READY' + + # write to database + self.update_db_2("nsrs", nsr_id, db_dict) + + except (asyncio.CancelledError, asyncio.TimeoutError): + raise + except Exception as e: + self.logger.warn('Error updating NS state for ns={}: {}'.format(nsr_id, e)) + + @staticmethod + def _parse_cloud_init(cloud_init_text, additional_params, vnfd_id, vdu_id): + try: + env = Environment(undefined=StrictUndefined) + template = env.from_string(cloud_init_text) + return template.render(additional_params or {}) + except UndefinedError as e: + raise LcmException("Variable {} at vnfd[id={}]:vdu[id={}]:cloud-init/cloud-init-" + "file, must be provided in the instantiation parameters inside the " + "'additionalParamsForVnf/Vdu' block".format(e, vnfd_id, vdu_id)) + except (TemplateError, TemplateNotFound) as e: + raise LcmException("Error parsing Jinja2 to cloud-init content at vnfd[id={}]:vdu[id={}]: {}". + format(vnfd_id, vdu_id, e)) + + def _get_cloud_init(self, vdu, vnfd): + try: + cloud_init_content = cloud_init_file = None + if vdu.get("cloud-init-file"): + base_folder = vnfd["_admin"]["storage"] + cloud_init_file = "{}/{}/cloud_init/{}".format(base_folder["folder"], base_folder["pkg-dir"], + vdu["cloud-init-file"]) + with self.fs.file_open(cloud_init_file, "r") as ci_file: + cloud_init_content = ci_file.read() + elif vdu.get("cloud-init"): + cloud_init_content = vdu["cloud-init"] + + return cloud_init_content + except FsException as e: + raise LcmException("Error reading vnfd[id={}]:vdu[id={}]:cloud-init-file={}: {}". + format(vnfd["id"], vdu["id"], cloud_init_file, e)) + + def _get_osm_params(self, db_vnfr, vdu_id=None, vdu_count_index=0): + osm_params = {x.replace("-", "_"): db_vnfr[x] for x in ("ip-address", "vim-account-id", "vnfd-id", "vnfd-ref") + if db_vnfr.get(x) is not None} + osm_params["ns_id"] = db_vnfr["nsr-id-ref"] + osm_params["vnf_id"] = db_vnfr["_id"] + osm_params["member_vnf_index"] = db_vnfr["member-vnf-index-ref"] + if db_vnfr.get("vdur"): + osm_params["vdu"] = {} + for vdur in db_vnfr["vdur"]: + vdu = { + "count_index": vdur["count-index"], + "vdu_id": vdur["vdu-id-ref"], + "interfaces": {} + } + if vdur.get("ip-address"): + vdu["ip_address"] = vdur["ip-address"] + for iface in vdur["interfaces"]: + vdu["interfaces"][iface["name"]] = \ + {x.replace("-", "_"): iface[x] for x in ("mac-address", "ip-address", "vnf-vld-id", "name") + if iface.get(x) is not None} + vdu_id_index = "{}-{}".format(vdur["vdu-id-ref"], vdur["count-index"]) + osm_params["vdu"][vdu_id_index] = vdu + if vdu_id: + osm_params["vdu_id"] = vdu_id + osm_params["count_index"] = vdu_count_index + return osm_params + + def _get_vdu_additional_params(self, db_vnfr, vdu_id): + vdur = next(vdur for vdur in db_vnfr.get("vdur") if vdu_id == vdur["vdu-id-ref"]) + additional_params = vdur.get("additionalParams") + return self._format_additional_params(additional_params) def vnfd2RO(self, vnfd, new_id=None, additionalParams=None, nsrId=None): """ @@ -176,59 +352,30 @@ class NsLcm(LcmBase): :param nsrId: Id of the NSR :return: copy of vnfd """ - try: - vnfd_RO = deepcopy(vnfd) - # remove unused by RO configuration, monitoring, scaling and internal keys - vnfd_RO.pop("_id", None) - vnfd_RO.pop("_admin", None) - vnfd_RO.pop("vnf-configuration", None) - vnfd_RO.pop("monitoring-param", None) - vnfd_RO.pop("scaling-group-descriptor", None) - vnfd_RO.pop("kdu", None) - vnfd_RO.pop("k8s-cluster", None) - if new_id: - vnfd_RO["id"] = new_id - - # parse cloud-init or cloud-init-file with the provided variables using Jinja2 - for vdu in get_iterable(vnfd_RO, "vdu"): - cloud_init_file = None - if vdu.get("cloud-init-file"): - base_folder = vnfd["_admin"]["storage"] - cloud_init_file = "{}/{}/cloud_init/{}".format(base_folder["folder"], base_folder["pkg-dir"], - vdu["cloud-init-file"]) - with self.fs.file_open(cloud_init_file, "r") as ci_file: - cloud_init_content = ci_file.read() - vdu.pop("cloud-init-file", None) - elif vdu.get("cloud-init"): - cloud_init_content = vdu["cloud-init"] - else: - continue - - env = Environment() - ast = env.parse(cloud_init_content) - mandatory_vars = meta.find_undeclared_variables(ast) - if mandatory_vars: - for var in mandatory_vars: - if not additionalParams or var not in additionalParams.keys(): - raise LcmException("Variable '{}' defined at vnfd[id={}]:vdu[id={}]:cloud-init/cloud-init-" - "file, must be provided in the instantiation parameters inside the " - "'additionalParamsForVnf' block".format(var, vnfd["id"], vdu["id"])) - template = Template(cloud_init_content) - cloud_init_content = template.render(additionalParams or {}) - vdu["cloud-init"] = cloud_init_content - - return vnfd_RO - except FsException as e: - raise LcmException("Error reading vnfd[id={}]:vdu[id={}]:cloud-init-file={}: {}". - format(vnfd["id"], vdu["id"], cloud_init_file, e)) - except (TemplateError, TemplateNotFound, TemplateSyntaxError) as e: - raise LcmException("Error parsing Jinja2 to cloud-init content at vnfd[id={}]:vdu[id={}]: {}". - format(vnfd["id"], vdu["id"], e)) - - def ns_params_2_RO(self, ns_params, nsd, vnfd_dict, n2vc_key_list): + vnfd_RO = deepcopy(vnfd) + # remove unused by RO configuration, monitoring, scaling and internal keys + vnfd_RO.pop("_id", None) + vnfd_RO.pop("_admin", None) + vnfd_RO.pop("vnf-configuration", None) + vnfd_RO.pop("monitoring-param", None) + vnfd_RO.pop("scaling-group-descriptor", None) + vnfd_RO.pop("kdu", None) + vnfd_RO.pop("k8s-cluster", None) + if new_id: + vnfd_RO["id"] = new_id + + # parse cloud-init or cloud-init-file with the provided variables using Jinja2 + for vdu in get_iterable(vnfd_RO, "vdu"): + vdu.pop("cloud-init-file", None) + vdu.pop("cloud-init", None) + return vnfd_RO + + def _ns_params_2_RO(self, ns_params, nsd, vnfd_dict, db_vnfrs, n2vc_key_list): """ Creates a RO ns descriptor from OSM ns_instantiate params :param ns_params: OSM instantiate params + :param vnfd_dict: database content of vnfds, indexed by id (not _id). {id: {vnfd_object}, ...} + :param db_vnfrs: database content of vnfrs, indexed by member-vnf-index. {member-vnf-index: {vnfr_object}, ...} :return: The RO ns descriptor """ vim_2_RO = {} @@ -290,6 +437,13 @@ class NsLcm(LcmBase): "wim_account": wim_account_2_RO(ns_params.get("wimAccountId")), # "scenario": ns_params["nsdId"], } + # set vim_account of each vnf if different from general vim_account. + # Get this information from database content, key vim-account-id + # Vim account can be set by placement_engine and it may be different from + # the instantiate parameters (vnfs.member-vnf-index.datacenter). + for vnf_index, vnfr in db_vnfrs.items(): + if vnfr.get("vim-account-id") and vnfr["vim-account-id"] != ns_params["vimAccountId"]: + populate_dict(RO_ns_params, ("vnfs", vnf_index, "datacenter"), vim_account_2_RO(vnfr["vim-account-id"])) n2vc_key_list = n2vc_key_list or [] for vnfd_ref, vnfd in vnfd_dict.items(): @@ -324,6 +478,25 @@ class NsLcm(LcmBase): populate_dict(RO_ns_params, ("vnfs", vnf_member["member-vnf-index"], "vdus", vdu, "mgmt_keys"), n2vc_key_list) + # cloud init + for vdu in get_iterable(vnfd, "vdu"): + cloud_init_text = self._get_cloud_init(vdu, vnfd) + if not cloud_init_text: + continue + for vnf_member in nsd.get("constituent-vnfd"): + if vnf_member["vnfd-id-ref"] != vnfd_ref: + continue + db_vnfr = db_vnfrs[vnf_member["member-vnf-index"]] + additional_params = self._get_vdu_additional_params(db_vnfr, vdu["id"]) or {} + + cloud_init_list = [] + for vdu_index in range(0, int(vdu.get("count", 1))): + additional_params["OSM"] = self._get_osm_params(db_vnfr, vdu["id"], vdu_index) + cloud_init_list.append(self._parse_cloud_init(cloud_init_text, additional_params, vnfd["id"], + vdu["id"])) + populate_dict(RO_ns_params, + ("vnfs", vnf_member["member-vnf-index"], "vdus", vdu["id"], "cloud_init"), + cloud_init_list) if ns_params.get("vduImage"): RO_ns_params["vduImage"] = ns_params["vduImage"] @@ -338,9 +511,6 @@ class NsLcm(LcmBase): else: raise LcmException("Invalid instantiate parameter vnf:member-vnf-index={} is not present at nsd:" "constituent-vnfd".format(vnf_params["member-vnf-index"])) - if vnf_params.get("vimAccountId"): - populate_dict(RO_ns_params, ("vnfs", vnf_params["member-vnf-index"], "datacenter"), - vim_account_2_RO(vnf_params["vimAccountId"])) for vdu_params in get_iterable(vnf_params, "vdu"): # TODO feature 1417: check that this VDU exist and it is not a PDU @@ -512,11 +682,21 @@ class NsLcm(LcmBase): continue vdu_id_ref = vdur["vdu-id-ref"] if vdu_create and vdu_create.get(vdu_id_ref): + vdur_copy = deepcopy(vdur) + vdur_copy["status"] = "BUILD" + vdur_copy["status-detailed"] = None + vdur_copy["ip_address"]: None + for iface in vdur_copy["interfaces"]: + iface["ip-address"] = None + iface["mac-address"] = None + iface.pop("mgmt_vnf", None) # only first vdu can be managment of vnf # TODO ALF for index in range(0, vdu_create[vdu_id_ref]): - vdur = deepcopy(vdur) - vdur["_id"] = str(uuid4()) - vdur["count-index"] += 1 - vdurs.insert(vdu_index+1+index, vdur) + vdur_copy["_id"] = str(uuid4()) + vdur_copy["count-index"] += 1 + vdurs.insert(vdu_index+1+index, vdur_copy) + self.logger.debug("scale out, adding vdu={}".format(vdur_copy)) + vdur_copy = deepcopy(vdur_copy) + del vdu_create[vdu_id_ref] if vdu_delete and vdu_delete.get(vdu_id_ref): del vdurs[vdu_index] @@ -557,6 +737,21 @@ class NsLcm(LcmBase): else: raise LcmException("ns_update_nsr: Not found vld={} at RO info".format(vld["id"])) + def set_vnfr_at_error(self, db_vnfrs, error_text): + try: + for db_vnfr in db_vnfrs.values(): + vnfr_update = {"status": "ERROR"} + for vdu_index, vdur in enumerate(get_iterable(db_vnfr, "vdur")): + if "status" not in vdur: + vdur["status"] = "ERROR" + vnfr_update["vdur.{}.status".format(vdu_index)] = "ERROR" + if error_text: + vdur["status-detailed"] = str(error_text) + vnfr_update["vdur.{}.status-detailed".format(vdu_index)] = "ERROR" + self.update_db_2("vnfrs", db_vnfr["_id"], vnfr_update) + except DbException as e: + self.logger.error("Cannot update vnf. {}".format(e)) + def ns_update_vnfr(self, db_vnfrs, nsr_desc_RO): """ Updates database vnfr with the RO info, e.g. ip_address, vim_id... Descriptor db_vnfrs is also updated @@ -572,7 +767,8 @@ class NsLcm(LcmBase): if vnf_RO.get("ip_address"): db_vnfr["ip-address"] = vnfr_update["ip-address"] = vnf_RO["ip_address"].split(";")[0] elif not db_vnfr.get("ip-address"): - raise LcmExceptionNoMgmtIP("ns member_vnf_index '{}' has no IP address".format(vnf_index)) + if db_vnfr.get("vdur"): # if not VDUs, there is not ip_address + raise LcmExceptionNoMgmtIP("ns member_vnf_index '{}' has no IP address".format(vnf_index)) for vdu_index, vdur in enumerate(get_iterable(db_vnfr, "vdur")): vdur_RO_count_index = 0 @@ -652,19 +848,27 @@ class NsLcm(LcmBase): return ns_config_info @staticmethod - def _get_initial_config_primitive_list(desc_primitive_list, vca_deployed): + def _get_initial_config_primitive_list(desc_primitive_list, vca_deployed, ee_descriptor_id): """ Generates a list of initial-config-primitive based on the list provided by the descriptor. It includes internal primitives as verify-ssh-credentials, or config when needed :param desc_primitive_list: information of the descriptor :param vca_deployed: information of the deployed, needed for known if it is related to an NS, VNF, VDU and if this element contains a ssh public key + :param ee_descriptor_id: execution environment descriptor id. It is the value of + XXX_configuration.execution-environment-list.INDEX.id; it can be None :return: The modified list. Can ba an empty list, but always a list """ - if desc_primitive_list: - primitive_list = desc_primitive_list.copy() - else: - primitive_list = [] + + primitive_list = desc_primitive_list or [] + + # filter primitives by ee_id + primitive_list = [p for p in primitive_list if p.get("execution-environment-ref") == ee_descriptor_id] + + # sort by 'seq' + if primitive_list: + primitive_list.sort(key=lambda val: int(val['seq'])) + # look for primitive config, and get the position. None if not present config_position = None for index, primitive in enumerate(primitive_list): @@ -676,194 +880,452 @@ class NsLcm(LcmBase): if not vca_deployed["member-vnf-index"] and config_position is None: primitive_list.insert(0, {"name": "config", "parameter": []}) config_position = 0 - # for VNF/VDU add verify-ssh-credentials after config + # TODO revise if needed: for VNF/VDU add verify-ssh-credentials after config if vca_deployed["member-vnf-index"] and config_position is not None and vca_deployed.get("ssh-public-key"): primitive_list.insert(config_position + 1, {"name": "verify-ssh-credentials", "parameter": []}) return primitive_list - async def instantiate_RO(self, logging_text, nsr_id, nsd, db_nsr, - db_nslcmop, db_vnfrs, db_vnfds_ref, n2vc_key_list): + async def _instantiate_ng_ro(self, logging_text, nsr_id, nsd, db_nsr, db_nslcmop, db_vnfrs, db_vnfds_ref, + n2vc_key_list, stage, start_deploy, timeout_ns_deploy): + nslcmop_id = db_nslcmop["_id"] + target = { + "name": db_nsr["name"], + "ns": {"vld": []}, + "vnf": [], + "image": deepcopy(db_nsr["image"]), + "flavor": deepcopy(db_nsr["flavor"]), + "action_id": nslcmop_id, + } + for image in target["image"]: + image["vim_info"] = [] + for flavor in target["flavor"]: + flavor["vim_info"] = [] + + ns_params = db_nslcmop.get("operationParams") + ssh_keys = [] + if ns_params.get("ssh_keys"): + ssh_keys += ns_params.get("ssh_keys") + if n2vc_key_list: + ssh_keys += n2vc_key_list + + cp2target = {} + for vld_index, vld in enumerate(nsd.get("vld")): + target_vld = {"id": vld["id"], + "name": vld["name"], + "mgmt-network": vld.get("mgmt-network", False), + "type": vld.get("type"), + "vim_info": [{"vim-network-name": vld.get("vim-network-name"), + "vim_account_id": ns_params["vimAccountId"]}], + } + for cp in vld["vnfd-connection-point-ref"]: + cp2target["member_vnf:{}.{}".format(cp["member-vnf-index-ref"], cp["vnfd-connection-point-ref"])] = \ + "nsrs:{}:vld.{}".format(nsr_id, vld_index) + target["ns"]["vld"].append(target_vld) + for vnfr in db_vnfrs.values(): + vnfd = db_vnfds_ref[vnfr["vnfd-ref"]] + target_vnf = deepcopy(vnfr) + for vld in target_vnf.get("vld", ()): + # check if connected to a ns.vld + vnf_cp = next((cp for cp in vnfd.get("connection-point", ()) if + cp.get("internal-vld-ref") == vld["id"]), None) + if vnf_cp: + ns_cp = "member_vnf:{}.{}".format(vnfr["member-vnf-index-ref"], vnf_cp["id"]) + if cp2target.get(ns_cp): + vld["target"] = cp2target[ns_cp] + vld["vim_info"] = [{"vim-network-name": vld.get("vim-network-name"), + "vim_account_id": vnfr["vim-account-id"]}] + + for vdur in target_vnf.get("vdur", ()): + vdur["vim_info"] = [{"vim_account_id": vnfr["vim-account-id"]}] + vdud_index, vdud = next(k for k in enumerate(vnfd["vdu"]) if k[1]["id"] == vdur["vdu-id-ref"]) + # vdur["additionalParams"] = vnfr.get("additionalParamsForVnf") # TODO additional params for VDU + + if ssh_keys: + if deep_get(vdud, ("vdu-configuration", "config-access", "ssh-access", "required")): + vdur["ssh-keys"] = ssh_keys + vdur["ssh-access-required"] = True + elif deep_get(vnfd, ("vnf-configuration", "config-access", "ssh-access", "required")) and \ + any(iface.get("mgmt-vnf") for iface in vdur["interfaces"]): + vdur["ssh-keys"] = ssh_keys + vdur["ssh-access-required"] = True + + # cloud-init + if vdud.get("cloud-init-file"): + vdur["cloud-init"] = "{}:file:{}".format(vnfd["_id"], vdud.get("cloud-init-file")) + elif vdud.get("cloud-init"): + vdur["cloud-init"] = "{}:vdu:{}".format(vnfd["_id"], vdud_index) + + # flavor + ns_flavor = target["flavor"][int(vdur["ns-flavor-id"])] + if not next((vi for vi in ns_flavor["vim_info"] if + vi and vi.get("vim_account_id") == vnfr["vim-account-id"]), None): + ns_flavor["vim_info"].append({"vim_account_id": vnfr["vim-account-id"]}) + # image + ns_image = target["image"][int(vdur["ns-image-id"])] + if not next((vi for vi in ns_image["vim_info"] if + vi and vi.get("vim_account_id") == vnfr["vim-account-id"]), None): + ns_image["vim_info"].append({"vim_account_id": vnfr["vim-account-id"]}) + + vdur["vim_info"] = [{"vim_account_id": vnfr["vim-account-id"]}] + target["vnf"].append(target_vnf) + + desc = await self.RO.deploy(nsr_id, target) + action_id = desc["action_id"] + await self._wait_ng_ro(self, nsr_id, action_id, nslcmop_id, start_deploy, timeout_ns_deploy, stage) + + # Updating NSR + db_nsr_update = { + "_admin.deployed.RO.operational-status": "running", + "detailed-status": " ".join(stage) + } + # db_nsr["_admin.deployed.RO.detailed-status"] = "Deployed at VIM" + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + self.logger.debug(logging_text + "ns deployed at RO. RO_id={}".format(action_id)) + return + + async def _wait_ng_ro(self, nsr_id, action_id, nslcmop_id, start_time, timeout, stage): + detailed_status_old = None + db_nsr_update = {} + while time() <= start_time + timeout: + desc_status = await self.RO.status(nsr_id, action_id) + if desc_status["status"] == "FAILED": + raise NgRoException(desc_status["details"]) + elif desc_status["status"] == "BUILD": + stage[2] = "VIM: ({})".format(desc_status["details"]) + elif desc_status["status"] == "DONE": + stage[2] = "Deployed at VIM" + break + else: + assert False, "ROclient.check_ns_status returns unknown {}".format(desc_status["status"]) + if stage[2] != detailed_status_old: + detailed_status_old = stage[2] + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + await asyncio.sleep(5, loop=self.loop) + else: # timeout_ns_deploy + raise NgRoException("Timeout waiting ns to deploy") + async def _terminate_ng_ro(self, logging_text, nsr_deployed, nsr_id, nslcmop_id, stage): db_nsr_update = {} - RO_descriptor_number = 0 # number of descriptors created at RO - vnf_index_2_RO_id = {} # map between vnfd/nsd id to the id used at RO + failed_detail = [] + action_id = None start_deploy = time() - vdu_flag = False # If any of the VNFDs has VDUs - ns_params = db_nslcmop.get("operationParams") + try: + target = { + "ns": {"vld": []}, + "vnf": [], + "image": [], + "flavor": [], + } + desc = await self.RO.deploy(nsr_id, target) + action_id = desc["action_id"] + db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = action_id + db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETING" + self.logger.debug(logging_text + "ns terminate action at RO. action_id={}".format(action_id)) + + # wait until done + delete_timeout = 20 * 60 # 20 minutes + await self._wait_ng_ro(self, nsr_id, action_id, nslcmop_id, start_deploy, delete_timeout, stage) + + db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None + db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" + # delete all nsr + await self.RO.delete(nsr_id) + except Exception as e: + if isinstance(e, NgRoException) and e.http_code == 404: # not found + db_nsr_update["_admin.deployed.RO.nsr_id"] = None + db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" + db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None + self.logger.debug(logging_text + "RO_action_id={} already deleted".format(action_id)) + elif isinstance(e, NgRoException) and e.http_code == 409: # conflict + failed_detail.append("delete conflict: {}".format(e)) + self.logger.debug(logging_text + "RO_action_id={} delete conflict: {}".format(action_id, e)) + else: + failed_detail.append("delete error: {}".format(e)) + self.logger.error(logging_text + "RO_action_id={} delete error: {}".format(action_id, e)) - # deploy RO - - # get vnfds, instantiate at RO - - for c_vnf in nsd.get("constituent-vnfd", ()): - member_vnf_index = c_vnf["member-vnf-index"] - vnfd = db_vnfds_ref[c_vnf['vnfd-id-ref']] - if vnfd.get("vdu"): - vdu_flag = True - vnfd_ref = vnfd["id"] - step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Creating vnfd='{}' member_vnf_index='{}' at" \ - " RO".format(vnfd_ref, member_vnf_index) - # self.logger.debug(logging_text + step) - vnfd_id_RO = "{}.{}.{}".format(nsr_id, RO_descriptor_number, member_vnf_index[:23]) - vnf_index_2_RO_id[member_vnf_index] = vnfd_id_RO - RO_descriptor_number += 1 + if failed_detail: + stage[2] = "Error deleting from VIM" + else: + stage[2] = "Deleted from VIM" + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) - # look position at deployed.RO.vnfd if not present it will be appended at the end - for index, vnf_deployed in enumerate(db_nsr["_admin"]["deployed"]["RO"]["vnfd"]): - if vnf_deployed["member-vnf-index"] == member_vnf_index: - break + if failed_detail: + raise LcmException("; ".join(failed_detail)) + return + + async def instantiate_RO(self, logging_text, nsr_id, nsd, db_nsr, db_nslcmop, db_vnfrs, db_vnfds_ref, + n2vc_key_list, stage): + """ + Instantiate at RO + :param logging_text: preffix text to use at logging + :param nsr_id: nsr identity + :param nsd: database content of ns descriptor + :param db_nsr: database content of ns record + :param db_nslcmop: database content of ns operation, in this case, 'instantiate' + :param db_vnfrs: + :param db_vnfds_ref: database content of vnfds, indexed by id (not _id). {id: {vnfd_object}, ...} + :param n2vc_key_list: ssh-public-key list to be inserted to management vdus via cloud-init + :param stage: list with 3 items: [general stage, tasks, vim_specific]. This task will write over vim_specific + :return: None or exception + """ + try: + db_nsr_update = {} + RO_descriptor_number = 0 # number of descriptors created at RO + vnf_index_2_RO_id = {} # map between vnfd/nsd id to the id used at RO + nslcmop_id = db_nslcmop["_id"] + start_deploy = time() + ns_params = db_nslcmop.get("operationParams") + if ns_params and ns_params.get("timeout_ns_deploy"): + timeout_ns_deploy = ns_params["timeout_ns_deploy"] else: - index = len(db_nsr["_admin"]["deployed"]["RO"]["vnfd"]) - db_nsr["_admin"]["deployed"]["RO"]["vnfd"].append(None) - - # look if present - RO_update = {"member-vnf-index": member_vnf_index} - vnfd_list = await self.RO.get_list("vnfd", filter_by={"osm_id": vnfd_id_RO}) - if vnfd_list: - RO_update["id"] = vnfd_list[0]["uuid"] - self.logger.debug(logging_text + "vnfd='{}' member_vnf_index='{}' exists at RO. Using RO_id={}". - format(vnfd_ref, member_vnf_index, vnfd_list[0]["uuid"])) + timeout_ns_deploy = self.timeout.get("ns_deploy", self.timeout_ns_deploy) + + # Check for and optionally request placement optimization. Database will be updated if placement activated + stage[2] = "Waiting for Placement." + if await self._do_placement(logging_text, db_nslcmop, db_vnfrs): + # in case of placement change ns_params[vimAcountId) if not present at any vnfrs + for vnfr in db_vnfrs.values(): + if ns_params["vimAccountId"] == vnfr["vim-account-id"]: + break + else: + ns_params["vimAccountId"] == vnfr["vim-account-id"] + + if self.ng_ro: + return await self._instantiate_ng_ro(logging_text, nsr_id, nsd, db_nsr, db_nslcmop, db_vnfrs, + db_vnfds_ref, n2vc_key_list, stage, start_deploy, + timeout_ns_deploy) + # deploy RO + # get vnfds, instantiate at RO + for c_vnf in nsd.get("constituent-vnfd", ()): + member_vnf_index = c_vnf["member-vnf-index"] + vnfd = db_vnfds_ref[c_vnf['vnfd-id-ref']] + vnfd_ref = vnfd["id"] + + stage[2] = "Creating vnfd='{}' member_vnf_index='{}' at RO".format(vnfd_ref, member_vnf_index) + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + + # self.logger.debug(logging_text + stage[2]) + vnfd_id_RO = "{}.{}.{}".format(nsr_id, RO_descriptor_number, member_vnf_index[:23]) + vnf_index_2_RO_id[member_vnf_index] = vnfd_id_RO + RO_descriptor_number += 1 + + # look position at deployed.RO.vnfd if not present it will be appended at the end + for index, vnf_deployed in enumerate(db_nsr["_admin"]["deployed"]["RO"]["vnfd"]): + if vnf_deployed["member-vnf-index"] == member_vnf_index: + break + else: + index = len(db_nsr["_admin"]["deployed"]["RO"]["vnfd"]) + db_nsr["_admin"]["deployed"]["RO"]["vnfd"].append(None) + + # look if present + RO_update = {"member-vnf-index": member_vnf_index} + vnfd_list = await self.RO.get_list("vnfd", filter_by={"osm_id": vnfd_id_RO}) + if vnfd_list: + RO_update["id"] = vnfd_list[0]["uuid"] + self.logger.debug(logging_text + "vnfd='{}' member_vnf_index='{}' exists at RO. Using RO_id={}". + format(vnfd_ref, member_vnf_index, vnfd_list[0]["uuid"])) + else: + vnfd_RO = self.vnfd2RO(vnfd, vnfd_id_RO, db_vnfrs[c_vnf["member-vnf-index"]]. + get("additionalParamsForVnf"), nsr_id) + desc = await self.RO.create("vnfd", descriptor=vnfd_RO) + RO_update["id"] = desc["uuid"] + self.logger.debug(logging_text + "vnfd='{}' member_vnf_index='{}' created at RO. RO_id={}".format( + vnfd_ref, member_vnf_index, desc["uuid"])) + db_nsr_update["_admin.deployed.RO.vnfd.{}".format(index)] = RO_update + db_nsr["_admin"]["deployed"]["RO"]["vnfd"][index] = RO_update + + # create nsd at RO + nsd_ref = nsd["id"] + + stage[2] = "Creating nsd={} at RO".format(nsd_ref) + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + + # self.logger.debug(logging_text + stage[2]) + RO_osm_nsd_id = "{}.{}.{}".format(nsr_id, RO_descriptor_number, nsd_ref[:23]) + RO_descriptor_number += 1 + nsd_list = await self.RO.get_list("nsd", filter_by={"osm_id": RO_osm_nsd_id}) + if nsd_list: + db_nsr_update["_admin.deployed.RO.nsd_id"] = RO_nsd_uuid = nsd_list[0]["uuid"] + self.logger.debug(logging_text + "nsd={} exists at RO. Using RO_id={}".format( + nsd_ref, RO_nsd_uuid)) else: - vnfd_RO = self.vnfd2RO(vnfd, vnfd_id_RO, db_vnfrs[c_vnf["member-vnf-index"]]. - get("additionalParamsForVnf"), nsr_id) - desc = await self.RO.create("vnfd", descriptor=vnfd_RO) - RO_update["id"] = desc["uuid"] - self.logger.debug(logging_text + "vnfd='{}' member_vnf_index='{}' created at RO. RO_id={}".format( - vnfd_ref, member_vnf_index, desc["uuid"])) - db_nsr_update["_admin.deployed.RO.vnfd.{}".format(index)] = RO_update - db_nsr["_admin"]["deployed"]["RO"]["vnfd"][index] = RO_update + nsd_RO = deepcopy(nsd) + nsd_RO["id"] = RO_osm_nsd_id + nsd_RO.pop("_id", None) + nsd_RO.pop("_admin", None) + for c_vnf in nsd_RO.get("constituent-vnfd", ()): + member_vnf_index = c_vnf["member-vnf-index"] + c_vnf["vnfd-id-ref"] = vnf_index_2_RO_id[member_vnf_index] + for c_vld in nsd_RO.get("vld", ()): + for cp in c_vld.get("vnfd-connection-point-ref", ()): + member_vnf_index = cp["member-vnf-index-ref"] + cp["vnfd-id-ref"] = vnf_index_2_RO_id[member_vnf_index] + + desc = await self.RO.create("nsd", descriptor=nsd_RO) + db_nsr_update["_admin.nsState"] = "INSTANTIATED" + db_nsr_update["_admin.deployed.RO.nsd_id"] = RO_nsd_uuid = desc["uuid"] + self.logger.debug(logging_text + "nsd={} created at RO. RO_id={}".format(nsd_ref, RO_nsd_uuid)) self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) - - # create nsd at RO - nsd_ref = nsd["id"] - step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Creating nsd={} at RO".format(nsd_ref) - # self.logger.debug(logging_text + step) - - RO_osm_nsd_id = "{}.{}.{}".format(nsr_id, RO_descriptor_number, nsd_ref[:23]) - RO_descriptor_number += 1 - nsd_list = await self.RO.get_list("nsd", filter_by={"osm_id": RO_osm_nsd_id}) - if nsd_list: - db_nsr_update["_admin.deployed.RO.nsd_id"] = RO_nsd_uuid = nsd_list[0]["uuid"] - self.logger.debug(logging_text + "nsd={} exists at RO. Using RO_id={}".format( - nsd_ref, RO_nsd_uuid)) - else: - nsd_RO = deepcopy(nsd) - nsd_RO["id"] = RO_osm_nsd_id - nsd_RO.pop("_id", None) - nsd_RO.pop("_admin", None) - for c_vnf in nsd_RO.get("constituent-vnfd", ()): - member_vnf_index = c_vnf["member-vnf-index"] - c_vnf["vnfd-id-ref"] = vnf_index_2_RO_id[member_vnf_index] - for c_vld in nsd_RO.get("vld", ()): - for cp in c_vld.get("vnfd-connection-point-ref", ()): - member_vnf_index = cp["member-vnf-index-ref"] - cp["vnfd-id-ref"] = vnf_index_2_RO_id[member_vnf_index] - desc = await self.RO.create("nsd", descriptor=nsd_RO) - db_nsr_update["_admin.nsState"] = "INSTANTIATED" - db_nsr_update["_admin.deployed.RO.nsd_id"] = RO_nsd_uuid = desc["uuid"] - self.logger.debug(logging_text + "nsd={} created at RO. RO_id={}".format(nsd_ref, RO_nsd_uuid)) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) + # Crate ns at RO + stage[2] = "Creating nsd={} at RO".format(nsd_ref) + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) - # Crate ns at RO - # if present use it unless in error status - RO_nsr_id = deep_get(db_nsr, ("_admin", "deployed", "RO", "nsr_id")) - if RO_nsr_id: - try: - step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Looking for existing ns at RO" - # self.logger.debug(logging_text + step + " RO_ns_id={}".format(RO_nsr_id)) - desc = await self.RO.show("ns", RO_nsr_id) - except ROclient.ROClientException as e: - if e.http_code != HTTPStatus.NOT_FOUND: - raise - RO_nsr_id = db_nsr_update["_admin.deployed.RO.nsr_id"] = None + # if present use it unless in error status + RO_nsr_id = deep_get(db_nsr, ("_admin", "deployed", "RO", "nsr_id")) if RO_nsr_id: - ns_status, ns_status_info = self.RO.check_ns_status(desc) - db_nsr_update["_admin.deployed.RO.nsr_status"] = ns_status - if ns_status == "ERROR": - step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Deleting ns at RO. RO_ns_id={}"\ - .format(RO_nsr_id) - self.logger.debug(logging_text + step) - await self.RO.delete("ns", RO_nsr_id) + try: + stage[2] = "Looking for existing ns at RO" + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + # self.logger.debug(logging_text + stage[2] + " RO_ns_id={}".format(RO_nsr_id)) + desc = await self.RO.show("ns", RO_nsr_id) + + except ROclient.ROClientException as e: + if e.http_code != HTTPStatus.NOT_FOUND: + raise RO_nsr_id = db_nsr_update["_admin.deployed.RO.nsr_id"] = None - if not RO_nsr_id: - step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Checking dependencies" - # self.logger.debug(logging_text + step) + if RO_nsr_id: + ns_status, ns_status_info = self.RO.check_ns_status(desc) + db_nsr_update["_admin.deployed.RO.nsr_status"] = ns_status + if ns_status == "ERROR": + stage[2] = "Deleting ns at RO. RO_ns_id={}".format(RO_nsr_id) + self.logger.debug(logging_text + stage[2]) + await self.RO.delete("ns", RO_nsr_id) + RO_nsr_id = db_nsr_update["_admin.deployed.RO.nsr_id"] = None + if not RO_nsr_id: + stage[2] = "Checking dependencies" + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + # self.logger.debug(logging_text + stage[2]) + + # check if VIM is creating and wait look if previous tasks in process + task_name, task_dependency = self.lcm_tasks.lookfor_related("vim_account", ns_params["vimAccountId"]) + if task_dependency: + stage[2] = "Waiting for related tasks '{}' to be completed".format(task_name) + self.logger.debug(logging_text + stage[2]) + await asyncio.wait(task_dependency, timeout=3600) + if ns_params.get("vnf"): + for vnf in ns_params["vnf"]: + if "vimAccountId" in vnf: + task_name, task_dependency = self.lcm_tasks.lookfor_related("vim_account", + vnf["vimAccountId"]) + if task_dependency: + stage[2] = "Waiting for related tasks '{}' to be completed.".format(task_name) + self.logger.debug(logging_text + stage[2]) + await asyncio.wait(task_dependency, timeout=3600) + + stage[2] = "Checking instantiation parameters." + RO_ns_params = self._ns_params_2_RO(ns_params, nsd, db_vnfds_ref, db_vnfrs, n2vc_key_list) + stage[2] = "Deploying ns at VIM." + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) - # check if VIM is creating and wait look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("vim_account", ns_params["vimAccountId"]) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - await asyncio.wait(task_dependency, timeout=3600) - if ns_params.get("vnf"): - for vnf in ns_params["vnf"]: - if "vimAccountId" in vnf: - task_name, task_dependency = self.lcm_tasks.lookfor_related("vim_account", - vnf["vimAccountId"]) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - await asyncio.wait(task_dependency, timeout=3600) - - step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Checking instantiation parameters" - - RO_ns_params = self.ns_params_2_RO(ns_params, nsd, db_vnfds_ref, n2vc_key_list) - - step = db_nsr_update["detailed-status"] = "Deploying ns at VIM" - # step = db_nsr_update["_admin.deployed.RO.detailed-status"] = "Deploying ns at VIM" - desc = await self.RO.create("ns", descriptor=RO_ns_params, name=db_nsr["name"], scenario=RO_nsd_uuid) - RO_nsr_id = db_nsr_update["_admin.deployed.RO.nsr_id"] = desc["uuid"] - db_nsr_update["_admin.nsState"] = "INSTANTIATED" - db_nsr_update["_admin.deployed.RO.nsr_status"] = "BUILD" - self.logger.debug(logging_text + "ns created at RO. RO_id={}".format(desc["uuid"])) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) + desc = await self.RO.create("ns", descriptor=RO_ns_params, name=db_nsr["name"], scenario=RO_nsd_uuid) + RO_nsr_id = db_nsr_update["_admin.deployed.RO.nsr_id"] = desc["uuid"] + db_nsr_update["_admin.nsState"] = "INSTANTIATED" + db_nsr_update["_admin.deployed.RO.nsr_status"] = "BUILD" + self.logger.debug(logging_text + "ns created at RO. RO_id={}".format(desc["uuid"])) - # wait until NS is ready - step = ns_status_detailed = detailed_status = "Waiting VIM to deploy ns. RO_ns_id={}".format(RO_nsr_id) - detailed_status_old = None - self.logger.debug(logging_text + step) - - while time() <= start_deploy + self.total_deploy_timeout: - desc = await self.RO.show("ns", RO_nsr_id) - ns_status, ns_status_info = self.RO.check_ns_status(desc) - db_nsr_update["_admin.deployed.RO.nsr_status"] = ns_status - if ns_status == "ERROR": - raise ROclient.ROClientException(ns_status_info) - elif ns_status == "BUILD": - detailed_status = ns_status_detailed + "; {}".format(ns_status_info) - elif ns_status == "ACTIVE": - step = detailed_status = "Waiting for management IP address reported by the VIM. Updating VNFRs" - try: - if vdu_flag: - self.ns_update_vnfr(db_vnfrs, desc) - break - except LcmExceptionNoMgmtIP: - pass - else: - assert False, "ROclient.check_ns_status returns unknown {}".format(ns_status) - if detailed_status != detailed_status_old: - detailed_status_old = db_nsr_update["_admin.deployed.RO.detailed-status"] = detailed_status - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) - await asyncio.sleep(5, loop=self.loop) - else: # total_deploy_timeout - raise ROclient.ROClientException("Timeout waiting ns to be ready") + # wait until NS is ready + stage[2] = "Waiting VIM to deploy ns." + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + detailed_status_old = None + self.logger.debug(logging_text + stage[2] + " RO_ns_id={}".format(RO_nsr_id)) - step = "Updating NSR" - self.ns_update_nsr(db_nsr_update, db_nsr, desc) + old_desc = None + while time() <= start_deploy + timeout_ns_deploy: + desc = await self.RO.show("ns", RO_nsr_id) - db_nsr_update["_admin.deployed.RO.operational-status"] = "running" - db_nsr["_admin.deployed.RO.detailed-status"] = "Deployed at VIM" - db_nsr_update["_admin.deployed.RO.detailed-status"] = "Deployed at VIM" - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) + # deploymentStatus + if desc != old_desc: + # desc has changed => update db + self._on_update_ro_db(nsrs_id=nsr_id, ro_descriptor=desc) + old_desc = desc + + ns_status, ns_status_info = self.RO.check_ns_status(desc) + db_nsr_update["_admin.deployed.RO.nsr_status"] = ns_status + if ns_status == "ERROR": + raise ROclient.ROClientException(ns_status_info) + elif ns_status == "BUILD": + stage[2] = "VIM: ({})".format(ns_status_info) + elif ns_status == "ACTIVE": + stage[2] = "Waiting for management IP address reported by the VIM. Updating VNFRs." + try: + self.ns_update_vnfr(db_vnfrs, desc) + break + except LcmExceptionNoMgmtIP: + pass + else: + assert False, "ROclient.check_ns_status returns unknown {}".format(ns_status) + if stage[2] != detailed_status_old: + detailed_status_old = stage[2] + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + await asyncio.sleep(5, loop=self.loop) + else: # timeout_ns_deploy + raise ROclient.ROClientException("Timeout waiting ns to be ready") + + # Updating NSR + self.ns_update_nsr(db_nsr_update, db_nsr, desc) + + db_nsr_update["_admin.deployed.RO.operational-status"] = "running" + # db_nsr["_admin.deployed.RO.detailed-status"] = "Deployed at VIM" + stage[2] = "Deployed at VIM" + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + # await self._on_update_n2vc_db("nsrs", {"_id": nsr_id}, "_admin.deployed", db_nsr_update) + # self.logger.debug(logging_text + "Deployed at VIM") + except (ROclient.ROClientException, LcmException, DbException, NgRoException) as e: + stage[2] = "ERROR deploying at VIM" + self.set_vnfr_at_error(db_vnfrs, str(e)) + raise + + async def wait_kdu_up(self, logging_text, nsr_id, vnfr_id, kdu_name): + """ + Wait for kdu to be up, get ip address + :param logging_text: prefix use for logging + :param nsr_id: + :param vnfr_id: + :param kdu_name: + :return: IP address + """ + + # self.logger.debug(logging_text + "Starting wait_kdu_up") + nb_tries = 0 + + while nb_tries < 360: + db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) + kdur = next((x for x in get_iterable(db_vnfr, "kdur") if x.get("kdu-name") == kdu_name), None) + if not kdur: + raise LcmException("Not found vnfr_id={}, kdu_name={}".format(vnfr_id, kdu_name)) + if kdur.get("status"): + if kdur["status"] in ("READY", "ENABLED"): + return kdur.get("ip-address") + else: + raise LcmException("target KDU={} is in error state".format(kdu_name)) - step = "Deployed at VIM" - self.logger.debug(logging_text + step) + await asyncio.sleep(10, loop=self.loop) + nb_tries += 1 + raise LcmException("Timeout waiting KDU={} instantiated".format(kdu_name)) async def wait_vm_up_insert_key_ro(self, logging_text, nsr_id, vnfr_id, vdu_id, vdu_index, pub_key=None, user=None): """ @@ -892,18 +1354,14 @@ class NsLcm(LcmBase): raise LcmException("Not found _admin.deployed.RO.nsr_id for nsr_id: {}".format(nsr_id)) await asyncio.sleep(10, loop=self.loop) - # wait until NS is deployed at RO - if not ro_nsr_id: - db_nsrs = self.db.get_one("nsrs", {"_id": nsr_id}) - ro_nsr_id = deep_get(db_nsrs, ("_admin", "deployed", "RO", "nsr_id")) - if not ro_nsr_id: - continue # get ip address if not target_vdu_id: db_vnfr = self.db.get_one("vnfrs", {"_id": vnfr_id}) if not vdu_id: # for the VNF case + if db_vnfr.get("status") == "ERROR": + raise LcmException("Cannot inject ssh-key because target VNF is in error state") ip_address = db_vnfr.get("ip-address") if not ip_address: continue @@ -912,12 +1370,13 @@ class NsLcm(LcmBase): vdur = next((x for x in get_iterable(db_vnfr, "vdur") if x.get("vdu-id-ref") == vdu_id and x.get("count-index") == vdu_index), None) + if not vdur and len(db_vnfr.get("vdur", ())) == 1: # If only one, this should be the target vdu + vdur = db_vnfr["vdur"][0] if not vdur: - raise LcmException("Not found vnfr_id={}, vdu_index={}, vdu_index={}".format( - vnfr_id, vdu_id, vdu_index - )) + raise LcmException("Not found vnfr_id={}, vdu_id={}, vdu_index={}".format(vnfr_id, vdu_id, + vdu_index)) - if vdur.get("status") == "ACTIVE": + if vdur.get("pdu-type") or vdur.get("status") == "ACTIVE": ip_address = vdur.get("ip-address") if not ip_address: continue @@ -928,28 +1387,44 @@ class NsLcm(LcmBase): if not target_vdu_id: continue - # self.logger.debug(logging_text + "IP address={}".format(ip_address)) - # inject public key into machine if pub_key and user: + # wait until NS is deployed at RO + if not ro_nsr_id: + db_nsrs = self.db.get_one("nsrs", {"_id": nsr_id}) + ro_nsr_id = deep_get(db_nsrs, ("_admin", "deployed", "RO", "nsr_id")) + if not ro_nsr_id: + continue + # self.logger.debug(logging_text + "Inserting RO key") + if vdur.get("pdu-type"): + self.logger.error(logging_text + "Cannot inject ssh-ky to a PDU") + return ip_address try: ro_vm_id = "{}-{}".format(db_vnfr["member-vnf-index-ref"], target_vdu_id) # TODO add vdu_index - result_dict = await self.RO.create_action( - item="ns", - item_id_name=ro_nsr_id, - descriptor={"add_public_key": pub_key, "vms": [ro_vm_id], "user": user} - ) - # result_dict contains the format {VM-id: {vim_result: 200, description: text}} - if not result_dict or not isinstance(result_dict, dict): - raise LcmException("Unknown response from RO when injecting key") - for result in result_dict.values(): - if result.get("vim_result") == 200: - break - else: - raise ROclient.ROClientException("error injecting key: {}".format( - result.get("description"))) - break + if self.ng_ro: + target = {"action": "inject_ssh_key", "key": pub_key, "user": user, + "vnf": [{"_id": vnfr_id, "vdur": [{"id": vdu_id}]}], + } + await self.RO.deploy(nsr_id, target) + else: + result_dict = await self.RO.create_action( + item="ns", + item_id_name=ro_nsr_id, + descriptor={"add_public_key": pub_key, "vms": [ro_vm_id], "user": user} + ) + # result_dict contains the format {VM-id: {vim_result: 200, description: text}} + if not result_dict or not isinstance(result_dict, dict): + raise LcmException("Unknown response from RO when injecting key") + for result in result_dict.values(): + if result.get("vim_result") == 200: + break + else: + raise ROclient.ROClientException("error injecting key: {}".format( + result.get("description"))) + break + except NgRoException as e: + raise LcmException("Reaching max tries injecting key. Error: {}".format(e)) except ROclient.ROClientException as e: if not nb_tries: self.logger.debug(logging_text + "error injecting key: {}. Retrying until {} seconds". @@ -968,33 +1443,42 @@ class NsLcm(LcmBase): """ my_vca = vca_deployed_list[vca_index] if my_vca.get("vdu_id") or my_vca.get("kdu_name"): + # vdu or kdu: no dependencies return timeout = 300 while timeout >= 0: - for index, vca_deployed in enumerate(vca_deployed_list): + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] + configuration_status_list = db_nsr["configurationStatus"] + for index, vca_deployed in enumerate(configuration_status_list): if index == vca_index: + # myself continue if not my_vca.get("member-vnf-index") or \ (vca_deployed.get("member-vnf-index") == my_vca.get("member-vnf-index")): - if not vca_deployed.get("instantiation"): - break # wait - if vca_deployed["instantiation"] == "FAILED": + internal_status = configuration_status_list[index].get("status") + if internal_status == 'READY': + continue + elif internal_status == 'BROKEN': raise LcmException("Configuration aborted because dependent charm/s has failed") + else: + break else: + # no dependencies, return return await asyncio.sleep(10) timeout -= 1 - db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) - vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] raise LcmException("Configuration aborted because dependent charm/s timeout") - async def instantiate_N2VC(self, logging_text, vca_index, nsi_id, db_nsr, db_vnfr, vdu_id, - kdu_name, vdu_index, config_descriptor, deploy_params, base_folder): + async def instantiate_N2VC(self, logging_text, vca_index, nsi_id, db_nsr, db_vnfr, vdu_id, kdu_name, vdu_index, + config_descriptor, deploy_params, base_folder, nslcmop_id, stage, vca_type, vca_name, + ee_config_descriptor): nsr_id = db_nsr["_id"] db_update_entry = "_admin.deployed.VCA.{}.".format(vca_index) vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] vca_deployed = db_nsr["_admin"]["deployed"]["VCA"][vca_index] + osm_config = {"osm": {"ns_id": db_nsr["_id"]}} db_dict = { 'collection': 'nsrs', 'filter': {'_id': nsr_id}, @@ -1002,42 +1486,75 @@ class NsLcm(LcmBase): } step = "" try: + + element_type = 'NS' + element_under_configuration = nsr_id + vnfr_id = None if db_vnfr: vnfr_id = db_vnfr["_id"] + osm_config["osm"]["vnf_id"] = vnfr_id namespace = "{nsi}.{ns}".format( nsi=nsi_id if nsi_id else "", ns=nsr_id) + if vnfr_id: - namespace += "." + vnfr_id + element_type = 'VNF' + element_under_configuration = vnfr_id + namespace += ".{}".format(vnfr_id) if vdu_id: namespace += ".{}-{}".format(vdu_id, vdu_index or 0) + element_type = 'VDU' + element_under_configuration = "{}-{}".format(vdu_id, vdu_index or 0) + osm_config["osm"]["vdu_id"] = vdu_id + elif kdu_name: + namespace += ".{}".format(kdu_name) + element_type = 'KDU' + element_under_configuration = kdu_name + osm_config["osm"]["kdu_name"] = kdu_name # Get artifact path - artifact_path = "{}/{}/charms/{}".format( + artifact_path = "{}/{}/{}/{}".format( base_folder["folder"], base_folder["pkg-dir"], - config_descriptor["juju"]["charm"] + "charms" if vca_type in ("native_charm", "lxc_proxy_charm", "k8s_proxy_charm") else "helm-charts", + vca_name ) + # get initial_config_primitive_list that applies to this element + initial_config_primitive_list = config_descriptor.get('initial-config-primitive') - is_proxy_charm = deep_get(config_descriptor, ('juju', 'charm')) is not None - if deep_get(config_descriptor, ('juju', 'proxy')) is False: - is_proxy_charm = False + # add config if not present for NS charm + ee_descriptor_id = ee_config_descriptor.get("id") + initial_config_primitive_list = self._get_initial_config_primitive_list(initial_config_primitive_list, + vca_deployed, ee_descriptor_id) # n2vc_redesign STEP 3.1 - # find old ee_id if exists ee_id = vca_deployed.get("ee_id") # create or register execution environment in VCA - if is_proxy_charm: + if vca_type in ("lxc_proxy_charm", "k8s_proxy_charm", "helm"): + + self._write_configuration_status( + nsr_id=nsr_id, + vca_index=vca_index, + status='CREATING', + element_under_configuration=element_under_configuration, + element_type=element_type + ) + step = "create execution environment" self.logger.debug(logging_text + step) - ee_id, credentials = await self.n2vc.create_execution_environment(namespace=namespace, - reuse_ee_id=ee_id, - db_dict=db_dict) - else: + ee_id, credentials = await self.vca_map[vca_type].create_execution_environment( + namespace=namespace, + reuse_ee_id=ee_id, + db_dict=db_dict, + config=osm_config, + artifact_path=artifact_path, + vca_type=vca_type) + + elif vca_type == "native_charm": step = "Waiting to VM being up and getting IP address" self.logger.debug(logging_text + step) rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, vdu_index, @@ -1047,59 +1564,119 @@ class NsLcm(LcmBase): username = deep_get(config_descriptor, ("config-access", "ssh-access", "default-user")) # TODO remove this when changes on IM regarding config-access:ssh-access:default-user were # merged. Meanwhile let's get username from initial-config-primitive - if not username and config_descriptor.get("initial-config-primitive"): - for config_primitive in config_descriptor["initial-config-primitive"]: + if not username and initial_config_primitive_list: + for config_primitive in initial_config_primitive_list: for param in config_primitive.get("parameter", ()): if param["name"] == "ssh-username": username = param["value"] break if not username: - raise LcmException("Cannot determine the username neither with 'initial-config-promitive' nor with " + raise LcmException("Cannot determine the username neither with 'initial-config-primitive' nor with " "'config-access.ssh-access.default-user'") credentials["username"] = username # n2vc_redesign STEP 3.2 + self._write_configuration_status( + nsr_id=nsr_id, + vca_index=vca_index, + status='REGISTERING', + element_under_configuration=element_under_configuration, + element_type=element_type + ) + step = "register execution environment {}".format(credentials) self.logger.debug(logging_text + step) - ee_id = await self.n2vc.register_execution_environment(credentials=credentials, namespace=namespace, - db_dict=db_dict) + ee_id = await self.vca_map[vca_type].register_execution_environment( + credentials=credentials, namespace=namespace, db_dict=db_dict) # for compatibility with MON/POL modules, the need model and application name at database - # TODO ask to N2VC instead of assuming the format "model_name.application_name" + # TODO ask MON/POL if needed to not assuming anymore the format "model_name.application_name" ee_id_parts = ee_id.split('.') - model_name = ee_id_parts[0] - application_name = ee_id_parts[1] - self.update_db_2("nsrs", nsr_id, {db_update_entry + "model": model_name, - db_update_entry + "application": application_name, - db_update_entry + "ee_id": ee_id}) + db_nsr_update = {db_update_entry + "ee_id": ee_id} + if len(ee_id_parts) >= 2: + model_name = ee_id_parts[0] + application_name = ee_id_parts[1] + db_nsr_update[db_update_entry + "model"] = model_name + db_nsr_update[db_update_entry + "application"] = application_name # n2vc_redesign STEP 3.3 - step = "Install configuration Software" + + self._write_configuration_status( + nsr_id=nsr_id, + vca_index=vca_index, + status='INSTALLING SW', + element_under_configuration=element_under_configuration, + element_type=element_type, + other_update=db_nsr_update + ) + # TODO check if already done self.logger.debug(logging_text + step) - await self.n2vc.install_configuration_sw(ee_id=ee_id, artifact_path=artifact_path, db_dict=db_dict) + config = None + if vca_type == "native_charm": + config_primitive = next((p for p in initial_config_primitive_list if p["name"] == "config"), None) + if config_primitive: + config = self._map_primitive_params( + config_primitive, + {}, + deploy_params + ) + num_units = 1 + if vca_type == "lxc_proxy_charm": + if element_type == "NS": + num_units = db_nsr.get("config-units") or 1 + elif element_type == "VNF": + num_units = db_vnfr.get("config-units") or 1 + elif element_type == "VDU": + for v in db_vnfr["vdur"]: + if vdu_id == v["vdu-id-ref"]: + num_units = v.get("config-units") or 1 + break + + await self.vca_map[vca_type].install_configuration_sw( + ee_id=ee_id, + artifact_path=artifact_path, + db_dict=db_dict, + config=config, + num_units=num_units, + vca_type=vca_type + ) + + # write in db flag of configuration_sw already installed + self.update_db_2("nsrs", nsr_id, {db_update_entry + "config_sw_installed": True}) + + # add relations for this VCA (wait for other peers related with this VCA) + await self._add_vca_relations(logging_text=logging_text, nsr_id=nsr_id, + vca_index=vca_index, vca_type=vca_type) # if SSH access is required, then get execution environment SSH public - if is_proxy_charm: # if native charm we have waited already to VM be UP + # if native charm we have waited already to VM be UP + if vca_type in ("k8s_proxy_charm", "lxc_proxy_charm", "helm"): pub_key = None user = None + # self.logger.debug("get ssh key block") if deep_get(config_descriptor, ("config-access", "ssh-access", "required")): + # self.logger.debug("ssh key needed") # Needed to inject a ssh key user = deep_get(config_descriptor, ("config-access", "ssh-access", "default-user")) step = "Install configuration Software, getting public ssh key" - pub_key = await self.n2vc.get_ee_ssh_public__key(ee_id=ee_id, db_dict=db_dict) + pub_key = await self.vca_map[vca_type].get_ee_ssh_public__key(ee_id=ee_id, db_dict=db_dict) step = "Insert public key into VM user={} ssh_key={}".format(user, pub_key) else: + # self.logger.debug("no need to get ssh key") step = "Waiting to VM being up and getting IP address" self.logger.debug(logging_text + step) # n2vc_redesign STEP 5.1 # wait for RO (ip-address) Insert pub_key into VM if vnfr_id: - rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, vdu_index, - user=user, pub_key=pub_key) + if kdu_name: + rw_mgmt_ip = await self.wait_kdu_up(logging_text, nsr_id, vnfr_id, kdu_name) + else: + rw_mgmt_ip = await self.wait_vm_up_insert_key_ro(logging_text, nsr_id, vnfr_id, vdu_id, + vdu_index, user=user, pub_key=pub_key) else: rw_mgmt_ip = None # This is for a NS configuration @@ -1110,19 +1687,35 @@ class NsLcm(LcmBase): # n2vc_redesign STEP 6 Execute initial config primitive step = 'execute initial config primitive' - initial_config_primitive_list = config_descriptor.get('initial-config-primitive') - - # sort initial config primitives by 'seq' - try: - initial_config_primitive_list.sort(key=lambda val: int(val['seq'])) - except Exception as e: - self.logger.error(logging_text + step + ": " + str(e)) - # add config if not present for NS charm - initial_config_primitive_list = self._get_initial_config_primitive_list(initial_config_primitive_list, - vca_deployed) + # wait for dependent primitives execution (NS -> VNF -> VDU) if initial_config_primitive_list: await self._wait_dependent_n2vc(nsr_id, vca_deployed_list, vca_index) + + # stage, in function of element type: vdu, kdu, vnf or ns + my_vca = vca_deployed_list[vca_index] + if my_vca.get("vdu_id") or my_vca.get("kdu_name"): + # VDU or KDU + stage[0] = 'Stage 3/5: running Day-1 primitives for VDU.' + elif my_vca.get("member-vnf-index"): + # VNF + stage[0] = 'Stage 4/5: running Day-1 primitives for VNF.' + else: + # NS + stage[0] = 'Stage 5/5: running Day-1 primitives for NS.' + + self._write_configuration_status( + nsr_id=nsr_id, + vca_index=vca_index, + status='EXECUTING PRIMITIVE' + ) + + self._write_op_status( + op_id=nslcmop_id, + stage=stage + ) + + check_if_terminated_needed = True for initial_config_primitive in initial_config_primitive_list: # adding information on the vca_deployed if it is a NS execution environment if not vca_deployed["member-vnf-index"]: @@ -1132,36 +1725,182 @@ class NsLcm(LcmBase): step = "execute primitive '{}' params '{}'".format(initial_config_primitive["name"], primitive_params_) self.logger.debug(logging_text + step) - await self.n2vc.exec_primitive( + await self.vca_map[vca_type].exec_primitive( ee_id=ee_id, primitive_name=initial_config_primitive["name"], params_dict=primitive_params_, db_dict=db_dict ) + # Once some primitive has been exec, check and write at db if it needs to exec terminated primitives + if check_if_terminated_needed: + if config_descriptor.get('terminate-config-primitive'): + self.update_db_2("nsrs", nsr_id, {db_update_entry + "needed_terminate": True}) + check_if_terminated_needed = False + # TODO register in database that primitive is done + # STEP 7 Configure metrics + if vca_type == "helm": + prometheus_jobs = await self.add_prometheus_metrics( + ee_id=ee_id, + artifact_path=artifact_path, + ee_config_descriptor=ee_config_descriptor, + vnfr_id=vnfr_id, + nsr_id=nsr_id, + target_ip=rw_mgmt_ip, + ) + if prometheus_jobs: + self.update_db_2("nsrs", nsr_id, {db_update_entry + "prometheus_jobs": prometheus_jobs}) + step = "instantiated at VCA" - self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "COMPLETED"}) self.logger.debug(logging_text + step) + self._write_configuration_status( + nsr_id=nsr_id, + vca_index=vca_index, + status='READY' + ) + except Exception as e: # TODO not use Exception but N2VC exception - self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "FAILED"}) - raise Exception("{} {}".format(step, e)) from e - # TODO raise N2VC exception with 'step' extra information + # self.update_db_2("nsrs", nsr_id, {db_update_entry + "instantiation": "FAILED"}) + if not isinstance(e, (DbException, N2VCException, LcmException, asyncio.CancelledError)): + self.logger.error("Exception while {} : {}".format(step, e), exc_info=True) + self._write_configuration_status( + nsr_id=nsr_id, + vca_index=vca_index, + status='BROKEN' + ) + raise LcmException("{} {}".format(step, e)) from e def _write_ns_status(self, nsr_id: str, ns_state: str, current_operation: str, current_operation_id: str, - error_description: str = None, error_detail: str = None): + error_description: str = None, error_detail: str = None, other_update: dict = None): + """ + Update db_nsr fields. + :param nsr_id: + :param ns_state: + :param current_operation: + :param current_operation_id: + :param error_description: + :param error_detail: + :param other_update: Other required changes at database if provided, will be cleared + :return: + """ try: - db_dict = dict() - if ns_state: - db_dict["nsState"] = ns_state + db_dict = other_update or {} + db_dict["_admin.nslcmop"] = current_operation_id # for backward compatibility + db_dict["_admin.current-operation"] = current_operation_id + db_dict["_admin.operation-type"] = current_operation if current_operation != "IDLE" else None db_dict["currentOperation"] = current_operation db_dict["currentOperationID"] = current_operation_id db_dict["errorDescription"] = error_description db_dict["errorDetail"] = error_detail + + if ns_state: + db_dict["nsState"] = ns_state self.update_db_2("nsrs", nsr_id, db_dict) - except Exception as e: - self.logger.warn('Error writing NS status: {}'.format(e)) + except DbException as e: + self.logger.warn('Error writing NS status, ns={}: {}'.format(nsr_id, e)) + + def _write_op_status(self, op_id: str, stage: list = None, error_message: str = None, queuePosition: int = 0, + operation_state: str = None, other_update: dict = None): + try: + db_dict = other_update or {} + db_dict['queuePosition'] = queuePosition + if isinstance(stage, list): + db_dict['stage'] = stage[0] + db_dict['detailed-status'] = " ".join(stage) + elif stage is not None: + db_dict['stage'] = str(stage) + + if error_message is not None: + db_dict['errorMessage'] = error_message + if operation_state is not None: + db_dict['operationState'] = operation_state + db_dict["statusEnteredTime"] = time() + self.update_db_2("nslcmops", op_id, db_dict) + except DbException as e: + self.logger.warn('Error writing OPERATION status for op_id: {} -> {}'.format(op_id, e)) + + def _write_all_config_status(self, db_nsr: dict, status: str): + try: + nsr_id = db_nsr["_id"] + # configurationStatus + config_status = db_nsr.get('configurationStatus') + if config_status: + db_nsr_update = {"configurationStatus.{}.status".format(index): status for index, v in + enumerate(config_status) if v} + # update status + self.update_db_2("nsrs", nsr_id, db_nsr_update) + + except DbException as e: + self.logger.warn('Error writing all configuration status, ns={}: {}'.format(nsr_id, e)) + + def _write_configuration_status(self, nsr_id: str, vca_index: int, status: str = None, + element_under_configuration: str = None, element_type: str = None, + other_update: dict = None): + + # self.logger.debug('_write_configuration_status(): vca_index={}, status={}' + # .format(vca_index, status)) + + try: + db_path = 'configurationStatus.{}.'.format(vca_index) + db_dict = other_update or {} + if status: + db_dict[db_path + 'status'] = status + if element_under_configuration: + db_dict[db_path + 'elementUnderConfiguration'] = element_under_configuration + if element_type: + db_dict[db_path + 'elementType'] = element_type + self.update_db_2("nsrs", nsr_id, db_dict) + except DbException as e: + self.logger.warn('Error writing configuration status={}, ns={}, vca_index={}: {}' + .format(status, nsr_id, vca_index, e)) + + async def _do_placement(self, logging_text, db_nslcmop, db_vnfrs): + """ + Check and computes the placement, (vim account where to deploy). If it is decided by an external tool, it + sends the request via kafka and wait until the result is wrote at database (nslcmops _admin.plca). + Database is used because the result can be obtained from a different LCM worker in case of HA. + :param logging_text: contains the prefix for logging, with the ns and nslcmop identifiers + :param db_nslcmop: database content of nslcmop + :param db_vnfrs: database content of vnfrs, indexed by member-vnf-index. + :return: True if some modification is done. Modifies database vnfrs and parameter db_vnfr with the + computed 'vim-account-id' + """ + modified = False + nslcmop_id = db_nslcmop['_id'] + placement_engine = deep_get(db_nslcmop, ('operationParams', 'placement-engine')) + if placement_engine == "PLA": + self.logger.debug(logging_text + "Invoke and wait for placement optimization") + await self.msg.aiowrite("pla", "get_placement", {'nslcmopId': nslcmop_id}, loop=self.loop) + db_poll_interval = 5 + wait = db_poll_interval * 10 + pla_result = None + while not pla_result and wait >= 0: + await asyncio.sleep(db_poll_interval) + wait -= db_poll_interval + db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) + pla_result = deep_get(db_nslcmop, ('_admin', 'pla')) + + if not pla_result: + raise LcmException("Placement timeout for nslcmopId={}".format(nslcmop_id)) + + for pla_vnf in pla_result['vnf']: + vnfr = db_vnfrs.get(pla_vnf['member-vnf-index']) + if not pla_vnf.get('vimAccountId') or not vnfr: + continue + modified = True + self.db.set_one("vnfrs", {"_id": vnfr["_id"]}, {"vim-account-id": pla_vnf['vimAccountId']}) + # Modifies db_vnfrs + vnfr["vim-account-id"] = pla_vnf['vimAccountId'] + return modified + + def update_nsrs_with_pla_result(self, params): + try: + nslcmop_id = deep_get(params, ('placement', 'nslcmopId')) + self.update_db_2("nslcmops", nslcmop_id, {"_admin.pla": params.get('placement')}) + except Exception as e: + self.logger.warn('Update failed for nslcmop_id={}:{}'.format(nslcmop_id, e)) async def instantiate(self, nsr_id, nslcmop_id): """ @@ -1174,7 +1913,7 @@ class NsLcm(LcmBase): # Try to lock HA task here task_is_locked_by_me = self.lcm_tasks.lock_HA('ns', 'nslcmops', nslcmop_id) if not task_is_locked_by_me: - self.logger.debug('instantiate() task is not locked by me') + self.logger.debug('instantiate() task is not locked by me, ns={}'.format(nsr_id)) return logging_text = "Task ns={} instantiate={} ".format(nsr_id, nslcmop_id) @@ -1189,49 +1928,63 @@ class NsLcm(LcmBase): db_nslcmop = None # update operation on nsrs - db_nsr_update = {"_admin.nslcmop": nslcmop_id, - "_admin.current-operation": nslcmop_id, - "_admin.operation-type": "instantiate"} - self.update_db_2("nsrs", nsr_id, db_nsr_update) - + db_nsr_update = {} # update operation on nslcmops db_nslcmop_update = {} nslcmop_operation_state = None db_vnfrs = {} # vnf's info indexed by member-index # n2vc_info = {} - task_instantiation_list = [] - task_instantiation_info = {} # from task to info text + tasks_dict_info = {} # from task to info text exc = None + error_list = [] + stage = ['Stage 1/5: preparation of the environment.', "Waiting for previous operations to terminate.", ""] + # ^ stage, step, VIM progress try: # wait for any previous tasks in process - step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA('ns', 'nslcmops', nslcmop_id) - # STEP 0: Reading database (nslcmops, nsrs, nsds, vnfrs, vnfds) + stage[1] = "Sync filesystem from database." + self.fs.sync() # TODO, make use of partial sync, only for the needed packages + # STEP 0: Reading database (nslcmops, nsrs, nsds, vnfrs, vnfds) + stage[1] = "Reading from database." # nsState="BUILDING", currentOperation="INSTANTIATING", currentOperationID=nslcmop_id + db_nsr_update["detailed-status"] = "creating" + db_nsr_update["operational-status"] = "init" self._write_ns_status( nsr_id=nsr_id, ns_state="BUILDING", current_operation="INSTANTIATING", - current_operation_id=nslcmop_id + current_operation_id=nslcmop_id, + other_update=db_nsr_update + ) + self._write_op_status( + op_id=nslcmop_id, + stage=stage, + queuePosition=0 ) # read from db: operation - step = "Getting nslcmop={} from db".format(nslcmop_id) + stage[1] = "Getting nslcmop={} from db.".format(nslcmop_id) db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) + ns_params = db_nslcmop.get("operationParams") + if ns_params and ns_params.get("timeout_ns_deploy"): + timeout_ns_deploy = ns_params["timeout_ns_deploy"] + else: + timeout_ns_deploy = self.timeout.get("ns_deploy", self.timeout_ns_deploy) # read from db: ns - step = "Getting nsr={} from db".format(nsr_id) + stage[1] = "Getting nsr={} from db.".format(nsr_id) db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) - # nsd is replicated into ns (no db read) - nsd = db_nsr["nsd"] + stage[1] = "Getting nsd={} from db.".format(db_nsr["nsd-id"]) + nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]}) + db_nsr["nsd"] = nsd # nsr_name = db_nsr["name"] # TODO short-name?? # read from db: vnf's of this ns - step = "Getting vnfrs from db" - self.logger.debug(logging_text + step) + stage[1] = "Getting vnfrs from db." + self.logger.debug(logging_text + stage[1]) db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}) # read from db: vnfd's for every vnf @@ -1244,11 +1997,12 @@ class NsLcm(LcmBase): db_vnfrs[vnfr["member-vnf-index-ref"]] = vnfr # vnf's dict indexed by member-index: '1', '2', etc vnfd_id = vnfr["vnfd-id"] # vnfd uuid for this vnf vnfd_ref = vnfr["vnfd-ref"] # vnfd name for this vnf + # if we haven't this vnfd, read it from db if vnfd_id not in db_vnfds: - # read from cb - step = "Getting vnfd={} id='{}' from db".format(vnfd_id, vnfd_ref) - self.logger.debug(logging_text + step) + # read from db + stage[1] = "Getting vnfd={} id='{}' from db.".format(vnfd_id, vnfd_ref) + self.logger.debug(logging_text + stage[1]) vnfd = self.db.get_one("vnfds", {"_id": vnfd_id}) # store vnfd @@ -1262,7 +2016,9 @@ class NsLcm(LcmBase): vca_deployed_list = db_nsr["_admin"]["deployed"].get("VCA") if vca_deployed_list is None: vca_deployed_list = [] + configuration_status_list = [] db_nsr_update["_admin.deployed.VCA"] = vca_deployed_list + db_nsr_update["configurationStatus"] = configuration_status_list # add _admin.deployed.VCA to db_nsr dictionary, value=vca_deployed_list populate_dict(db_nsr, ("_admin", "deployed", "VCA"), vca_deployed_list) elif isinstance(vca_deployed_list, dict): @@ -1271,9 +2027,6 @@ class NsLcm(LcmBase): db_nsr_update["_admin.deployed.VCA"] = vca_deployed_list populate_dict(db_nsr, ("_admin", "deployed", "VCA"), vca_deployed_list) - db_nsr_update["detailed-status"] = "creating" - db_nsr_update["operational-status"] = "init" - if not isinstance(deep_get(db_nsr, ("_admin", "deployed", "RO", "vnfd")), list): populate_dict(db_nsr, ("_admin", "deployed", "RO", "vnfd"), []) db_nsr_update["_admin.deployed.RO.vnfd"] = [] @@ -1281,19 +2034,28 @@ class NsLcm(LcmBase): # set state to INSTANTIATED. When instantiated NBI will not delete directly db_nsr_update["_admin.nsState"] = "INSTANTIATED" self.update_db_2("nsrs", nsr_id, db_nsr_update) - self.logger.debug(logging_text + "Before deploy_kdus") + self.db.set_list("vnfrs", {"nsr-id-ref": nsr_id}, {"_admin.nsState": "INSTANTIATED"}) + + # n2vc_redesign STEP 2 Deploy Network Scenario + stage[0] = 'Stage 2/5: deployment of KDUs, VMs and execution environments.' + self._write_op_status( + op_id=nslcmop_id, + stage=stage + ) + + stage[1] = "Deploying KDUs." + # self.logger.debug(logging_text + "Before deploy_kdus") # Call to deploy_kdus in case exists the "vdu:kdu" param - task_kdu = asyncio.ensure_future( - self.deploy_kdus( - logging_text=logging_text, - nsr_id=nsr_id, - db_nsr=db_nsr, - db_vnfrs=db_vnfrs, - ) + await self.deploy_kdus( + logging_text=logging_text, + nsr_id=nsr_id, + nslcmop_id=nslcmop_id, + db_vnfrs=db_vnfrs, + db_vnfds=db_vnfds, + task_instantiation_info=tasks_dict_info, ) - self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDUs", task_kdu) - task_instantiation_info[task_kdu] = "Deploy KDUs" - task_instantiation_list.append(task_kdu) + + stage[1] = "Getting VCA public key." # n2vc_redesign STEP 1 Get VCA public ssh-key # feature 1429. Add n2vc public key to needed VMs n2vc_key = self.n2vc.get_public_key() @@ -1301,7 +2063,7 @@ class NsLcm(LcmBase): if self.vca_config.get("public_key"): n2vc_key_list.append(self.vca_config["public_key"]) - # n2vc_redesign STEP 2 Deploy Network Scenario + stage[1] = "Deploying NS at VIM." task_ro = asyncio.ensure_future( self.instantiate_RO( logging_text=logging_text, @@ -1311,16 +2073,16 @@ class NsLcm(LcmBase): db_nslcmop=db_nslcmop, db_vnfrs=db_vnfrs, db_vnfds_ref=db_vnfds_ref, - n2vc_key_list=n2vc_key_list + n2vc_key_list=n2vc_key_list, + stage=stage ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_RO", task_ro) - task_instantiation_info[task_ro] = "Deploy at VIM" - task_instantiation_list.append(task_ro) + tasks_dict_info[task_ro] = "Deploying at VIM" # n2vc_redesign STEP 3 to 6 Deploy N2VC - step = "Looking for needed vnfd to configure with proxy charm" - self.logger.debug(logging_text + step) + stage[1] = "Deploying Execution Environments." + self.logger.debug(logging_text + stage[1]) nsi_id = None # TODO put nsi_id when this nsr belongs to a NSI # get_iterable() returns a value from a dict or empty tuple if key does not exist @@ -1336,12 +2098,12 @@ class NsLcm(LcmBase): kdu_name = None # Get additional parameters - deploy_params = {} + deploy_params = {"OSM": self._get_osm_params(db_vnfr)} if db_vnfr.get("additionalParamsForVnf"): - deploy_params = self._format_additional_params(db_vnfr["additionalParamsForVnf"].copy()) + deploy_params.update(self._format_additional_params(db_vnfr["additionalParamsForVnf"].copy())) descriptor_config = vnfd.get("vnf-configuration") - if descriptor_config and descriptor_config.get("juju"): + if descriptor_config: self._deploy_n2vc( logging_text=logging_text + "member_vnf_index={} ".format(member_vnf_index), db_nsr=db_nsr, @@ -1358,8 +2120,8 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list, - task_instantiation_info=task_instantiation_info + task_instantiation_info=tasks_dict_info, + stage=stage ) # Deploy charms for each VDU that supports one. @@ -1371,15 +2133,8 @@ class NsLcm(LcmBase): deploy_params_vdu = self._format_additional_params(vdur["additionalParams"]) else: deploy_params_vdu = deploy_params - if descriptor_config and descriptor_config.get("juju"): - # look for vdu index in the db_vnfr["vdu"] section - # for vdur_index, vdur in enumerate(db_vnfr["vdur"]): - # if vdur["vdu-id-ref"] == vdu_id: - # break - # else: - # raise LcmException("Mismatch vdu_id={} not found in the vnfr['vdur'] list for " - # "member_vnf_index={}".format(vdu_id, member_vnf_index)) - # vdu_name = vdur.get("name") + deploy_params_vdu["OSM"] = self._get_osm_params(db_vnfr, vdu_id, vdu_count_index=0) + if descriptor_config: vdu_name = None kdu_name = None for vdu_index in range(int(vdud.get("count", 1))): @@ -1401,25 +2156,20 @@ class NsLcm(LcmBase): deploy_params=deploy_params_vdu, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list, - task_instantiation_info=task_instantiation_info + task_instantiation_info=tasks_dict_info, + stage=stage ) for kdud in get_iterable(vnfd, 'kdu'): kdu_name = kdud["name"] descriptor_config = kdud.get('kdu-configuration') - if descriptor_config and descriptor_config.get("juju"): + if descriptor_config: vdu_id = None vdu_index = 0 vdu_name = None - # look for vdu index in the db_vnfr["vdu"] section - # for vdur_index, vdur in enumerate(db_vnfr["vdur"]): - # if vdur["vdu-id-ref"] == vdu_id: - # break - # else: - # raise LcmException("Mismatch vdu_id={} not found in the vnfr['vdur'] list for " - # "member_vnf_index={}".format(vdu_id, member_vnf_index)) - # vdu_name = vdur.get("name") - # vdu_name = None + kdur = next(x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name) + deploy_params_kdu = {"OSM": self._get_osm_params(db_vnfr)} + if kdur.get("additionalParams"): + deploy_params_kdu = self._format_additional_params(kdur["additionalParams"]) self._deploy_n2vc( logging_text=logging_text, @@ -1434,11 +2184,11 @@ class NsLcm(LcmBase): member_vnf_index=member_vnf_index, vdu_index=vdu_index, vdu_name=vdu_name, - deploy_params=deploy_params, + deploy_params=deploy_params_kdu, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list, - task_instantiation_info=task_instantiation_info + task_instantiation_info=tasks_dict_info, + stage=stage ) # Check if this NS has a charm configuration @@ -1453,9 +2203,9 @@ class NsLcm(LcmBase): vdu_name = None # Get additional parameters - deploy_params = {} + deploy_params = {"OSM": self._get_osm_params(db_vnfr)} if db_nsr.get("additionalParamsForNs"): - deploy_params = self._format_additional_params(db_nsr["additionalParamsForNs"].copy()) + deploy_params.update(self._format_additional_params(db_nsr["additionalParamsForNs"].copy())) base_folder = nsd["_admin"]["storage"] self._deploy_n2vc( logging_text=logging_text, @@ -1473,125 +2223,88 @@ class NsLcm(LcmBase): deploy_params=deploy_params, descriptor_config=descriptor_config, base_folder=base_folder, - task_instantiation_list=task_instantiation_list, - task_instantiation_info=task_instantiation_info + task_instantiation_info=tasks_dict_info, + stage=stage ) - # Wait until all tasks of "task_instantiation_list" have been finished - - # while time() <= start_deploy + self.total_deploy_timeout: - error_text_list = [] - timeout = 3600 - - # let's begin with all OK - instantiated_ok = True - # let's begin with RO 'running' status (later we can change it) - db_nsr_update["operational-status"] = "running" - # let's begin with VCA 'configured' status (later we can change it) - db_nsr_update["config-status"] = "configured" - - if task_instantiation_list: - # wait for all tasks completion - done, pending = await asyncio.wait(task_instantiation_list, timeout=timeout) - - for task in pending: - instantiated_ok = False - if task == task_ro: - db_nsr_update["operational-status"] = "failed" - else: - db_nsr_update["config-status"] = "failed" - self.logger.error(logging_text + task_instantiation_info[task] + ": Timeout") - error_text_list.append(task_instantiation_info[task] + ": Timeout") - for task in done: - if task.cancelled(): - instantiated_ok = False - if task == task_ro: - db_nsr_update["operational-status"] = "failed" - else: - db_nsr_update["config-status"] = "failed" - self.logger.warn(logging_text + task_instantiation_info[task] + ": Cancelled") - error_text_list.append(task_instantiation_info[task] + ": Cancelled") - else: - exc = task.exception() - if exc: - instantiated_ok = False - if task == task_ro: - db_nsr_update["operational-status"] = "failed" - else: - db_nsr_update["config-status"] = "failed" - self.logger.error(logging_text + task_instantiation_info[task] + ": Failed") - if isinstance(exc, (N2VCException, ROclient.ROClientException)): - error_text_list.append(task_instantiation_info[task] + ": {}".format(exc)) - else: - exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__)) - self.logger.error(logging_text + task_instantiation_info[task] + exc_traceback) - error_text_list.append(task_instantiation_info[task] + ": " + exc_traceback) - else: - self.logger.debug(logging_text + task_instantiation_info[task] + ": Done") - - if error_text_list: - error_text = "\n".join(error_text_list) - db_nsr_update["detailed-status"] = error_text - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED_TEMP" - db_nslcmop_update["detailed-status"] = error_text - db_nslcmop_update["statusEnteredTime"] = time() - else: - # all is done - db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED" - db_nslcmop_update["statusEnteredTime"] = time() - db_nslcmop_update["detailed-status"] = "done" - db_nsr_update["detailed-status"] = "done" + # rest of staff will be done at finally - except (ROclient.ROClientException, DbException, LcmException) as e: - self.logger.error(logging_text + "Exit Exception while '{}': {}".format(step, e)) + except (ROclient.ROClientException, DbException, LcmException, N2VCException) as e: + self.logger.error(logging_text + "Exit Exception while '{}': {}".format(stage[1], e)) exc = e except asyncio.CancelledError: - self.logger.error(logging_text + "Cancelled Exception while '{}'".format(step)) + self.logger.error(logging_text + "Cancelled Exception while '{}'".format(stage[1])) exc = "Operation was cancelled" except Exception as e: exc = traceback.format_exc() - self.logger.critical(logging_text + "Exit Exception {} while '{}': {}".format(type(e).__name__, step, e), - exc_info=True) + self.logger.critical(logging_text + "Exit Exception while '{}': {}".format(stage[1], e), exc_info=True) finally: if exc: - if db_nsr: - db_nsr_update["detailed-status"] = "ERROR {}: {}".format(step, exc) - db_nsr_update["operational-status"] = "failed" - db_nsr_update["config-status"] = "failed" - if db_nslcmop: - db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" - db_nslcmop_update["statusEnteredTime"] = time() + error_list.append(str(exc)) try: - if db_nsr: - db_nsr_update["_admin.nslcmop"] = None - db_nsr_update["_admin.current-operation"] = None - db_nsr_update["_admin.operation-type"] = None - self.update_db_2("nsrs", nsr_id, db_nsr_update) - - # nsState="READY/BROKEN", currentOperation="IDLE", currentOperationID=None - ns_state = None - error_description = None - error_detail = None - if instantiated_ok: - ns_state = "READY" + # wait for pending tasks + if tasks_dict_info: + stage[1] = "Waiting for instantiate pending tasks." + self.logger.debug(logging_text + stage[1]) + error_list += await self._wait_for_tasks(logging_text, tasks_dict_info, timeout_ns_deploy, + stage, nslcmop_id, nsr_id=nsr_id) + stage[1] = stage[2] = "" + except asyncio.CancelledError: + error_list.append("Cancelled") + # TODO cancel all tasks + except Exception as exc: + error_list.append(str(exc)) + + # update operation-status + db_nsr_update["operational-status"] = "running" + # let's begin with VCA 'configured' status (later we can change it) + db_nsr_update["config-status"] = "configured" + for task, task_name in tasks_dict_info.items(): + if not task.done() or task.cancelled() or task.exception(): + if task_name.startswith(self.task_name_deploy_vca): + # A N2VC task is pending + db_nsr_update["config-status"] = "failed" else: - ns_state = "BROKEN" - error_description = 'Operation: INSTANTIATING.{}, step: {}'.format(nslcmop_id, step) - error_detail = error_text - self._write_ns_status( - nsr_id=nsr_id, - ns_state=ns_state, - current_operation="IDLE", - current_operation_id=None, - error_description=error_description, - error_detail=error_detail - ) + # RO or KDU task is pending + db_nsr_update["operational-status"] = "failed" + + # update status at database + if error_list: + error_detail = ". ".join(error_list) + self.logger.error(logging_text + error_detail) + error_description_nslcmop = '{} Detail: {}'.format(stage[0], error_detail) + error_description_nsr = 'Operation: INSTANTIATING.{}, {}'.format(nslcmop_id, stage[0]) + + db_nsr_update["detailed-status"] = error_description_nsr + " Detail: " + error_detail + db_nslcmop_update["detailed-status"] = error_detail + nslcmop_operation_state = "FAILED" + ns_state = "BROKEN" + else: + error_detail = None + error_description_nsr = error_description_nslcmop = None + ns_state = "READY" + db_nsr_update["detailed-status"] = "Done" + db_nslcmop_update["detailed-status"] = "Done" + nslcmop_operation_state = "COMPLETED" + + if db_nsr: + self._write_ns_status( + nsr_id=nsr_id, + ns_state=ns_state, + current_operation="IDLE", + current_operation_id=None, + error_description=error_description_nsr, + error_detail=error_detail, + other_update=db_nsr_update + ) + self._write_op_status( + op_id=nslcmop_id, + stage="", + error_message=error_description_nslcmop, + operation_state=nslcmop_operation_state, + other_update=db_nslcmop_update, + ) - if db_nslcmop_update: - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - except DbException as e: - self.logger.error(logging_text + "Cannot update database: {}".format(e)) if nslcmop_operation_state: try: await self.msg.aiowrite("ns", "instantiated", {"nsr_id": nsr_id, "nslcmop_id": nslcmop_id, @@ -1603,179 +2316,493 @@ class NsLcm(LcmBase): self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_instantiate") - async def deploy_kdus(self, logging_text, nsr_id, db_nsr, db_vnfrs): + async def _add_vca_relations(self, logging_text, nsr_id, vca_index: int, + timeout: int = 3600, vca_type: str = None) -> bool: + + # steps: + # 1. find all relations for this VCA + # 2. wait for other peers related + # 3. add relations + + try: + vca_type = vca_type or "lxc_proxy_charm" + + # STEP 1: find all relations for this VCA + + # read nsr record + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]}) + + # this VCA data + my_vca = deep_get(db_nsr, ('_admin', 'deployed', 'VCA'))[vca_index] + + # read all ns-configuration relations + ns_relations = list() + db_ns_relations = deep_get(nsd, ('ns-configuration', 'relation')) + if db_ns_relations: + for r in db_ns_relations: + # check if this VCA is in the relation + if my_vca.get('member-vnf-index') in\ + (r.get('entities')[0].get('id'), r.get('entities')[1].get('id')): + ns_relations.append(r) + + # read all vnf-configuration relations + vnf_relations = list() + db_vnfd_list = db_nsr.get('vnfd-id') + if db_vnfd_list: + for vnfd in db_vnfd_list: + db_vnfd = self.db.get_one("vnfds", {"_id": vnfd}) + db_vnf_relations = deep_get(db_vnfd, ('vnf-configuration', 'relation')) + if db_vnf_relations: + for r in db_vnf_relations: + # check if this VCA is in the relation + if my_vca.get('vdu_id') in (r.get('entities')[0].get('id'), r.get('entities')[1].get('id')): + vnf_relations.append(r) + + # if no relations, terminate + if not ns_relations and not vnf_relations: + self.logger.debug(logging_text + ' No relations') + return True + + self.logger.debug(logging_text + ' adding relations\n {}\n {}'.format(ns_relations, vnf_relations)) + + # add all relations + start = time() + while True: + # check timeout + now = time() + if now - start >= timeout: + self.logger.error(logging_text + ' : timeout adding relations') + return False + + # reload nsr from database (we need to update record: _admin.deloyed.VCA) + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + + # for each defined NS relation, find the VCA's related + for r in ns_relations.copy(): + from_vca_ee_id = None + to_vca_ee_id = None + from_vca_endpoint = None + to_vca_endpoint = None + vca_list = deep_get(db_nsr, ('_admin', 'deployed', 'VCA')) + for vca in vca_list: + if vca.get('member-vnf-index') == r.get('entities')[0].get('id') \ + and vca.get('config_sw_installed'): + from_vca_ee_id = vca.get('ee_id') + from_vca_endpoint = r.get('entities')[0].get('endpoint') + if vca.get('member-vnf-index') == r.get('entities')[1].get('id') \ + and vca.get('config_sw_installed'): + to_vca_ee_id = vca.get('ee_id') + to_vca_endpoint = r.get('entities')[1].get('endpoint') + if from_vca_ee_id and to_vca_ee_id: + # add relation + await self.vca_map[vca_type].add_relation( + ee_id_1=from_vca_ee_id, + ee_id_2=to_vca_ee_id, + endpoint_1=from_vca_endpoint, + endpoint_2=to_vca_endpoint) + # remove entry from relations list + ns_relations.remove(r) + else: + # check failed peers + try: + vca_status_list = db_nsr.get('configurationStatus') + if vca_status_list: + for i in range(len(vca_list)): + vca = vca_list[i] + vca_status = vca_status_list[i] + if vca.get('member-vnf-index') == r.get('entities')[0].get('id'): + if vca_status.get('status') == 'BROKEN': + # peer broken: remove relation from list + ns_relations.remove(r) + if vca.get('member-vnf-index') == r.get('entities')[1].get('id'): + if vca_status.get('status') == 'BROKEN': + # peer broken: remove relation from list + ns_relations.remove(r) + except Exception: + # ignore + pass + + # for each defined VNF relation, find the VCA's related + for r in vnf_relations.copy(): + from_vca_ee_id = None + to_vca_ee_id = None + from_vca_endpoint = None + to_vca_endpoint = None + vca_list = deep_get(db_nsr, ('_admin', 'deployed', 'VCA')) + for vca in vca_list: + key_to_check = "vdu_id" + if vca.get("vdu_id") is None: + key_to_check = "vnfd_id" + if vca.get(key_to_check) == r.get('entities')[0].get('id') and vca.get('config_sw_installed'): + from_vca_ee_id = vca.get('ee_id') + from_vca_endpoint = r.get('entities')[0].get('endpoint') + if vca.get(key_to_check) == r.get('entities')[1].get('id') and vca.get('config_sw_installed'): + to_vca_ee_id = vca.get('ee_id') + to_vca_endpoint = r.get('entities')[1].get('endpoint') + if from_vca_ee_id and to_vca_ee_id: + # add relation + await self.vca_map[vca_type].add_relation( + ee_id_1=from_vca_ee_id, + ee_id_2=to_vca_ee_id, + endpoint_1=from_vca_endpoint, + endpoint_2=to_vca_endpoint) + # remove entry from relations list + vnf_relations.remove(r) + else: + # check failed peers + try: + vca_status_list = db_nsr.get('configurationStatus') + if vca_status_list: + for i in range(len(vca_list)): + vca = vca_list[i] + vca_status = vca_status_list[i] + if vca.get('vdu_id') == r.get('entities')[0].get('id'): + if vca_status.get('status') == 'BROKEN': + # peer broken: remove relation from list + vnf_relations.remove(r) + if vca.get('vdu_id') == r.get('entities')[1].get('id'): + if vca_status.get('status') == 'BROKEN': + # peer broken: remove relation from list + vnf_relations.remove(r) + except Exception: + # ignore + pass + + # wait for next try + await asyncio.sleep(5.0) + + if not ns_relations and not vnf_relations: + self.logger.debug('Relations added') + break + + return True + + except Exception as e: + self.logger.warn(logging_text + ' ERROR adding relations: {}'.format(e)) + return False + + async def _install_kdu(self, nsr_id: str, nsr_db_path: str, vnfr_data: dict, kdu_index: int, kdud: dict, + vnfd: dict, k8s_instance_info: dict, k8params: dict = None, timeout: int = 600): + + try: + k8sclustertype = k8s_instance_info["k8scluster-type"] + # Instantiate kdu + db_dict_install = {"collection": "nsrs", + "filter": {"_id": nsr_id}, + "path": nsr_db_path} + + kdu_instance = await self.k8scluster_map[k8sclustertype].install( + cluster_uuid=k8s_instance_info["k8scluster-uuid"], + kdu_model=k8s_instance_info["kdu-model"], + atomic=True, + params=k8params, + db_dict=db_dict_install, + timeout=timeout, + kdu_name=k8s_instance_info["kdu-name"], + namespace=k8s_instance_info["namespace"]) + self.update_db_2("nsrs", nsr_id, {nsr_db_path + ".kdu-instance": kdu_instance}) + + # Obtain services to obtain management service ip + services = await self.k8scluster_map[k8sclustertype].get_services( + cluster_uuid=k8s_instance_info["k8scluster-uuid"], + kdu_instance=kdu_instance, + namespace=k8s_instance_info["namespace"]) + + # Obtain management service info (if exists) + vnfr_update_dict = {} + if services: + vnfr_update_dict["kdur.{}.services".format(kdu_index)] = services + mgmt_services = [service for service in kdud.get("service", []) if service.get("mgmt-service")] + for mgmt_service in mgmt_services: + for service in services: + if service["name"].startswith(mgmt_service["name"]): + # Mgmt service found, Obtain service ip + ip = service.get("external_ip", service.get("cluster_ip")) + if isinstance(ip, list) and len(ip) == 1: + ip = ip[0] + + vnfr_update_dict["kdur.{}.ip-address".format(kdu_index)] = ip + + # Check if must update also mgmt ip at the vnf + service_external_cp = mgmt_service.get("external-connection-point-ref") + if service_external_cp: + if deep_get(vnfd, ("mgmt-interface", "cp")) == service_external_cp: + vnfr_update_dict["ip-address"] = ip + + break + else: + self.logger.warn("Mgmt service name: {} not found".format(mgmt_service["name"])) + + vnfr_update_dict["kdur.{}.status".format(kdu_index)] = "READY" + self.update_db_2("vnfrs", vnfr_data.get("_id"), vnfr_update_dict) + + kdu_config = kdud.get("kdu-configuration") + if kdu_config and kdu_config.get("initial-config-primitive") and kdu_config.get("juju") is None: + initial_config_primitive_list = kdu_config.get("initial-config-primitive") + initial_config_primitive_list.sort(key=lambda val: int(val["seq"])) + + for initial_config_primitive in initial_config_primitive_list: + primitive_params_ = self._map_primitive_params(initial_config_primitive, {}, {}) + + await asyncio.wait_for( + self.k8scluster_map[k8sclustertype].exec_primitive( + cluster_uuid=k8s_instance_info["k8scluster-uuid"], + kdu_instance=kdu_instance, + primitive_name=initial_config_primitive["name"], + params=primitive_params_, db_dict={}), + timeout=timeout) + + except Exception as e: + # Prepare update db with error and raise exception + try: + self.update_db_2("nsrs", nsr_id, {nsr_db_path + ".detailed-status": str(e)}) + self.update_db_2("vnfrs", vnfr_data.get("_id"), {"kdur.{}.status".format(kdu_index): "ERROR"}) + except Exception: + # ignore to keep original exception + pass + # reraise original error + raise + + return kdu_instance + + async def deploy_kdus(self, logging_text, nsr_id, nslcmop_id, db_vnfrs, db_vnfds, task_instantiation_info): # Launch kdus if present in the descriptor k8scluster_id_2_uuic = {"helm-chart": {}, "juju-bundle": {}} - def _get_cluster_id(cluster_id, cluster_type): + async def _get_cluster_id(cluster_id, cluster_type): nonlocal k8scluster_id_2_uuic if cluster_id in k8scluster_id_2_uuic[cluster_type]: return k8scluster_id_2_uuic[cluster_type][cluster_id] + # check if K8scluster is creating and wait look if previous tasks in process + task_name, task_dependency = self.lcm_tasks.lookfor_related("k8scluster", cluster_id) + if task_dependency: + text = "Waiting for related tasks '{}' on k8scluster {} to be completed".format(task_name, cluster_id) + self.logger.debug(logging_text + text) + await asyncio.wait(task_dependency, timeout=3600) + db_k8scluster = self.db.get_one("k8sclusters", {"_id": cluster_id}, fail_on_empty=False) if not db_k8scluster: raise LcmException("K8s cluster {} cannot be found".format(cluster_id)) + k8s_id = deep_get(db_k8scluster, ("_admin", cluster_type, "id")) if not k8s_id: - raise LcmException("K8s cluster '{}' has not been initilized for '{}'".format(cluster_id, cluster_type)) + raise LcmException("K8s cluster '{}' has not been initialized for '{}'".format(cluster_id, + cluster_type)) k8scluster_id_2_uuic[cluster_type][cluster_id] = k8s_id return k8s_id logging_text += "Deploy kdus: " + step = "" try: db_nsr_update = {"_admin.deployed.K8s": []} self.update_db_2("nsrs", nsr_id, db_nsr_update) - # Look for all vnfds - pending_tasks = {} index = 0 + updated_cluster_list = [] + for vnfr_data in db_vnfrs.values(): - for kdur in get_iterable(vnfr_data, "kdur"): + for kdu_index, kdur in enumerate(get_iterable(vnfr_data, "kdur")): + # Step 0: Prepare and set parameters desc_params = self._format_additional_params(kdur.get("additionalParams")) - kdumodel = None - k8sclustertype = None - error_text = None - cluster_uuid = None + vnfd_id = vnfr_data.get('vnfd-id') + kdud = next(kdud for kdud in db_vnfds[vnfd_id]["kdu"] if kdud["name"] == kdur["kdu-name"]) + namespace = kdur.get("k8s-namespace") if kdur.get("helm-chart"): kdumodel = kdur["helm-chart"] - k8sclustertype = "chart" - k8sclustertype_full = "helm-chart" + k8sclustertype = "helm-chart" elif kdur.get("juju-bundle"): kdumodel = kdur["juju-bundle"] - k8sclustertype = "juju" - k8sclustertype_full = "juju-bundle" + k8sclustertype = "juju-bundle" else: - error_text = "kdu type is neither helm-chart not juju-bundle. Maybe an old NBI version is" \ - " running" + raise LcmException("kdu type for kdu='{}.{}' is neither helm-chart nor " + "juju-bundle. Maybe an old NBI version is running". + format(vnfr_data["member-vnf-index-ref"], kdur["kdu-name"])) + # check if kdumodel is a file and exists try: - if not error_text: - cluster_uuid = _get_cluster_id(kdur["k8s-cluster"]["id"], k8sclustertype_full) - except LcmException as e: - error_text = str(e) - step = "Instantiate KDU {} in k8s cluster {}".format(kdur["kdu-name"], cluster_uuid) - - k8s_instace_info = {"kdu-instance": None, "k8scluster-uuid": cluster_uuid, - "k8scluster-type": k8sclustertype, - "kdu-name": kdur["kdu-name"], "kdu-model": kdumodel} - if error_text: - k8s_instace_info["detailed-status"] = error_text - db_nsr_update["_admin.deployed.K8s.{}".format(index)] = k8s_instace_info + storage = deep_get(db_vnfds.get(vnfd_id), ('_admin', 'storage')) + if storage and storage.get('pkg-dir'): # may be not present if vnfd has not artifacts + # path format: /vnfdid/pkkdir/helm-charts|juju-bundles/kdumodel + filename = '{}/{}/{}s/{}'.format(storage["folder"], storage["pkg-dir"], k8sclustertype, + kdumodel) + if self.fs.file_exists(filename, mode='file') or self.fs.file_exists(filename, mode='dir'): + kdumodel = self.fs.path + filename + except (asyncio.TimeoutError, asyncio.CancelledError): + raise + except Exception: # it is not a file + pass + + k8s_cluster_id = kdur["k8s-cluster"]["id"] + step = "Synchronize repos for k8s cluster '{}'".format(k8s_cluster_id) + cluster_uuid = await _get_cluster_id(k8s_cluster_id, k8sclustertype) + + # Synchronize repos + if k8sclustertype == "helm-chart" and cluster_uuid not in updated_cluster_list: + del_repo_list, added_repo_dict = await asyncio.ensure_future( + self.k8sclusterhelm.synchronize_repos(cluster_uuid=cluster_uuid)) + if del_repo_list or added_repo_dict: + unset = {'_admin.helm_charts_added.' + item: None for item in del_repo_list} + updated = {'_admin.helm_charts_added.' + + item: name for item, name in added_repo_dict.items()} + self.logger.debug(logging_text + "repos synchronized on k8s cluster '{}' to_delete: {}, " + "to_add: {}".format(k8s_cluster_id, del_repo_list, + added_repo_dict)) + self.db.set_one("k8sclusters", {"_id": k8s_cluster_id}, updated, unset=unset) + updated_cluster_list.append(cluster_uuid) + + # Instantiate kdu + step = "Instantiating KDU {}.{} in k8s cluster {}".format(vnfr_data["member-vnf-index-ref"], + kdur["kdu-name"], k8s_cluster_id) + k8s_instance_info = {"kdu-instance": None, + "k8scluster-uuid": cluster_uuid, + "k8scluster-type": k8sclustertype, + "member-vnf-index": vnfr_data["member-vnf-index-ref"], + "kdu-name": kdur["kdu-name"], + "kdu-model": kdumodel, + "namespace": namespace} + db_path = "_admin.deployed.K8s.{}".format(index) + db_nsr_update[db_path] = k8s_instance_info self.update_db_2("nsrs", nsr_id, db_nsr_update) - if error_text: - continue - db_dict = {"collection": "nsrs", "filter": {"_id": nsr_id}, "path": "_admin.deployed.K8s." - "{}".format(index)} - if k8sclustertype == "chart": - task = asyncio.ensure_future( - self.k8sclusterhelm.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, atomic=True, - params=desc_params, db_dict=db_dict, timeout=3600) - ) - else: - task = self.k8sclusterjuju.install(cluster_uuid=cluster_uuid, kdu_model=kdumodel, - atomic=True, params=desc_params, - db_dict=db_dict, timeout=600) + task = asyncio.ensure_future( + self._install_kdu(nsr_id, db_path, vnfr_data, kdu_index, kdud, db_vnfds[vnfd_id], + k8s_instance_info, k8params=desc_params, timeout=600)) + self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_KDU-{}".format(index), task) + task_instantiation_info[task] = "Deploying KDU {}".format(kdur["kdu-name"]) - pending_tasks[task] = "_admin.deployed.K8s.{}.".format(index) index += 1 - if not pending_tasks: - return - self.logger.debug(logging_text + 'Waiting for terminate pending tasks...') - pending_list = list(pending_tasks.keys()) - while pending_list: - done_list, pending_list = await asyncio.wait(pending_list, timeout=30*60, - return_when=asyncio.FIRST_COMPLETED) - if not done_list: # timeout - for task in pending_list: - db_nsr_update[pending_tasks(task) + "detailed-status"] = "Timeout" - break - for task in done_list: - exc = task.exception() - if exc: - db_nsr_update[pending_tasks[task] + "detailed-status"] = "{}".format(exc) - else: - db_nsr_update[pending_tasks[task] + "kdu-instance"] = task.result() + except (LcmException, asyncio.CancelledError): + raise except Exception as e: - self.logger.critical(logging_text + "Exit Exception {} while '{}': {}".format(type(e).__name__, step, e)) - raise LcmException("{} Exit Exception {} while '{}': {}".format(logging_text, type(e).__name__, step, e)) + msg = "Exception {} while {}: {}".format(type(e).__name__, step, e) + if isinstance(e, (N2VCException, DbException)): + self.logger.error(logging_text + msg) + else: + self.logger.critical(logging_text + msg, exc_info=True) + raise LcmException(msg) finally: - # TODO Write in data base if db_nsr_update: self.update_db_2("nsrs", nsr_id, db_nsr_update) def _deploy_n2vc(self, logging_text, db_nsr, db_vnfr, nslcmop_id, nsr_id, nsi_id, vnfd_id, vdu_id, kdu_name, member_vnf_index, vdu_index, vdu_name, deploy_params, descriptor_config, - base_folder, task_instantiation_list, task_instantiation_info): + base_folder, task_instantiation_info, stage): # launch instantiate_N2VC in a asyncio task and register task object # Look where information of this charm is at database ._admin.deployed.VCA # if not found, create one entry and update database - # fill db_nsr._admin.deployed.VCA. - vca_index = -1 - for vca_index, vca_deployed in enumerate(db_nsr["_admin"]["deployed"]["VCA"]): - if not vca_deployed: + + self.logger.debug(logging_text + "_deploy_n2vc vnfd_id={}, vdu_id={}".format(vnfd_id, vdu_id)) + if descriptor_config.get("juju"): # There is one execution envioronment of type juju + ee_list = [descriptor_config] + elif descriptor_config.get("execution-environment-list"): + ee_list = descriptor_config.get("execution-environment-list") + else: # other types as script are not supported + ee_list = [] + + for ee_item in ee_list: + self.logger.debug(logging_text + "_deploy_n2vc ee_item juju={}, helm={}".format(ee_item.get('juju'), + ee_item.get("helm-chart"))) + ee_descriptor_id = ee_item.get("id") + if ee_item.get("juju"): + vca_name = ee_item['juju'].get('charm') + vca_type = "lxc_proxy_charm" if ee_item['juju'].get('charm') is not None else "native_charm" + if ee_item['juju'].get('cloud') == "k8s": + vca_type = "k8s_proxy_charm" + elif ee_item['juju'].get('proxy') is False: + vca_type = "native_charm" + elif ee_item.get("helm-chart"): + vca_name = ee_item['helm-chart'] + vca_type = "helm" + else: + self.logger.debug(logging_text + "skipping non juju neither charm configuration") continue - if vca_deployed.get("member-vnf-index") == member_vnf_index and \ - vca_deployed.get("vdu_id") == vdu_id and \ - vca_deployed.get("kdu_name") == kdu_name and \ - vca_deployed.get("vdu_count_index", 0) == vdu_index: - break - else: - # not found, create one. - vca_deployed = { - "member-vnf-index": member_vnf_index, - "vdu_id": vdu_id, - "kdu_name": kdu_name, - "vdu_count_index": vdu_index, - "operational-status": "init", # TODO revise - "detailed-status": "", # TODO revise - "step": "initial-deploy", # TODO revise - "vnfd_id": vnfd_id, - "vdu_name": vdu_name, - } - vca_index += 1 - self.update_db_2("nsrs", nsr_id, {"_admin.deployed.VCA.{}".format(vca_index): vca_deployed}) - db_nsr["_admin"]["deployed"]["VCA"].append(vca_deployed) - # Launch task - task_n2vc = asyncio.ensure_future( - self.instantiate_N2VC( - logging_text=logging_text, - vca_index=vca_index, - nsi_id=nsi_id, - db_nsr=db_nsr, - db_vnfr=db_vnfr, - vdu_id=vdu_id, - kdu_name=kdu_name, - vdu_index=vdu_index, - deploy_params=deploy_params, - config_descriptor=descriptor_config, - base_folder=base_folder, + vca_index = -1 + for vca_index, vca_deployed in enumerate(db_nsr["_admin"]["deployed"]["VCA"]): + if not vca_deployed: + continue + if vca_deployed.get("member-vnf-index") == member_vnf_index and \ + vca_deployed.get("vdu_id") == vdu_id and \ + vca_deployed.get("kdu_name") == kdu_name and \ + vca_deployed.get("vdu_count_index", 0) == vdu_index and \ + vca_deployed.get("ee_descriptor_id") == ee_descriptor_id: + break + else: + # not found, create one. + target = "ns" if not member_vnf_index else "vnf/{}".format(member_vnf_index) + if vdu_id: + target += "/vdu/{}/{}".format(vdu_id, vdu_index or 0) + elif kdu_name: + target += "/kdu/{}".format(kdu_name) + vca_deployed = { + "target_element": target, + # ^ target_element will replace member-vnf-index, kdu_name, vdu_id ... in a single string + "member-vnf-index": member_vnf_index, + "vdu_id": vdu_id, + "kdu_name": kdu_name, + "vdu_count_index": vdu_index, + "operational-status": "init", # TODO revise + "detailed-status": "", # TODO revise + "step": "initial-deploy", # TODO revise + "vnfd_id": vnfd_id, + "vdu_name": vdu_name, + "type": vca_type, + "ee_descriptor_id": ee_descriptor_id + } + vca_index += 1 + + # create VCA and configurationStatus in db + db_dict = { + "_admin.deployed.VCA.{}".format(vca_index): vca_deployed, + "configurationStatus.{}".format(vca_index): dict() + } + self.update_db_2("nsrs", nsr_id, db_dict) + + db_nsr["_admin"]["deployed"]["VCA"].append(vca_deployed) + + # Launch task + task_n2vc = asyncio.ensure_future( + self.instantiate_N2VC( + logging_text=logging_text, + vca_index=vca_index, + nsi_id=nsi_id, + db_nsr=db_nsr, + db_vnfr=db_vnfr, + vdu_id=vdu_id, + kdu_name=kdu_name, + vdu_index=vdu_index, + deploy_params=deploy_params, + config_descriptor=descriptor_config, + base_folder=base_folder, + nslcmop_id=nslcmop_id, + stage=stage, + vca_type=vca_type, + vca_name=vca_name, + ee_config_descriptor=ee_item + ) ) - ) - self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_N2VC-{}".format(vca_index), task_n2vc) - task_instantiation_info[task_n2vc] = "Deploy VCA {}.{}".format(member_vnf_index or "", vdu_id or "") - task_instantiation_list.append(task_n2vc) - - # Check if this VNFD has a configured terminate action - def _has_terminate_config_primitive(self, vnfd): - vnf_config = vnfd.get("vnf-configuration") - if vnf_config and vnf_config.get("terminate-config-primitive"): - return True - else: - return False + self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_N2VC-{}".format(vca_index), task_n2vc) + task_instantiation_info[task_n2vc] = self.task_name_deploy_vca + " {}.{}".format( + member_vnf_index or "", vdu_id or "") @staticmethod - def _get_terminate_config_primitive_seq_list(vnfd): - """ Get a numerically sorted list of the sequences for this VNFD's terminate action """ - # No need to check for existing primitive twice, already done before - vnf_config = vnfd.get("vnf-configuration") - seq_list = vnf_config.get("terminate-config-primitive") - # Get all 'seq' tags in seq_list, order sequences numerically, ascending. - seq_list_sorted = sorted(seq_list, key=lambda x: int(x['seq'])) - return seq_list_sorted + def _get_terminate_config_primitive(primitive_list, vca_deployed): + """ Get a sorted terminate config primitive list. In case ee_descriptor_id is present at vca_deployed, + it get only those primitives for this execution envirom""" + + primitive_list = primitive_list or [] + # filter primitives by ee_descriptor_id + ee_descriptor_id = vca_deployed.get("ee_descriptor_id") + primitive_list = [p for p in primitive_list if p.get("execution-environment-ref") == ee_descriptor_id] + + if primitive_list: + primitive_list.sort(key=lambda val: int(val['seq'])) + + return primitive_list @staticmethod def _create_nslcmop(nsr_id, operation, params): @@ -1831,16 +2858,16 @@ class NsLcm(LcmBase): # sub-operations - def _reintent_or_skip_suboperation(self, db_nslcmop, op_index): - op = db_nslcmop.get('_admin', {}).get('operations', [])[op_index] - if (op.get('operationState') == 'COMPLETED'): + def _retry_or_skip_suboperation(self, db_nslcmop, op_index): + op = deep_get(db_nslcmop, ('_admin', 'operations'), [])[op_index] + if op.get('operationState') == 'COMPLETED': # b. Skip sub-operation # _ns_execute_primitive() or RO.create_action() will NOT be executed return self.SUBOPERATION_STATUS_SKIP else: - # c. Reintent executing sub-operation + # c. retry executing sub-operation # The sub-operation exists, and operationState != 'COMPLETED' - # Update operationState = 'PROCESSING' to indicate a reintent. + # Update operationState = 'PROCESSING' to indicate a retry. operationState = 'PROCESSING' detailed_status = 'In progress' self._update_suboperation_status( @@ -1853,7 +2880,7 @@ class NsLcm(LcmBase): # Find a sub-operation where all keys in a matching dictionary must match # Returns the index of the matching sub-operation, or SUBOPERATION_STATUS_NOT_FOUND if no match def _find_suboperation(self, db_nslcmop, match): - if (db_nslcmop and match): + if db_nslcmop and match: op_list = db_nslcmop.get('_admin', {}).get('operations', []) for i, op in enumerate(op_list): if all(op.get(k) == match[k] for k in match): @@ -1881,7 +2908,7 @@ class NsLcm(LcmBase): def _add_suboperation(self, db_nslcmop, vnf_index, vdu_id, vdu_count_index, vdu_name, primitive, mapped_primitive_params, operationState=None, detailed_status=None, operationType=None, RO_nsr_id=None, RO_scaling_info=None): - if not (db_nslcmop): + if not db_nslcmop: return self.SUBOPERATION_STATUS_NOT_FOUND # Get the "_admin.operations" list, if it exists db_nslcmop_admin = db_nslcmop.get('_admin', {}) @@ -1921,11 +2948,11 @@ class NsLcm(LcmBase): # Check for 3 different cases: # a. New: First time execution, return SUBOPERATION_STATUS_NEW # b. Skip: Existing sub-operation exists, operationState == 'COMPLETED', return SUBOPERATION_STATUS_SKIP - # c. Reintent: Existing sub-operation exists, operationState != 'COMPLETED', return op_index to re-execute + # c. retry: Existing sub-operation exists, operationState != 'COMPLETED', return op_index to re-execute def _check_or_add_scale_suboperation(self, db_nslcmop, vnf_index, vnf_config_primitive, primitive_params, operationType, RO_nsr_id=None, RO_scaling_info=None): # Find this sub-operation - if (RO_nsr_id and RO_scaling_info): + if RO_nsr_id and RO_scaling_info: operationType = 'SCALE-RO' match = { 'member_vnf_index': vnf_index, @@ -1940,7 +2967,7 @@ class NsLcm(LcmBase): 'lcmOperationType': operationType } op_index = self._find_suboperation(db_nslcmop, match) - if (op_index == self.SUBOPERATION_STATUS_NOT_FOUND): + if op_index == self.SUBOPERATION_STATUS_NOT_FOUND: # a. New sub-operation # The sub-operation does not exist, add it. # _ns_execute_primitive() will be called from scale() as usual, with non-modified arguments @@ -1948,7 +2975,7 @@ class NsLcm(LcmBase): vdu_id = None vdu_count_index = None vdu_name = None - if (RO_nsr_id and RO_scaling_info): + if RO_nsr_id and RO_scaling_info: vnf_config_primitive = None primitive_params = None else: @@ -1974,91 +3001,240 @@ class NsLcm(LcmBase): else: # Return either SUBOPERATION_STATUS_SKIP (operationState == 'COMPLETED'), # or op_index (operationState != 'COMPLETED') - return self._reintent_or_skip_suboperation(db_nslcmop, op_index) + return self._retry_or_skip_suboperation(db_nslcmop, op_index) # Function to return execution_environment id def _get_ee_id(self, vnf_index, vdu_id, vca_deployed_list): + # TODO vdu_index_count for vca in vca_deployed_list: if vca["member-vnf-index"] == vnf_index and vca["vdu_id"] == vdu_id: return vca["ee_id"] - # Helper methods for terminate() + async def destroy_N2VC(self, logging_text, db_nslcmop, vca_deployed, config_descriptor, + vca_index, destroy_ee=True, exec_primitives=True): + """ + Execute the terminate primitives and destroy the execution environment (if destroy_ee=False + :param logging_text: + :param db_nslcmop: + :param vca_deployed: Dictionary of deployment info at db_nsr._admin.depoloyed.VCA. + :param config_descriptor: Configuration descriptor of the NSD, VNFD, VNFD.vdu or VNFD.kdu + :param vca_index: index in the database _admin.deployed.VCA + :param destroy_ee: False to do not destroy, because it will be destroyed all of then at once + :param exec_primitives: False to do not execute terminate primitives, because the config is not completed or has + not executed properly + :return: None or exception + """ - async def _terminate_action(self, db_nslcmop, nslcmop_id, nsr_id): - """ Create a primitive with params from VNFD - Called from terminate() before deleting instance - Calls action() to execute the primitive """ - logging_text = "Task ns={} _terminate_action={} ".format(nsr_id, nslcmop_id) - db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) - vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] - db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}) - db_vnfds = {} - # Loop over VNFRs - for vnfr in db_vnfrs_list: - vnfd_id = vnfr["vnfd-id"] - vnf_index = vnfr["member-vnf-index-ref"] - if vnfd_id not in db_vnfds: - step = "Getting vnfd={} id='{}' from db".format(vnfd_id, vnfd_id) - vnfd = self.db.get_one("vnfds", {"_id": vnfd_id}) - db_vnfds[vnfd_id] = vnfd - vnfd = db_vnfds[vnfd_id] - if not self._has_terminate_config_primitive(vnfd): - continue - # Get the primitive's sorted sequence list - seq_list = self._get_terminate_config_primitive_seq_list(vnfd) - for seq in seq_list: - # For each sequence in list, get primitive and call _ns_execute_primitive() - step = "Calling terminate action for vnf_member_index={} primitive={}".format( - vnf_index, seq.get("name")) - self.logger.debug(logging_text + step) - # Create the primitive for each sequence, i.e. "primitive": "touch" - primitive = seq.get('name') - mapped_primitive_params = self._get_terminate_primitive_params(seq, vnf_index) - # The following 3 parameters are currently set to None for 'terminate': - # vdu_id, vdu_count_index, vdu_name - vdu_id = db_nslcmop["operationParams"].get("vdu_id") - vdu_count_index = db_nslcmop["operationParams"].get("vdu_count_index") - vdu_name = db_nslcmop["operationParams"].get("vdu_name") - # Add sub-operation - self._add_suboperation(db_nslcmop, - nslcmop_id, - vnf_index, - vdu_id, - vdu_count_index, - vdu_name, - primitive, - mapped_primitive_params) - # Sub-operations: Call _ns_execute_primitive() instead of action() - # db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) - # nsr_deployed = db_nsr["_admin"]["deployed"] - - # nslcmop_operation_state, nslcmop_operation_state_detail = await self.action( - # nsr_id, nslcmop_terminate_action_id) - # Launch Exception if action() returns other than ['COMPLETED', 'PARTIALLY_COMPLETED'] - # result_ok = ['COMPLETED', 'PARTIALLY_COMPLETED'] - # if result not in result_ok: - # raise LcmException( - # "terminate_primitive_action for vnf_member_index={}", - # " primitive={} fails with error {}".format( - # vnf_index, seq.get("name"), result_detail)) - - ee_id = self._get_ee_id(vnf_index, vdu_id, vca_deployed_list) + self.logger.debug( + logging_text + " vca_index: {}, vca_deployed: {}, config_descriptor: {}, destroy_ee: {}".format( + vca_index, vca_deployed, config_descriptor, destroy_ee + ) + ) + + vca_type = vca_deployed.get("type", "lxc_proxy_charm") + + # execute terminate_primitives + if exec_primitives: + terminate_primitives = self._get_terminate_config_primitive( + config_descriptor.get("terminate-config-primitive"), vca_deployed) + vdu_id = vca_deployed.get("vdu_id") + vdu_count_index = vca_deployed.get("vdu_count_index") + vdu_name = vca_deployed.get("vdu_name") + vnf_index = vca_deployed.get("member-vnf-index") + if terminate_primitives and vca_deployed.get("needed_terminate"): + for seq in terminate_primitives: + # For each sequence in list, get primitive and call _ns_execute_primitive() + step = "Calling terminate action for vnf_member_index={} primitive={}".format( + vnf_index, seq.get("name")) + self.logger.debug(logging_text + step) + # Create the primitive for each sequence, i.e. "primitive": "touch" + primitive = seq.get('name') + mapped_primitive_params = self._get_terminate_primitive_params(seq, vnf_index) + + # Add sub-operation + self._add_suboperation(db_nslcmop, + vnf_index, + vdu_id, + vdu_count_index, + vdu_name, + primitive, + mapped_primitive_params) + # Sub-operations: Call _ns_execute_primitive() instead of action() + try: + result, result_detail = await self._ns_execute_primitive(vca_deployed["ee_id"], primitive, + mapped_primitive_params, + vca_type=vca_type) + except LcmException: + # this happens when VCA is not deployed. In this case it is not needed to terminate + continue + result_ok = ['COMPLETED', 'PARTIALLY_COMPLETED'] + if result not in result_ok: + raise LcmException("terminate_primitive {} for vnf_member_index={} fails with " + "error {}".format(seq.get("name"), vnf_index, result_detail)) + # set that this VCA do not need terminated + db_update_entry = "_admin.deployed.VCA.{}.needed_terminate".format(vca_index) + self.update_db_2("nsrs", db_nslcmop["nsInstanceId"], {db_update_entry: False}) + + if vca_deployed.get("prometheus_jobs") and self.prometheus: + await self.prometheus.update(remove_jobs=vca_deployed["prometheus_jobs"]) + + if destroy_ee: + await self.vca_map[vca_type].delete_execution_environment(vca_deployed["ee_id"]) + + async def _delete_all_N2VC(self, db_nsr: dict): + self._write_all_config_status(db_nsr=db_nsr, status='TERMINATING') + namespace = "." + db_nsr["_id"] + try: + await self.n2vc.delete_namespace(namespace=namespace, total_timeout=self.timeout_charm_delete) + except N2VCNotFound: # already deleted. Skip + pass + self._write_all_config_status(db_nsr=db_nsr, status='DELETED') + + async def _terminate_RO(self, logging_text, nsr_deployed, nsr_id, nslcmop_id, stage): + """ + Terminates a deployment from RO + :param logging_text: + :param nsr_deployed: db_nsr._admin.deployed + :param nsr_id: + :param nslcmop_id: + :param stage: list of string with the content to write on db_nslcmop.detailed-status. + this method will update only the index 2, but it will write on database the concatenated content of the list + :return: + """ + db_nsr_update = {} + failed_detail = [] + ro_nsr_id = ro_delete_action = None + if nsr_deployed and nsr_deployed.get("RO"): + ro_nsr_id = nsr_deployed["RO"].get("nsr_id") + ro_delete_action = nsr_deployed["RO"].get("nsr_delete_action_id") + try: + if ro_nsr_id: + stage[2] = "Deleting ns from VIM." + db_nsr_update["detailed-status"] = " ".join(stage) + self._write_op_status(nslcmop_id, stage) + self.logger.debug(logging_text + stage[2]) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + desc = await self.RO.delete("ns", ro_nsr_id) + ro_delete_action = desc["action_id"] + db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = ro_delete_action + db_nsr_update["_admin.deployed.RO.nsr_id"] = None + db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" + if ro_delete_action: + # wait until NS is deleted from VIM + stage[2] = "Waiting ns deleted from VIM." + detailed_status_old = None + self.logger.debug(logging_text + stage[2] + " RO_id={} ro_delete_action={}".format(ro_nsr_id, + ro_delete_action)) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + + delete_timeout = 20 * 60 # 20 minutes + while delete_timeout > 0: + desc = await self.RO.show( + "ns", + item_id_name=ro_nsr_id, + extra_item="action", + extra_item_id=ro_delete_action) + + # deploymentStatus + self._on_update_ro_db(nsrs_id=nsr_id, ro_descriptor=desc) + + ns_status, ns_status_info = self.RO.check_action_status(desc) + if ns_status == "ERROR": + raise ROclient.ROClientException(ns_status_info) + elif ns_status == "BUILD": + stage[2] = "Deleting from VIM {}".format(ns_status_info) + elif ns_status == "ACTIVE": + db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None + db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" + break + else: + assert False, "ROclient.check_action_status returns unknown {}".format(ns_status) + if stage[2] != detailed_status_old: + detailed_status_old = stage[2] + db_nsr_update["detailed-status"] = " ".join(stage) + self._write_op_status(nslcmop_id, stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + await asyncio.sleep(5, loop=self.loop) + delete_timeout -= 5 + else: # delete_timeout <= 0: + raise ROclient.ROClientException("Timeout waiting ns deleted from VIM") + + except Exception as e: + self.update_db_2("nsrs", nsr_id, db_nsr_update) + if isinstance(e, ROclient.ROClientException) and e.http_code == 404: # not found + db_nsr_update["_admin.deployed.RO.nsr_id"] = None + db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" + db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None + self.logger.debug(logging_text + "RO_ns_id={} already deleted".format(ro_nsr_id)) + elif isinstance(e, ROclient.ROClientException) and e.http_code == 409: # conflict + failed_detail.append("delete conflict: {}".format(e)) + self.logger.debug(logging_text + "RO_ns_id={} delete conflict: {}".format(ro_nsr_id, e)) + else: + failed_detail.append("delete error: {}".format(e)) + self.logger.error(logging_text + "RO_ns_id={} delete error: {}".format(ro_nsr_id, e)) + + # Delete nsd + if not failed_detail and deep_get(nsr_deployed, ("RO", "nsd_id")): + ro_nsd_id = nsr_deployed["RO"]["nsd_id"] + try: + stage[2] = "Deleting nsd from RO." + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + await self.RO.delete("nsd", ro_nsd_id) + self.logger.debug(logging_text + "ro_nsd_id={} deleted".format(ro_nsd_id)) + db_nsr_update["_admin.deployed.RO.nsd_id"] = None + except Exception as e: + if isinstance(e, ROclient.ROClientException) and e.http_code == 404: # not found + db_nsr_update["_admin.deployed.RO.nsd_id"] = None + self.logger.debug(logging_text + "ro_nsd_id={} already deleted".format(ro_nsd_id)) + elif isinstance(e, ROclient.ROClientException) and e.http_code == 409: # conflict + failed_detail.append("ro_nsd_id={} delete conflict: {}".format(ro_nsd_id, e)) + self.logger.debug(logging_text + failed_detail[-1]) + else: + failed_detail.append("ro_nsd_id={} delete error: {}".format(ro_nsd_id, e)) + self.logger.error(logging_text + failed_detail[-1]) + + if not failed_detail and deep_get(nsr_deployed, ("RO", "vnfd")): + for index, vnf_deployed in enumerate(nsr_deployed["RO"]["vnfd"]): + if not vnf_deployed or not vnf_deployed["id"]: + continue try: - await self.n2vc.exec_primitive( - ee_id=ee_id, - primitive_name=primitive, - params_dict=mapped_primitive_params - ) + ro_vnfd_id = vnf_deployed["id"] + stage[2] = "Deleting member_vnf_index={} ro_vnfd_id={} from RO.".format( + vnf_deployed["member-vnf-index"], ro_vnfd_id) + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + await self.RO.delete("vnfd", ro_vnfd_id) + self.logger.debug(logging_text + "ro_vnfd_id={} deleted".format(ro_vnfd_id)) + db_nsr_update["_admin.deployed.RO.vnfd.{}.id".format(index)] = None except Exception as e: - self.logger.error('Error executing primitive {}: {}'.format(primitive, e)) - raise LcmException( - "terminate_primitive_action for vnf_member_index={}, primitive={} fails with error {}" - .format(vnf_index, seq.get("name"), e), - ) + if isinstance(e, ROclient.ROClientException) and e.http_code == 404: # not found + db_nsr_update["_admin.deployed.RO.vnfd.{}.id".format(index)] = None + self.logger.debug(logging_text + "ro_vnfd_id={} already deleted ".format(ro_vnfd_id)) + elif isinstance(e, ROclient.ROClientException) and e.http_code == 409: # conflict + failed_detail.append("ro_vnfd_id={} delete conflict: {}".format(ro_vnfd_id, e)) + self.logger.debug(logging_text + failed_detail[-1]) + else: + failed_detail.append("ro_vnfd_id={} delete error: {}".format(ro_vnfd_id, e)) + self.logger.error(logging_text + failed_detail[-1]) - async def terminate(self, nsr_id, nslcmop_id): + if failed_detail: + stage[2] = "Error deleting from VIM" + else: + stage[2] = "Deleted from VIM" + db_nsr_update["detailed-status"] = " ".join(stage) + self.update_db_2("nsrs", nsr_id, db_nsr_update) + self._write_op_status(nslcmop_id, stage) + if failed_detail: + raise LcmException("; ".join(failed_detail)) + + async def terminate(self, nsr_id, nslcmop_id): # Try to lock HA task here task_is_locked_by_me = self.lcm_tasks.lock_HA('ns', 'nslcmops', nslcmop_id) if not task_is_locked_by_me: @@ -2066,265 +3242,226 @@ class NsLcm(LcmBase): logging_text = "Task ns={} terminate={} ".format(nsr_id, nslcmop_id) self.logger.debug(logging_text + "Enter") + timeout_ns_terminate = self.timeout_ns_terminate db_nsr = None db_nslcmop = None + operation_params = None exc = None - failed_detail = [] # annotates all failed error messages - db_nsr_update = {"_admin.nslcmop": nslcmop_id, - "_admin.current-operation": nslcmop_id, - "_admin.operation-type": "terminate"} - self.update_db_2("nsrs", nsr_id, db_nsr_update) + error_list = [] # annotates all failed error messages db_nslcmop_update = {} - nslcmop_operation_state = None autoremove = False # autoremove after terminated - pending_tasks = [] + tasks_dict_info = {} + db_nsr_update = {} + stage = ["Stage 1/3: Preparing task.", "Waiting for previous operations to terminate.", ""] + # ^ contains [stage, step, VIM-status] try: # wait for any previous tasks in process - step = "Waiting for previous operations to terminate" await self.lcm_tasks.waitfor_related_HA("ns", 'nslcmops', nslcmop_id) + stage[1] = "Getting nslcmop={} from db.".format(nslcmop_id) + db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) + operation_params = db_nslcmop.get("operationParams") or {} + if operation_params.get("timeout_ns_terminate"): + timeout_ns_terminate = operation_params["timeout_ns_terminate"] + stage[1] = "Getting nsr={} from db.".format(nsr_id) + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + + db_nsr_update["operational-status"] = "terminating" + db_nsr_update["config-status"] = "terminating" self._write_ns_status( nsr_id=nsr_id, ns_state="TERMINATING", current_operation="TERMINATING", - current_operation_id=nslcmop_id + current_operation_id=nslcmop_id, + other_update=db_nsr_update ) - - step = "Getting nslcmop={} from db".format(nslcmop_id) - db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) - step = "Getting nsr={} from db".format(nsr_id) - db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) - # nsd = db_nsr["nsd"] - nsr_deployed = deepcopy(db_nsr["_admin"].get("deployed")) + self._write_op_status( + op_id=nslcmop_id, + queuePosition=0, + stage=stage + ) + nsr_deployed = deepcopy(db_nsr["_admin"].get("deployed")) or {} if db_nsr["_admin"]["nsState"] == "NOT_INSTANTIATED": return - # #TODO check if VIM is creating and wait - # RO_vim_id = db_vim["_admin"]["deployed"]["RO"] - # Call internal terminate action - await self._terminate_action(db_nslcmop, nslcmop_id, nsr_id) - - pending_tasks = [] - - db_nsr_update["operational-status"] = "terminating" - db_nsr_update["config-status"] = "terminating" - - # remove NS - try: - step = "delete execution environment" - self.logger.debug(logging_text + step) - - task_delete_ee = asyncio.ensure_future(self.n2vc.delete_namespace(namespace="." + nsr_id)) - pending_tasks.append(task_delete_ee) - except Exception as e: - msg = "Failed while deleting NS in VCA: {}".format(e) - self.logger.error(msg) - failed_detail.append(msg) - try: - # Delete from k8scluster - step = "delete kdus" - self.logger.debug(logging_text + step) - # print(nsr_deployed) - if nsr_deployed: - for kdu in nsr_deployed.get("K8s", ()): - kdu_instance = kdu.get("kdu-instance") - if not kdu_instance: - continue - if kdu.get("k8scluster-type") == "chart": - task_delete_kdu_instance = asyncio.ensure_future( - self.k8sclusterhelm.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu_instance)) - elif kdu.get("k8scluster-type") == "juju": - task_delete_kdu_instance = asyncio.ensure_future( - self.k8sclusterjuju.uninstall(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu_instance)) - else: - self.error(logging_text + "Unknown k8s deployment type {}". - format(kdu.get("k8scluster-type"))) - continue - pending_tasks.append(task_delete_kdu_instance) - except LcmException as e: - msg = "Failed while deleting KDUs from NS: {}".format(e) - self.logger.error(msg) - failed_detail.append(msg) - - # remove from RO - RO_fail = False - - # Delete ns - RO_nsr_id = RO_delete_action = None - if nsr_deployed and nsr_deployed.get("RO"): - RO_nsr_id = nsr_deployed["RO"].get("nsr_id") - RO_delete_action = nsr_deployed["RO"].get("nsr_delete_action_id") - try: - if RO_nsr_id: - step = db_nsr_update["detailed-status"] = db_nslcmop_update["detailed-status"] = \ - "Deleting ns from VIM" - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self.logger.debug(logging_text + step) - desc = await self.RO.delete("ns", RO_nsr_id) - RO_delete_action = desc["action_id"] - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = RO_delete_action - db_nsr_update["_admin.deployed.RO.nsr_id"] = None - db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - if RO_delete_action: - # wait until NS is deleted from VIM - step = detailed_status = "Waiting ns deleted from VIM. RO_id={} RO_delete_action={}".\ - format(RO_nsr_id, RO_delete_action) - detailed_status_old = None - self.logger.debug(logging_text + step) - - delete_timeout = 20 * 60 # 20 minutes - while delete_timeout > 0: - desc = await self.RO.show( - "ns", - item_id_name=RO_nsr_id, - extra_item="action", - extra_item_id=RO_delete_action) - ns_status, ns_status_info = self.RO.check_action_status(desc) - if ns_status == "ERROR": - raise ROclient.ROClientException(ns_status_info) - elif ns_status == "BUILD": - detailed_status = step + "; {}".format(ns_status_info) - elif ns_status == "ACTIVE": - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None - db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - break - else: - assert False, "ROclient.check_action_status returns unknown {}".format(ns_status) - if detailed_status != detailed_status_old: - detailed_status_old = db_nslcmop_update["detailed-status"] = \ - db_nsr_update["detailed-status"] = detailed_status - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - self.update_db_2("nsrs", nsr_id, db_nsr_update) - await asyncio.sleep(5, loop=self.loop) - delete_timeout -= 5 - else: # delete_timeout <= 0: - raise ROclient.ROClientException("Timeout waiting ns deleted from VIM") - - except ROclient.ROClientException as e: - if e.http_code == 404: # not found - db_nsr_update["_admin.deployed.RO.nsr_id"] = None - db_nsr_update["_admin.deployed.RO.nsr_status"] = "DELETED" - db_nsr_update["_admin.deployed.RO.nsr_delete_action_id"] = None - self.logger.debug(logging_text + "RO_ns_id={} already deleted".format(RO_nsr_id)) - elif e.http_code == 409: # conflict - failed_detail.append("RO_ns_id={} delete conflict: {}".format(RO_nsr_id, e)) - self.logger.debug(logging_text + failed_detail[-1]) - RO_fail = True + stage[1] = "Getting vnf descriptors from db." + db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}) + db_vnfds_from_id = {} + db_vnfds_from_member_index = {} + # Loop over VNFRs + for vnfr in db_vnfrs_list: + vnfd_id = vnfr["vnfd-id"] + if vnfd_id not in db_vnfds_from_id: + vnfd = self.db.get_one("vnfds", {"_id": vnfd_id}) + db_vnfds_from_id[vnfd_id] = vnfd + db_vnfds_from_member_index[vnfr["member-vnf-index-ref"]] = db_vnfds_from_id[vnfd_id] + + # Destroy individual execution environments when there are terminating primitives. + # Rest of EE will be deleted at once + # TODO - check before calling _destroy_N2VC + # if not operation_params.get("skip_terminate_primitives"):# + # or not vca.get("needed_terminate"): + stage[0] = "Stage 2/3 execute terminating primitives." + self.logger.debug(logging_text + stage[0]) + stage[1] = "Looking execution environment that needs terminate." + self.logger.debug(logging_text + stage[1]) + # self.logger.debug("nsr_deployed: {}".format(nsr_deployed)) + for vca_index, vca in enumerate(get_iterable(nsr_deployed, "VCA")): + config_descriptor = None + if not vca or not vca.get("ee_id"): + continue + if not vca.get("member-vnf-index"): + # ns + config_descriptor = db_nsr.get("ns-configuration") + elif vca.get("vdu_id"): + db_vnfd = db_vnfds_from_member_index[vca["member-vnf-index"]] + vdud = next((vdu for vdu in db_vnfd.get("vdu", ()) if vdu["id"] == vca.get("vdu_id")), None) + if vdud: + config_descriptor = vdud.get("vdu-configuration") + elif vca.get("kdu_name"): + db_vnfd = db_vnfds_from_member_index[vca["member-vnf-index"]] + kdud = next((kdu for kdu in db_vnfd.get("kdu", ()) if kdu["name"] == vca.get("kdu_name")), None) + if kdud: + config_descriptor = kdud.get("kdu-configuration") else: - failed_detail.append("RO_ns_id={} delete error: {}".format(RO_nsr_id, e)) - self.logger.error(logging_text + failed_detail[-1]) - RO_fail = True - - # Delete nsd - if not RO_fail and nsr_deployed and nsr_deployed.get("RO") and nsr_deployed["RO"].get("nsd_id"): - RO_nsd_id = nsr_deployed["RO"]["nsd_id"] - try: - step = db_nsr_update["detailed-status"] = db_nslcmop_update["detailed-status"] =\ - "Deleting nsd from RO" - await self.RO.delete("nsd", RO_nsd_id) - self.logger.debug(logging_text + "RO_nsd_id={} deleted".format(RO_nsd_id)) - db_nsr_update["_admin.deployed.RO.nsd_id"] = None - except ROclient.ROClientException as e: - if e.http_code == 404: # not found - db_nsr_update["_admin.deployed.RO.nsd_id"] = None - self.logger.debug(logging_text + "RO_nsd_id={} already deleted".format(RO_nsd_id)) - elif e.http_code == 409: # conflict - failed_detail.append("RO_nsd_id={} delete conflict: {}".format(RO_nsd_id, e)) - self.logger.debug(logging_text + failed_detail[-1]) - RO_fail = True - else: - failed_detail.append("RO_nsd_id={} delete error: {}".format(RO_nsd_id, e)) - self.logger.error(logging_text + failed_detail[-1]) - RO_fail = True - - if not RO_fail and nsr_deployed and nsr_deployed.get("RO") and nsr_deployed["RO"].get("vnfd"): - for index, vnf_deployed in enumerate(nsr_deployed["RO"]["vnfd"]): - if not vnf_deployed or not vnf_deployed["id"]: - continue - try: - RO_vnfd_id = vnf_deployed["id"] - step = db_nsr_update["detailed-status"] = db_nslcmop_update["detailed-status"] =\ - "Deleting member_vnf_index={} RO_vnfd_id={} from RO".format( - vnf_deployed["member-vnf-index"], RO_vnfd_id) - await self.RO.delete("vnfd", RO_vnfd_id) - self.logger.debug(logging_text + "RO_vnfd_id={} deleted".format(RO_vnfd_id)) - db_nsr_update["_admin.deployed.RO.vnfd.{}.id".format(index)] = None - except ROclient.ROClientException as e: - if e.http_code == 404: # not found - db_nsr_update["_admin.deployed.RO.vnfd.{}.id".format(index)] = None - self.logger.debug(logging_text + "RO_vnfd_id={} already deleted ".format(RO_vnfd_id)) - elif e.http_code == 409: # conflict - failed_detail.append("RO_vnfd_id={} delete conflict: {}".format(RO_vnfd_id, e)) - self.logger.debug(logging_text + failed_detail[-1]) - else: - failed_detail.append("RO_vnfd_id={} delete error: {}".format(RO_vnfd_id, e)) - self.logger.error(logging_text + failed_detail[-1]) + config_descriptor = db_vnfds_from_member_index[vca["member-vnf-index"]].get("vnf-configuration") + vca_type = vca.get("type") + exec_terminate_primitives = (not operation_params.get("skip_terminate_primitives") and + vca.get("needed_terminate")) + # For helm we must destroy_ee. Also for native_charm, as juju_model cannot be deleted if there are + # pending native charms + destroy_ee = True if vca_type in ("helm", "native_charm") else False + # self.logger.debug(logging_text + "vca_index: {}, ee_id: {}, vca_type: {} destroy_ee: {}".format( + # vca_index, vca.get("ee_id"), vca_type, destroy_ee)) + task = asyncio.ensure_future( + self.destroy_N2VC(logging_text, db_nslcmop, vca, config_descriptor, vca_index, + destroy_ee, exec_terminate_primitives)) + tasks_dict_info[task] = "Terminating VCA {}".format(vca.get("ee_id")) + + # wait for pending tasks of terminate primitives + if tasks_dict_info: + self.logger.debug(logging_text + 'Waiting for tasks {}'.format(list(tasks_dict_info.keys()))) + error_list = await self._wait_for_tasks(logging_text, tasks_dict_info, + min(self.timeout_charm_delete, timeout_ns_terminate), + stage, nslcmop_id) + tasks_dict_info.clear() + if error_list: + return # raise LcmException("; ".join(error_list)) + + # remove All execution environments at once + stage[0] = "Stage 3/3 delete all." + + if nsr_deployed.get("VCA"): + stage[1] = "Deleting all execution environments." + self.logger.debug(logging_text + stage[1]) + task_delete_ee = asyncio.ensure_future(asyncio.wait_for(self._delete_all_N2VC(db_nsr=db_nsr), + timeout=self.timeout_charm_delete)) + # task_delete_ee = asyncio.ensure_future(self.n2vc.delete_namespace(namespace="." + nsr_id)) + tasks_dict_info[task_delete_ee] = "Terminating all VCA" + + # Delete from k8scluster + stage[1] = "Deleting KDUs." + self.logger.debug(logging_text + stage[1]) + # print(nsr_deployed) + for kdu in get_iterable(nsr_deployed, "K8s"): + if not kdu or not kdu.get("kdu-instance"): + continue + kdu_instance = kdu.get("kdu-instance") + if kdu.get("k8scluster-type") in self.k8scluster_map: + task_delete_kdu_instance = asyncio.ensure_future( + self.k8scluster_map[kdu["k8scluster-type"]].uninstall( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance)) + else: + self.logger.error(logging_text + "Unknown k8s deployment type {}". + format(kdu.get("k8scluster-type"))) + continue + tasks_dict_info[task_delete_kdu_instance] = "Terminating KDU '{}'".format(kdu.get("kdu-name")) - if failed_detail: - terminate_ok = False - self.logger.error(logging_text + " ;".join(failed_detail)) - db_nsr_update["operational-status"] = "failed" - db_nsr_update["detailed-status"] = "Deletion errors " + "; ".join(failed_detail) - db_nslcmop_update["detailed-status"] = "; ".join(failed_detail) - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" - db_nslcmop_update["statusEnteredTime"] = time() + # remove from RO + stage[1] = "Deleting ns from VIM." + if self.ng_ro: + task_delete_ro = asyncio.ensure_future( + self._terminate_ng_ro(logging_text, nsr_deployed, nsr_id, nslcmop_id, stage)) else: - terminate_ok = True - db_nsr_update["operational-status"] = "terminated" - db_nsr_update["detailed-status"] = "Done" - db_nsr_update["_admin.nsState"] = "NOT_INSTANTIATED" - db_nslcmop_update["detailed-status"] = "Done" - db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED" - db_nslcmop_update["statusEnteredTime"] = time() - if db_nslcmop["operationParams"].get("autoremove"): - autoremove = True + task_delete_ro = asyncio.ensure_future( + self._terminate_RO(logging_text, nsr_deployed, nsr_id, nslcmop_id, stage)) + tasks_dict_info[task_delete_ro] = "Removing deployment from VIM" - except (ROclient.ROClientException, DbException, LcmException) as e: + # rest of staff will be done at finally + + except (ROclient.ROClientException, DbException, LcmException, N2VCException) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except asyncio.CancelledError: - self.logger.error(logging_text + "Cancelled Exception while '{}'".format(step)) + self.logger.error(logging_text + "Cancelled Exception while '{}'".format(stage[1])) exc = "Operation was cancelled" except Exception as e: exc = traceback.format_exc() - self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + self.logger.critical(logging_text + "Exit Exception while '{}': {}".format(stage[1], e), exc_info=True) finally: - if exc and db_nslcmop: - db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" - db_nslcmop_update["statusEnteredTime"] = time() + if exc: + error_list.append(str(exc)) try: - if db_nslcmop and db_nslcmop_update: - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - if db_nsr: - db_nsr_update["_admin.nslcmop"] = None - db_nsr_update["_admin.current-operation"] = None - db_nsr_update["_admin.operation-type"] = None - self.update_db_2("nsrs", nsr_id, db_nsr_update) - - if terminate_ok: - ns_state = "IDLE" - error_description = None - error_detail = None - else: - ns_state = "BROKEN" - error_description = 'Operation: TERMINATING.{}, step: {}'.format(nslcmop_id, step) - error_detail = "; ".join(failed_detail) + # wait for pending tasks + if tasks_dict_info: + stage[1] = "Waiting for terminate pending tasks." + self.logger.debug(logging_text + stage[1]) + error_list += await self._wait_for_tasks(logging_text, tasks_dict_info, timeout_ns_terminate, + stage, nslcmop_id) + stage[1] = stage[2] = "" + except asyncio.CancelledError: + error_list.append("Cancelled") + # TODO cancell all tasks + except Exception as exc: + error_list.append(str(exc)) + # update status at database + if error_list: + error_detail = "; ".join(error_list) + # self.logger.error(logging_text + error_detail) + error_description_nslcmop = '{} Detail: {}'.format(stage[0], error_detail) + error_description_nsr = 'Operation: TERMINATING.{}, {}.'.format(nslcmop_id, stage[0]) - self._write_ns_status( - nsr_id=nsr_id, - ns_state=ns_state, - current_operation="IDLE", - current_operation_id=None, - error_description=error_description, - error_detail=error_detail - ) + db_nsr_update["operational-status"] = "failed" + db_nsr_update["detailed-status"] = error_description_nsr + " Detail: " + error_detail + db_nslcmop_update["detailed-status"] = error_detail + nslcmop_operation_state = "FAILED" + ns_state = "BROKEN" + else: + error_detail = None + error_description_nsr = error_description_nslcmop = None + ns_state = "NOT_INSTANTIATED" + db_nsr_update["operational-status"] = "terminated" + db_nsr_update["detailed-status"] = "Done" + db_nsr_update["_admin.nsState"] = "NOT_INSTANTIATED" + db_nslcmop_update["detailed-status"] = "Done" + nslcmop_operation_state = "COMPLETED" - except DbException as e: - self.logger.error(logging_text + "Cannot update database: {}".format(e)) + if db_nsr: + self._write_ns_status( + nsr_id=nsr_id, + ns_state=ns_state, + current_operation="IDLE", + current_operation_id=None, + error_description=error_description_nsr, + error_detail=error_detail, + other_update=db_nsr_update + ) + self._write_op_status( + op_id=nslcmop_id, + stage="", + error_message=error_description_nslcmop, + operation_state=nslcmop_operation_state, + other_update=db_nslcmop_update, + ) + if ns_state == "NOT_INSTANTIATED": + try: + self.db.set_list("vnfrs", {"nsr-id-ref": nsr_id}, {"_admin.nsState": "NOT_INSTANTIATED"}) + except DbException as e: + self.logger.warn(logging_text + 'Error writing VNFR status for nsr-id-ref: {} -> {}'. + format(nsr_id, e)) + if operation_params: + autoremove = operation_params.get("autoremove", False) if nslcmop_operation_state: try: await self.msg.aiowrite("ns", "terminated", {"nsr_id": nsr_id, "nslcmop_id": nslcmop_id, @@ -2334,22 +3471,59 @@ class NsLcm(LcmBase): except Exception as e: self.logger.error(logging_text + "kafka_write notification Exception {}".format(e)) - # wait for pending tasks - done = None - pending = None - if pending_tasks: - self.logger.debug(logging_text + 'Waiting for terminate pending tasks...') - done, pending = await asyncio.wait(pending_tasks, timeout=3600) - if not pending: - self.logger.debug(logging_text + 'All tasks finished...') - else: - self.logger.info(logging_text + 'There are pending tasks: {}'.format(pending)) - self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_terminate") - @staticmethod - def _map_primitive_params(primitive_desc, params, instantiation_params): + async def _wait_for_tasks(self, logging_text, created_tasks_info, timeout, stage, nslcmop_id, nsr_id=None): + time_start = time() + error_detail_list = [] + error_list = [] + pending_tasks = list(created_tasks_info.keys()) + num_tasks = len(pending_tasks) + num_done = 0 + stage[1] = "{}/{}.".format(num_done, num_tasks) + self._write_op_status(nslcmop_id, stage) + while pending_tasks: + new_error = None + _timeout = timeout + time_start - time() + done, pending_tasks = await asyncio.wait(pending_tasks, timeout=_timeout, + return_when=asyncio.FIRST_COMPLETED) + num_done += len(done) + if not done: # Timeout + for task in pending_tasks: + new_error = created_tasks_info[task] + ": Timeout" + error_detail_list.append(new_error) + error_list.append(new_error) + break + for task in done: + if task.cancelled(): + exc = "Cancelled" + else: + exc = task.exception() + if exc: + if isinstance(exc, asyncio.TimeoutError): + exc = "Timeout" + new_error = created_tasks_info[task] + ": {}".format(exc) + error_list.append(created_tasks_info[task]) + error_detail_list.append(new_error) + if isinstance(exc, (str, DbException, N2VCException, ROclient.ROClientException, LcmException, + K8sException)): + self.logger.error(logging_text + new_error) + else: + exc_traceback = "".join(traceback.format_exception(None, exc, exc.__traceback__)) + self.logger.error(logging_text + created_tasks_info[task] + exc_traceback) + else: + self.logger.debug(logging_text + created_tasks_info[task] + ": Done") + stage[1] = "{}/{}.".format(num_done, num_tasks) + if new_error: + stage[1] += " Errors: " + ". ".join(error_detail_list) + "." + if nsr_id: # update also nsr + self.update_db_2("nsrs", nsr_id, {"errorDescription": "Error at: " + ", ".join(error_list), + "errorDetail": ". ".join(error_detail_list)}) + self._write_op_status(nslcmop_id, stage) + return error_detail_list + + def _map_primitive_params(self, primitive_desc, params, instantiation_params): """ Generates the params to be provided to charm before executing primitive. If user does not provide a parameter, The default-value is used. If it is between < > it look for a value at instantiation_params @@ -2384,65 +3558,93 @@ class NsLcm(LcmBase): width=256) elif isinstance(calculated_params[param_name], str) and calculated_params[param_name].startswith("!!yaml "): calculated_params[param_name] = calculated_params[param_name][7:] + if parameter.get("data-type") == "INTEGER": + try: + calculated_params[param_name] = int(calculated_params[param_name]) + except ValueError: # error converting string to int + raise LcmException( + "Parameter {} of primitive {} must be integer".format(param_name, primitive_desc["name"])) + elif parameter.get("data-type") == "BOOLEAN": + calculated_params[param_name] = not ((str(calculated_params[param_name])).lower() == 'false') # add always ns_config_info if primitive name is config if primitive_desc["name"] == "config": if "ns_config_info" in instantiation_params: calculated_params["ns_config_info"] = instantiation_params["ns_config_info"] + calculated_params["VCA"] = self.vca_config return calculated_params - async def _ns_execute_primitive(self, db_deployed, member_vnf_index, vdu_id, vdu_name, vdu_count_index, - primitive, primitive_params, retries=0, retries_interval=30) -> (str, str): - - # find vca_deployed record for this action + def _look_for_deployed_vca(self, deployed_vca, member_vnf_index, vdu_id, vdu_count_index, kdu_name=None, + ee_descriptor_id=None): + # find vca_deployed record for this action. Raise LcmException if not found or there is not any id. + for vca in deployed_vca: + if not vca: + continue + if member_vnf_index != vca["member-vnf-index"] or vdu_id != vca["vdu_id"]: + continue + if vdu_count_index is not None and vdu_count_index != vca["vdu_count_index"]: + continue + if kdu_name and kdu_name != vca["kdu_name"]: + continue + if ee_descriptor_id and ee_descriptor_id != vca["ee_descriptor_id"]: + continue + break + else: + # vca_deployed not found + raise LcmException("charm for member_vnf_index={} vdu_id={}.{} kdu_name={} execution-environment-list.id={}" + " is not deployed".format(member_vnf_index, vdu_id, vdu_count_index, kdu_name, + ee_descriptor_id)) + + # get ee_id + ee_id = vca.get("ee_id") + vca_type = vca.get("type", "lxc_proxy_charm") # default value for backward compatibility - proxy charm + if not ee_id: + raise LcmException("charm for member_vnf_index={} vdu_id={} kdu_name={} vdu_count_index={} has not " + "execution environment" + .format(member_vnf_index, vdu_id, kdu_name, vdu_count_index)) + return ee_id, vca_type + + async def _ns_execute_primitive(self, ee_id, primitive, primitive_params, retries=0, + retries_interval=30, timeout=None, + vca_type=None, db_dict=None) -> (str, str): try: - for vca_deployed in db_deployed["VCA"]: - if not vca_deployed: - continue - if member_vnf_index != vca_deployed["member-vnf-index"] or vdu_id != vca_deployed["vdu_id"]: - continue - if vdu_name and vdu_name != vca_deployed["vdu_name"]: - continue - if vdu_count_index and vdu_count_index != vca_deployed["vdu_count_index"]: - continue - break - else: - # vca_deployed not found - raise LcmException("charm for member_vnf_index={} vdu_id={} vdu_name={} vdu_count_index={} is not " - "deployed".format(member_vnf_index, vdu_id, vdu_name, vdu_count_index)) - - # get ee_id - ee_id = vca_deployed.get("ee_id") - if not ee_id: - raise LcmException("charm for member_vnf_index={} vdu_id={} vdu_name={} vdu_count_index={} has not " - "execution environment" - .format(member_vnf_index, vdu_id, vdu_name, vdu_count_index)) - if primitive == "config": primitive_params = {"params": primitive_params} + vca_type = vca_type or "lxc_proxy_charm" + while retries >= 0: try: - output = await self.n2vc.exec_primitive( - ee_id=ee_id, - primitive_name=primitive, - params_dict=primitive_params - ) + output = await asyncio.wait_for( + self.vca_map[vca_type].exec_primitive( + ee_id=ee_id, + primitive_name=primitive, + params_dict=primitive_params, + progress_timeout=self.timeout_progress_primitive, + total_timeout=self.timeout_primitive, + db_dict=db_dict), + timeout=timeout or self.timeout_primitive) # execution was OK break - except Exception as e: + except asyncio.CancelledError: + raise + except Exception as e: # asyncio.TimeoutError + if isinstance(e, asyncio.TimeoutError): + e = "Timeout" retries -= 1 if retries >= 0: self.logger.debug('Error executing action {} on {} -> {}'.format(primitive, ee_id, e)) # wait and retry await asyncio.sleep(retries_interval, loop=self.loop) else: - return 'Cannot execute action {} on {}: {}'.format(primitive, ee_id, e), 'FAIL' + return 'FAILED', str(e) - return output, 'OK' + return 'COMPLETED', output + except (LcmException, asyncio.CancelledError): + raise except Exception as e: - return 'Error executing action {}: {}'.format(primitive, e), 'FAIL' + return 'FAIL', 'Error executing action {}: {}'.format(primitive, e) async def action(self, nsr_id, nslcmop_id): @@ -2456,13 +3658,10 @@ class NsLcm(LcmBase): # get all needed from database db_nsr = None db_nslcmop = None - db_nsr_update = {"_admin.nslcmop": nslcmop_id, - "_admin.current-operation": nslcmop_id, - "_admin.operation-type": "action"} - self.update_db_2("nsrs", nsr_id, db_nsr_update) + db_nsr_update = {} db_nslcmop_update = {} nslcmop_operation_state = None - nslcmop_operation_state_detail = None + error_description_nslcmop = None exc = None try: # wait for any previous tasks in process @@ -2485,7 +3684,9 @@ class NsLcm(LcmBase): vdu_id = db_nslcmop["operationParams"].get("vdu_id") kdu_name = db_nslcmop["operationParams"].get("kdu_name") vdu_count_index = db_nslcmop["operationParams"].get("vdu_count_index") - vdu_name = db_nslcmop["operationParams"].get("vdu_name") + primitive = db_nslcmop["operationParams"]["primitive"] + primitive_params = db_nslcmop["operationParams"]["primitive_params"] + timeout_ns_action = db_nslcmop["operationParams"].get("timeout_ns_action", self.timeout_primitive) if vnf_index: step = "Getting vnfr from database" @@ -2493,11 +3694,8 @@ class NsLcm(LcmBase): step = "Getting vnfd from database" db_vnfd = self.db.get_one("vnfds", {"_id": db_vnfr["vnfd-id"]}) else: - if db_nsr.get("nsd"): - db_nsd = db_nsr.get("nsd") # TODO this will be removed - else: - step = "Getting nsd from database" - db_nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]}) + step = "Getting nsd from database" + db_nsd = self.db.get_one("nsds", {"_id": db_nsr["nsd-id"]}) # for backward compatibility if nsr_deployed and isinstance(nsr_deployed.get("VCA"), dict): @@ -2505,178 +3703,184 @@ class NsLcm(LcmBase): db_nsr_update["_admin.deployed.VCA"] = nsr_deployed["VCA"] self.update_db_2("nsrs", nsr_id, db_nsr_update) - primitive = db_nslcmop["operationParams"]["primitive"] - primitive_params = db_nslcmop["operationParams"]["primitive_params"] - # look for primitive - config_primitive_desc = None + config_primitive_desc = descriptor_configuration = None if vdu_id: for vdu in get_iterable(db_vnfd, "vdu"): if vdu_id == vdu["id"]: - for config_primitive in vdu.get("vdu-configuration", {}).get("config-primitive", ()): - if config_primitive["name"] == primitive: - config_primitive_desc = config_primitive - break + descriptor_configuration = vdu.get("vdu-configuration") + break elif kdu_name: - self.logger.debug(logging_text + "Checking actions in KDUs") - kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None) - desc_params = self._format_additional_params(kdur.get("additionalParams")) or {} - if primitive_params: - desc_params.update(primitive_params) - # TODO Check if we will need something at vnf level - index = 0 - for kdu in get_iterable(nsr_deployed, "K8s"): - if kdu_name == kdu["kdu-name"]: - db_dict = {"collection": "nsrs", "filter": {"_id": nsr_id}, - "path": "_admin.deployed.K8s.{}".format(index)} - if primitive == "upgrade": - if desc_params.get("kdu_model"): - kdu_model = desc_params.get("kdu_model") - del desc_params["kdu_model"] - else: - kdu_model = kdu.get("kdu-model") - parts = kdu_model.split(sep=":") - if len(parts) == 2: - kdu_model = parts[0] - - if kdu.get("k8scluster-type") == "chart": - output = await self.k8sclusterhelm.upgrade(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - atomic=True, kdu_model=kdu_model, - params=desc_params, db_dict=db_dict, - timeout=300) - elif kdu.get("k8scluster-type") == "juju": - output = await self.k8sclusterjuju.upgrade(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - atomic=True, kdu_model=kdu_model, - params=desc_params, db_dict=db_dict, - timeout=300) - - else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - - self.logger.debug(logging_text + " Upgrade of kdu {} done".format(output)) - break - elif primitive == "rollback": - if kdu.get("k8scluster-type") == "chart": - output = await self.k8sclusterhelm.rollback(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - db_dict=db_dict) - elif kdu.get("k8scluster-type") == "juju": - output = await self.k8sclusterjuju.rollback(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance"), - db_dict=db_dict) - else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - break - elif primitive == "status": - if kdu.get("k8scluster-type") == "chart": - output = await self.k8sclusterhelm.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance")) - elif kdu.get("k8scluster-type") == "juju": - output = await self.k8sclusterjuju.status_kdu(cluster_uuid=kdu.get("k8scluster-uuid"), - kdu_instance=kdu.get("kdu-instance")) - else: - msg = "k8scluster-type not defined" - raise LcmException(msg) - break - index += 1 - - else: - raise LcmException("KDU '{}' not found".format(kdu_name)) - if output: - db_nslcmop_update["detailed-status"] = output - db_nslcmop_update["operationState"] = 'COMPLETED' - db_nslcmop_update["statusEnteredTime"] = time() - else: - db_nslcmop_update["detailed-status"] = '' - db_nslcmop_update["operationState"] = 'FAILED' - db_nslcmop_update["statusEnteredTime"] = time() - return - elif vnf_index: - for config_primitive in db_vnfd.get("vnf-configuration", {}).get("config-primitive", ()): - if config_primitive["name"] == primitive: - config_primitive_desc = config_primitive + for kdu in get_iterable(db_vnfd, "kdu"): + if kdu_name == kdu["name"]: + descriptor_configuration = kdu.get("kdu-configuration") break + elif vnf_index: + descriptor_configuration = db_vnfd.get("vnf-configuration") else: - for config_primitive in db_nsd.get("ns-configuration", {}).get("config-primitive", ()): + descriptor_configuration = db_nsd.get("ns-configuration") + + if descriptor_configuration and descriptor_configuration.get("config-primitive"): + for config_primitive in descriptor_configuration["config-primitive"]: if config_primitive["name"] == primitive: config_primitive_desc = config_primitive break if not config_primitive_desc: - raise LcmException("Primitive {} not found at [ns|vnf|vdu]-configuration:config-primitive ". - format(primitive)) + if not (kdu_name and primitive in ("upgrade", "rollback", "status")): + raise LcmException("Primitive {} not found at [ns|vnf|vdu]-configuration:config-primitive ". + format(primitive)) + primitive_name = primitive + ee_descriptor_id = None + else: + primitive_name = config_primitive_desc.get("execution-environment-primitive", primitive) + ee_descriptor_id = config_primitive_desc.get("execution-environment-ref") - desc_params = {} if vnf_index: - if db_vnfr.get("additionalParamsForVnf"): - desc_params = self._format_additional_params(db_vnfr["additionalParamsForVnf"]) if vdu_id: vdur = next((x for x in db_vnfr["vdur"] if x["vdu-id-ref"] == vdu_id), None) - if vdur.get("additionalParams"): - desc_params = self._format_additional_params(vdur["additionalParams"]) + desc_params = self._format_additional_params(vdur.get("additionalParams")) + elif kdu_name: + kdur = next((x for x in db_vnfr["kdur"] if x["kdu-name"] == kdu_name), None) + desc_params = self._format_additional_params(kdur.get("additionalParams")) + else: + desc_params = self._format_additional_params(db_vnfr.get("additionalParamsForVnf")) else: - if db_nsr.get("additionalParamsForNs"): - desc_params.update(self._format_additional_params(db_nsr["additionalParamsForNs"])) + desc_params = self._format_additional_params(db_nsr.get("additionalParamsForNs")) + + if kdu_name: + kdu_action = True if not deep_get(kdu, ("kdu-configuration", "juju")) else False # TODO check if ns is in a proper status - output, detail = await self._ns_execute_primitive( - db_deployed=nsr_deployed, - member_vnf_index=vnf_index, - vdu_id=vdu_id, - vdu_name=vdu_name, - vdu_count_index=vdu_count_index, - primitive=primitive, - primitive_params=self._map_primitive_params(config_primitive_desc, primitive_params, desc_params)) - - detailed_status = output - if detail == 'OK': - result = 'COMPLETED' + if kdu_name and (primitive_name in ("upgrade", "rollback", "status") or kdu_action): + # kdur and desc_params already set from before + if primitive_params: + desc_params.update(primitive_params) + # TODO Check if we will need something at vnf level + for index, kdu in enumerate(get_iterable(nsr_deployed, "K8s")): + if kdu_name == kdu["kdu-name"] and kdu["member-vnf-index"] == vnf_index: + break + else: + raise LcmException("KDU '{}' for vnf '{}' not deployed".format(kdu_name, vnf_index)) + + if kdu.get("k8scluster-type") not in self.k8scluster_map: + msg = "unknown k8scluster-type '{}'".format(kdu.get("k8scluster-type")) + raise LcmException(msg) + + db_dict = {"collection": "nsrs", + "filter": {"_id": nsr_id}, + "path": "_admin.deployed.K8s.{}".format(index)} + self.logger.debug(logging_text + "Exec k8s {} on {}.{}".format(primitive_name, vnf_index, kdu_name)) + step = "Executing kdu {}".format(primitive_name) + if primitive_name == "upgrade": + if desc_params.get("kdu_model"): + kdu_model = desc_params.get("kdu_model") + del desc_params["kdu_model"] + else: + kdu_model = kdu.get("kdu-model") + parts = kdu_model.split(sep=":") + if len(parts) == 2: + kdu_model = parts[0] + + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].upgrade( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + atomic=True, kdu_model=kdu_model, + params=desc_params, db_dict=db_dict, + timeout=timeout_ns_action), + timeout=timeout_ns_action + 10) + self.logger.debug(logging_text + " Upgrade of kdu {} done".format(detailed_status)) + elif primitive_name == "rollback": + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].rollback( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance"), + db_dict=db_dict), + timeout=timeout_ns_action) + elif primitive_name == "status": + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].status_kdu( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu.get("kdu-instance")), + timeout=timeout_ns_action) + else: + kdu_instance = kdu.get("kdu-instance") or "{}-{}".format(kdu["kdu-name"], nsr_id) + params = self._map_primitive_params(config_primitive_desc, primitive_params, desc_params) + + detailed_status = await asyncio.wait_for( + self.k8scluster_map[kdu["k8scluster-type"]].exec_primitive( + cluster_uuid=kdu.get("k8scluster-uuid"), + kdu_instance=kdu_instance, + primitive_name=primitive_name, + params=params, db_dict=db_dict, + timeout=timeout_ns_action), + timeout=timeout_ns_action) + + if detailed_status: + nslcmop_operation_state = 'COMPLETED' + else: + detailed_status = '' + nslcmop_operation_state = 'FAILED' else: - result = 'FAILED' - - db_nslcmop_update["detailed-status"] = nslcmop_operation_state_detail = detailed_status - db_nslcmop_update["operationState"] = nslcmop_operation_state = result - db_nslcmop_update["statusEnteredTime"] = time() - self.logger.debug(logging_text + " task Done with result {} {}".format(result, detailed_status)) + ee_id, vca_type = self._look_for_deployed_vca(nsr_deployed["VCA"], + member_vnf_index=vnf_index, + vdu_id=vdu_id, + vdu_count_index=vdu_count_index, + ee_descriptor_id=ee_descriptor_id) + db_nslcmop_notif = {"collection": "nslcmops", + "filter": {"_id": nslcmop_id}, + "path": "admin.VCA"} + nslcmop_operation_state, detailed_status = await self._ns_execute_primitive( + ee_id, + primitive=primitive_name, + primitive_params=self._map_primitive_params(config_primitive_desc, primitive_params, desc_params), + timeout=timeout_ns_action, + vca_type=vca_type, + db_dict=db_nslcmop_notif) + + db_nslcmop_update["detailed-status"] = detailed_status + error_description_nslcmop = detailed_status if nslcmop_operation_state == "FAILED" else "" + self.logger.debug(logging_text + " task Done with result {} {}".format(nslcmop_operation_state, + detailed_status)) return # database update is called inside finally - except (DbException, LcmException) as e: + except (DbException, LcmException, N2VCException, K8sException) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except asyncio.CancelledError: self.logger.error(logging_text + "Cancelled Exception while '{}'".format(step)) exc = "Operation was cancelled" + except asyncio.TimeoutError: + self.logger.error(logging_text + "Timeout while '{}'".format(step)) + exc = "Timeout" except Exception as e: exc = traceback.format_exc() self.logger.critical(logging_text + "Exit Exception {} {}".format(type(e).__name__, e), exc_info=True) finally: - if exc and db_nslcmop: - db_nslcmop_update["detailed-status"] = nslcmop_operation_state_detail = \ + if exc: + db_nslcmop_update["detailed-status"] = detailed_status = error_description_nslcmop = \ "FAILED {}: {}".format(step, exc) - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" - db_nslcmop_update["statusEnteredTime"] = time() - try: - if db_nslcmop_update: - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - if db_nsr: - db_nsr_update["_admin.nslcmop"] = None - db_nsr_update["_admin.operation-type"] = None - db_nsr_update["_admin.nslcmop"] = None - db_nsr_update["_admin.current-operation"] = None - self.update_db_2("nsrs", nsr_id, db_nsr_update) - self._write_ns_status( - nsr_id=nsr_id, - ns_state=None, - current_operation="IDLE", - current_operation_id=None - ) - except DbException as e: - self.logger.error(logging_text + "Cannot update database: {}".format(e)) - self.logger.debug(logging_text + "Exit") + nslcmop_operation_state = "FAILED" + if db_nsr: + self._write_ns_status( + nsr_id=nsr_id, + ns_state=db_nsr["nsState"], # TODO check if degraded. For the moment use previous status + current_operation="IDLE", + current_operation_id=None, + # error_description=error_description_nsr, + # error_detail=error_detail, + other_update=db_nsr_update + ) + + self._write_op_status( + op_id=nslcmop_id, + stage="", + error_message=error_description_nslcmop, + operation_state=nslcmop_operation_state, + other_update=db_nslcmop_update, + ) + if nslcmop_operation_state: try: await self.msg.aiowrite("ns", "actioned", {"nsr_id": nsr_id, "nslcmop_id": nslcmop_id, @@ -2686,7 +3890,7 @@ class NsLcm(LcmBase): self.logger.error(logging_text + "kafka_write notification Exception {}".format(e)) self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_action") - return nslcmop_operation_state, nslcmop_operation_state_detail + return nslcmop_operation_state, detailed_status async def scale(self, nsr_id, nslcmop_id): @@ -2702,16 +3906,12 @@ class NsLcm(LcmBase): db_nslcmop = None db_nslcmop_update = {} nslcmop_operation_state = None - db_nsr_update = {"_admin.nslcmop": nslcmop_id, - "_admin.current-operation": nslcmop_id, - "_admin.operation-type": "scale"} - self.update_db_2("nsrs", nsr_id, db_nsr_update) + db_nsr_update = {} exc = None # in case of error, indicates what part of scale was failed to put nsr at error status scale_process = None old_operational_status = "" old_config_status = "" - vnfr_scaled = False try: # wait for any previous tasks in process step = "Waiting for previous operations to terminate" @@ -2806,8 +4006,25 @@ class NsLcm(LcmBase): vdu_scaling_info["scaling_direction"] = "OUT" vdu_scaling_info["vdu-create"] = {} for vdu_scale_info in scaling_descriptor["vdu"]: + vdud = next(vdu for vdu in db_vnfd.get("vdu") if vdu["id"] == vdu_scale_info["vdu-id-ref"]) + vdu_index = len([x for x in db_vnfr.get("vdur", ()) + if x.get("vdu-id-ref") == vdu_scale_info["vdu-id-ref"] and + x.get("member-vnf-index-ref") == vnf_index]) + cloud_init_text = self._get_cloud_init(vdud, db_vnfd) + if cloud_init_text: + additional_params = self._get_vdu_additional_params(db_vnfr, vdud["id"]) or {} + cloud_init_list = [] + for x in range(vdu_scale_info.get("count", 1)): + if cloud_init_text: + # TODO Information of its own ip is not available because db_vnfr is not updated. + additional_params["OSM"] = self._get_osm_params(db_vnfr, vdu_scale_info["vdu-id-ref"], + vdu_index + x) + cloud_init_list.append(self._parse_cloud_init(cloud_init_text, additional_params, + db_vnfd["id"], vdud["id"])) RO_scaling_info.append({"osm_vdu_id": vdu_scale_info["vdu-id-ref"], "member-vnf-index": vnf_index, "type": "create", "count": vdu_scale_info.get("count", 1)}) + if cloud_init_list: + RO_scaling_info[-1]["cloud_init"] = cloud_init_list vdu_scaling_info["vdu-create"][vdu_scale_info["vdu-id-ref"]] = vdu_scale_info.get("count", 1) elif scaling_type == "SCALE_IN": @@ -2864,7 +4081,7 @@ class NsLcm(LcmBase): raise LcmException( "Invalid vnfd descriptor at scaling-group-descriptor[name='{}']:scaling-config-action" "[vnf-config-primitive-name-ref='{}'] does not match any vnf-configuration:config-" - "primitive".format(scaling_group, config_primitive)) + "primitive".format(scaling_group, vnf_config_primitive)) vnfr_params = {"VDU_SCALE_INFO": vdu_scaling_info} if db_vnfr.get("additionalParamsForVnf"): @@ -2874,10 +4091,10 @@ class NsLcm(LcmBase): db_nsr_update["config-status"] = "configuring pre-scaling" primitive_params = self._map_primitive_params(config_primitive, {}, vnfr_params) - # Pre-scale reintent check: Check if this sub-operation has been executed before + # Pre-scale retry check: Check if this sub-operation has been executed before op_index = self._check_or_add_scale_suboperation( db_nslcmop, nslcmop_id, vnf_index, vnf_config_primitive, primitive_params, 'PRE-SCALE') - if (op_index == self.SUBOPERATION_STATUS_SKIP): + if op_index == self.SUBOPERATION_STATUS_SKIP: # Skip sub-operation result = 'COMPLETED' result_detail = 'Done' @@ -2885,22 +4102,30 @@ class NsLcm(LcmBase): "vnf_config_primitive={} Skipped sub-operation, result {} {}".format( vnf_config_primitive, result, result_detail)) else: - if (op_index == self.SUBOPERATION_STATUS_NEW): + if op_index == self.SUBOPERATION_STATUS_NEW: # New sub-operation: Get index of this sub-operation op_index = len(db_nslcmop.get('_admin', {}).get('operations')) - 1 self.logger.debug(logging_text + "vnf_config_primitive={} New sub-operation". format(vnf_config_primitive)) else: - # Reintent: Get registered params for this existing sub-operation + # retry: Get registered params for this existing sub-operation op = db_nslcmop.get('_admin', {}).get('operations', [])[op_index] vnf_index = op.get('member_vnf_index') vnf_config_primitive = op.get('primitive') primitive_params = op.get('primitive_params') - self.logger.debug(logging_text + "vnf_config_primitive={} Sub-operation reintent". + self.logger.debug(logging_text + "vnf_config_primitive={} Sub-operation retry". format(vnf_config_primitive)) # Execute the primitive, either with new (first-time) or registered (reintent) args + ee_descriptor_id = config_primitive.get("execution-environment-ref") + primitive_name = config_primitive.get("execution-environment-primitive", + vnf_config_primitive) + ee_id, vca_type = self._look_for_deployed_vca(nsr_deployed["VCA"], + member_vnf_index=vnf_index, + vdu_id=None, + vdu_count_index=None, + ee_descriptor_id=ee_descriptor_id) result, result_detail = await self._ns_execute_primitive( - nsr_deployed, vnf_index, None, None, None, vnf_config_primitive, primitive_params) + ee_id, primitive_name, primitive_params, vca_type) self.logger.debug(logging_text + "vnf_config_primitive={} Done with result {} {}".format( vnf_config_primitive, result, result_detail)) # Update operationState = COMPLETED | FAILED @@ -2918,26 +4143,26 @@ class NsLcm(LcmBase): # if (RO_nsr_id and RO_scaling_info): if RO_scaling_info: scale_process = "RO" - # Scale RO reintent check: Check if this sub-operation has been executed before + # Scale RO retry check: Check if this sub-operation has been executed before op_index = self._check_or_add_scale_suboperation( db_nslcmop, vnf_index, None, None, 'SCALE-RO', RO_nsr_id, RO_scaling_info) - if (op_index == self.SUBOPERATION_STATUS_SKIP): + if op_index == self.SUBOPERATION_STATUS_SKIP: # Skip sub-operation result = 'COMPLETED' result_detail = 'Done' self.logger.debug(logging_text + "Skipped sub-operation RO, result {} {}".format( result, result_detail)) else: - if (op_index == self.SUBOPERATION_STATUS_NEW): + if op_index == self.SUBOPERATION_STATUS_NEW: # New sub-operation: Get index of this sub-operation op_index = len(db_nslcmop.get('_admin', {}).get('operations')) - 1 self.logger.debug(logging_text + "New sub-operation RO") else: - # Reintent: Get registered params for this existing sub-operation + # retry: Get registered params for this existing sub-operation op = db_nslcmop.get('_admin', {}).get('operations', [])[op_index] RO_nsr_id = op.get('RO_nsr_id') RO_scaling_info = op.get('RO_scaling_info') - self.logger.debug(logging_text + "Sub-operation RO reintent".format( + self.logger.debug(logging_text + "Sub-operation RO retry for primitive {}".format( vnf_config_primitive)) RO_desc = await self.RO.create_action("ns", RO_nsr_id, {"vdu-scaling": RO_scaling_info}) @@ -2948,7 +4173,7 @@ class NsLcm(LcmBase): db_nslcmop_update["_admin.deploy.RO"] = RO_nslcmop_id RO_task_done = False - step = detailed_status = "Waiting RO_task_id={} to complete the scale action.".format(RO_nslcmop_id) + step = detailed_status = "Waiting for VIM to scale. RO_task_id={}.".format(RO_nslcmop_id) detailed_status_old = None self.logger.debug(logging_text + step) @@ -2957,6 +4182,10 @@ class NsLcm(LcmBase): if not RO_task_done: desc = await self.RO.show("ns", item_id_name=RO_nsr_id, extra_item="action", extra_item_id=RO_nslcmop_id) + + # deploymentStatus + self._on_update_ro_db(nsrs_id=nsr_id, ro_descriptor=desc) + ns_status, ns_status_info = self.RO.check_action_status(desc) if ns_status == "ERROR": raise ROclient.ROClientException(ns_status_info) @@ -2964,11 +4193,16 @@ class NsLcm(LcmBase): detailed_status = step + "; {}".format(ns_status_info) elif ns_status == "ACTIVE": RO_task_done = True + self.scale_vnfr(db_vnfr, vdu_create=vdu_create, vdu_delete=vdu_delete) step = detailed_status = "Waiting ns ready at RO. RO_id={}".format(RO_nsr_id) self.logger.debug(logging_text + step) else: assert False, "ROclient.check_action_status returns unknown {}".format(ns_status) else: + desc = await self.RO.show("ns", RO_nsr_id) + ns_status, ns_status_info = self.RO.check_ns_status(desc) + # deploymentStatus + self._on_update_ro_db(nsrs_id=nsr_id, ro_descriptor=desc) if ns_status == "ERROR": raise ROclient.ROClientException(ns_status_info) @@ -2977,11 +4211,7 @@ class NsLcm(LcmBase): elif ns_status == "ACTIVE": step = detailed_status = \ "Waiting for management IP address reported by the VIM. Updating VNFRs" - if not vnfr_scaled: - self.scale_vnfr(db_vnfr, vdu_create=vdu_create, vdu_delete=vdu_delete) - vnfr_scaled = True try: - desc = await self.RO.show("ns", RO_nsr_id) # nsr_deployed["nsr_ip"] = RO.get_ns_vnf_info(desc) self.ns_update_vnfr({db_vnfr["member-vnf-index-ref"]: db_vnfr}, desc) break @@ -3047,15 +4277,15 @@ class NsLcm(LcmBase): if config_primitive["name"] == vnf_config_primitive: break else: - raise LcmException("Invalid vnfd descriptor at scaling-group-descriptor[name='{}']:" - "scaling-config-action[vnf-config-primitive-name-ref='{}'] does not " - "match any vnf-configuration:config-primitive".format(scaling_group, - config_primitive)) + raise LcmException( + "Invalid vnfd descriptor at scaling-group-descriptor[name='{}']:scaling-config-" + "action[vnf-config-primitive-name-ref='{}'] does not match any vnf-configuration:" + "config-primitive".format(scaling_group, vnf_config_primitive)) scale_process = "VCA" db_nsr_update["config-status"] = "configuring post-scaling" primitive_params = self._map_primitive_params(config_primitive, {}, vnfr_params) - # Post-scale reintent check: Check if this sub-operation has been executed before + # Post-scale retry check: Check if this sub-operation has been executed before op_index = self._check_or_add_scale_suboperation( db_nslcmop, nslcmop_id, vnf_index, vnf_config_primitive, primitive_params, 'POST-SCALE') if op_index == self.SUBOPERATION_STATUS_SKIP: @@ -3072,16 +4302,24 @@ class NsLcm(LcmBase): self.logger.debug(logging_text + "vnf_config_primitive={} New sub-operation". format(vnf_config_primitive)) else: - # Reintent: Get registered params for this existing sub-operation + # retry: Get registered params for this existing sub-operation op = db_nslcmop.get('_admin', {}).get('operations', [])[op_index] vnf_index = op.get('member_vnf_index') vnf_config_primitive = op.get('primitive') primitive_params = op.get('primitive_params') - self.logger.debug(logging_text + "vnf_config_primitive={} Sub-operation reintent". + self.logger.debug(logging_text + "vnf_config_primitive={} Sub-operation retry". format(vnf_config_primitive)) # Execute the primitive, either with new (first-time) or registered (reintent) args + ee_descriptor_id = config_primitive.get("execution-environment-ref") + primitive_name = config_primitive.get("execution-environment-primitive", + vnf_config_primitive) + ee_id, vca_type = self._look_for_deployed_vca(nsr_deployed["VCA"], + member_vnf_index=vnf_index, + vdu_id=None, + vdu_count_index=None, + ee_descriptor_id=ee_descriptor_id) result, result_detail = await self._ns_execute_primitive( - nsr_deployed, vnf_index, None, None, None, vnf_config_primitive, primitive_params) + ee_id, primitive_name, primitive_params, vca_type) self.logger.debug(logging_text + "vnf_config_primitive={} Done with result {} {}".format( vnf_config_primitive, result, result_detail)) # Update operationState = COMPLETED | FAILED @@ -3094,9 +4332,6 @@ class NsLcm(LcmBase): scale_process = None # POST-SCALE END - db_nslcmop_update["operationState"] = nslcmop_operation_state = "COMPLETED" - db_nslcmop_update["statusEnteredTime"] = time() - db_nslcmop_update["detailed-status"] = "done" db_nsr_update["detailed-status"] = "" # "scaled {} {}".format(scaling_group, scaling_type) db_nsr_update["operational-status"] = "running" if old_operational_status == "failed" \ else old_operational_status @@ -3112,16 +4347,19 @@ class NsLcm(LcmBase): exc = traceback.format_exc() self.logger.critical(logging_text + "Exit Exception {} {}".format(type(e).__name__, e), exc_info=True) finally: + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="IDLE", + current_operation_id=None + ) if exc: - if db_nslcmop: - db_nslcmop_update["detailed-status"] = "FAILED {}: {}".format(step, exc) - db_nslcmop_update["operationState"] = nslcmop_operation_state = "FAILED" - db_nslcmop_update["statusEnteredTime"] = time() + db_nslcmop_update["detailed-status"] = error_description_nslcmop = "FAILED {}: {}".format(step, exc) + nslcmop_operation_state = "FAILED" if db_nsr: db_nsr_update["operational-status"] = old_operational_status db_nsr_update["config-status"] = old_config_status db_nsr_update["detailed-status"] = "" - db_nsr_update["_admin.nslcmop"] = None if scale_process: if "VCA" in scale_process: db_nsr_update["config-status"] = "failed" @@ -3129,24 +4367,27 @@ class NsLcm(LcmBase): db_nsr_update["operational-status"] = "failed" db_nsr_update["detailed-status"] = "FAILED scaling nslcmop={} {}: {}".format(nslcmop_id, step, exc) - try: - if db_nslcmop and db_nslcmop_update: - self.update_db_2("nslcmops", nslcmop_id, db_nslcmop_update) - if db_nsr: - db_nsr_update["_admin.current-operation"] = None - db_nsr_update["_admin.operation-type"] = None - db_nsr_update["_admin.nslcmop"] = None - self.update_db_2("nsrs", nsr_id, db_nsr_update) + else: + error_description_nslcmop = None + nslcmop_operation_state = "COMPLETED" + db_nslcmop_update["detailed-status"] = "Done" - self._write_ns_status( - nsr_id=nsr_id, - ns_state=None, - current_operation="IDLE", - current_operation_id=None - ) + self._write_op_status( + op_id=nslcmop_id, + stage="", + error_message=error_description_nslcmop, + operation_state=nslcmop_operation_state, + other_update=db_nslcmop_update, + ) + if db_nsr: + self._write_ns_status( + nsr_id=nsr_id, + ns_state=None, + current_operation="IDLE", + current_operation_id=None, + other_update=db_nsr_update + ) - except DbException as e: - self.logger.error(logging_text + "Cannot update database: {}".format(e)) if nslcmop_operation_state: try: await self.msg.aiowrite("ns", "scaled", {"nsr_id": nsr_id, "nslcmop_id": nslcmop_id, @@ -3159,3 +4400,35 @@ class NsLcm(LcmBase): self.logger.error(logging_text + "kafka_write notification Exception {}".format(e)) self.logger.debug(logging_text + "Exit") self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_scale") + + async def add_prometheus_metrics(self, ee_id, artifact_path, ee_config_descriptor, vnfr_id, nsr_id, target_ip): + if not self.prometheus: + return + # look if exist a file called 'prometheus*.j2' and + artifact_content = self.fs.dir_ls(artifact_path) + job_file = next((f for f in artifact_content if f.startswith("prometheus") and f.endswith(".j2")), None) + if not job_file: + return + with self.fs.file_open((artifact_path, job_file), "r") as f: + job_data = f.read() + + # TODO get_service + _, _, service = ee_id.partition(".") # remove prefix "namespace." + host_name = "{}-{}".format(service, ee_config_descriptor["metric-service"]) + host_port = "80" + vnfr_id = vnfr_id.replace("-", "") + variables = { + "JOB_NAME": vnfr_id, + "TARGET_IP": target_ip, + "EXPORTER_POD_IP": host_name, + "EXPORTER_POD_PORT": host_port, + } + job_list = self.prometheus.parse_job(job_data, variables) + # ensure job_name is using the vnfr_id. Adding the metadata nsr_id + for job in job_list: + if not isinstance(job.get("job_name"), str) or vnfr_id not in job["job_name"]: + job["job_name"] = vnfr_id + "_" + str(randint(1, 10000)) + job["nsr_id"] = nsr_id + job_dict = {jl["job_name"]: jl for jl in job_list} + if await self.prometheus.update(job_dict): + return list(job_dict.keys()) diff --git a/osm_lcm/prometheus.py b/osm_lcm/prometheus.py new file mode 100644 index 0000000..2be94af --- /dev/null +++ b/osm_lcm/prometheus.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- + +## +# Copyright 2020 Telefonica S.A. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +## + +import asyncio +from time import time +import logging +import aiohttp +import yaml +import os +from osm_lcm.lcm_utils import LcmException +from osm_common.dbbase import DbException +from jinja2 import Template, TemplateError, TemplateNotFound, TemplateSyntaxError + +__author__ = "Alfonso Tierno " + +initial_prometheus_data = { + "_id": "prometheus", + "_admin": { + "locked_at": 0, + "locked_by": None, + "modified": 1593445184, # 2020-06-29 + "created": 1593445184, + "version": "1.0" # to allow future version updates + }, + 'scrape_configs': { # Dictionary at database. Converted to list before sending to prometheus + 'mon_exporter': {'static_configs': [{'targets': ['mon:8000']}], 'job_name': 'mon_exporter'}, + }, + 'global': {'evaluation_interval': '15s', 'scrape_interval': '15s'}, + 'rule_files': None, + 'alerting': {'alertmanagers': [{'static_configs': [{'targets': None}]}]} +} + + +class Prometheus: + """ + Implements a class to update Prometheus + """ + + PROMETHEUS_LOCKED_TIME = 120 + + def __init__(self, config, worker_id, db, loop, logger=None): + self.worker_id = worker_id + self.db = db + self.loop = loop + self.logger = logger or logging.getLogger("lcm.prometheus") + self.server = config["uri"] + self.path = config["path"] + if not self.path.endswith("/"): + self.path += "/" + self.cfg_file = self.path + "prometheus.yml" + self.cfg_file_backup = self.path + "prometheus.yml-backup" + + @staticmethod + def parse_job(job_data: str, variables: dict) -> dict: + try: + template = Template(job_data) + job_parsed = template.render(variables or {}) + return yaml.safe_load(job_parsed) + except (TemplateError, TemplateNotFound, TemplateSyntaxError) as e: + # TODO yaml exceptions + raise LcmException("Error parsing Jinja2 to prometheus job. job_data={}, variables={}. Error={}".format( + job_data, variables, e)) + + async def start(self): + for retry in range(4): + try: + # self.logger("Starting prometheus ") + # read from database + prometheus_data = self.db.get_one("admin", {"_id": "prometheus"}, fail_on_empty=False) + if not prometheus_data: + self.logger.info("Init db.admin.prometheus content") + self.db.create("admin", initial_prometheus_data) + # send database config file to prometheus. Ignore loading errors, as prometheus may be starting + # but at least an initial configuration file is set + await self.update() + return + except DbException as e: + if retry == 3: + raise LcmException("Max retries trying to init prometheus configuration: {}".format(e)) + await asyncio.sleep(5, loop=self.loop) + + async def update(self, add_jobs: dict = None, remove_jobs: list = None) -> bool: + """ + + :param add_jobs: dictionary with {job_id_1: job_content, job_id_2: job_content} + :param remove_jobs: list with jobs to remove [job_id_1, job_id_2] + :return: result. If false prometheus denies this configuration. Exception on error + """ + for retry in range(20): + result = True + if retry: # first time do not wait + await asyncio.sleep(4 + retry, loop=self.loop) + + # lock database + now = time() + if not self.db.set_one( + "admin", + q_filter={"_id": "prometheus", "_admin.locked_at.lt": now - self.PROMETHEUS_LOCKED_TIME}, + update_dict={"_admin.locked_at": now, "_admin.locked_by": self.worker_id}, + fail_on_empty=False): + continue + # read database + prometheus_data = self.db.get_one("admin", {"_id": "prometheus"}) + update_dict = {"_admin.locked_at": 0, + "_admin.locked_by": None} + + # Make changes from prometheus_incremental + push_dict = pull_dict = None + if add_jobs or remove_jobs: + log_text_list = [] + if add_jobs: + log_text_list.append("adding jobs: {}".format(list(add_jobs.keys()))) + prometheus_data["scrape_configs"].update(add_jobs) + push_dict = {"scrape_configs." + job_id: job_data for job_id, job_data in add_jobs.items()} + elif remove_jobs: + log_text_list.append("removing jobs: {}".format(list(remove_jobs))) + for job_id in remove_jobs: + prometheus_data["scrape_configs"].pop(job_id, None) + pull_dict = {"scrape_configs." + job_id: None for job_id in remove_jobs} + self.logger.debug("Updating. " + ". ".join(log_text_list)) + + if not await self.send_data(prometheus_data): + self.logger.error("Cannot update add_jobs: {}. remove_jobs: {}".format(add_jobs, remove_jobs)) + push_dict = pull_dict = None + result = False + + # unblock database + if push_dict: + update_dict.update(push_dict) + if push_dict or pull_dict: + update_dict["_admin.modified_at"] = now + if not self.db.set_one( + "admin", {"_id": "prometheus", "_admin.locked_at": now, "_admin.locked_by": self.worker_id}, + update_dict=update_dict, unset=pull_dict, fail_on_empty=False): + continue + return result + raise LcmException("Cannot update prometheus database. Reached max retries") + + async def send_data(self, new_config): + restore_backup = False + del new_config["_id"] + del new_config["_admin"] + new_scrape_configs = [] + + # generate a list with the values of scrape_configs + for scrape_config in new_config["scrape_configs"].values(): + scrape_config = scrape_config.copy() + # remove nsr_id metadata from scrape_configs + scrape_config.pop("nsr_id", None) + new_scrape_configs.append(scrape_config) + new_config["scrape_configs"] = new_scrape_configs + + try: + if os.path.exists(self.cfg_file): + os.rename(self.cfg_file, self.cfg_file_backup) + restore_backup = True + with open(self.cfg_file, "w+") as f: + yaml.safe_dump(new_config, f, indent=4, default_flow_style=False) + # self.logger.debug("new configuration: {}".format(yaml.safe_dump(new_config, indent=4, + # default_flow_style=False))) + async with aiohttp.ClientSession() as session: + async with session.post(self.server + "-/reload") as resp: + if resp.status > 204: + raise LcmException(await resp.text()) + await asyncio.sleep(5, loop=self.loop) + # If prometheus does not admit this configuration, remains with the old one + # Then, to check if the configuration has been accepted, get the configuration from prometheus + # and compares with the inserted one + async with session.get(self.server + "api/v1/status/config") as resp: + if resp.status > 204: + raise LcmException(await resp.text()) + current_config = await resp.json() + if not self._check_configuration_equal(current_config, new_config): + return False + else: + restore_backup = False + return True + except Exception as e: + self.logger.error("Error updating configuration url={}: {}".format(self.server, e)) + return False + finally: + if restore_backup: + try: + os.rename(self.cfg_file_backup, self.cfg_file) + except Exception as e: + self.logger.critical("Exception while rolling back: {}".format(e)) + + def _check_configuration_equal(self, current_config, expected_config): + try: + # self.logger.debug("Comparing current_config='{}' with expected_config='{}'".format(current_config, + # expected_config)) + current_config_yaml = yaml.safe_load(current_config['data']['yaml']) + current_jobs = [j["job_name"] for j in current_config_yaml["scrape_configs"]] + expected_jobs = [j["job_name"] for j in expected_config["scrape_configs"]] + if current_jobs == expected_jobs: + return True + else: + self.logger.error("Not all jobs have been loaded. Target jobs: {} Loaded jobs: {}".format( + expected_jobs, current_jobs)) + return False + except Exception as e: + self.logger.error("Invalid obtained status from server. Error: '{}'. Obtained data: '{}'".format( + e, current_config)) + # if format is not understood, cannot be compared, assume it is ok + return True diff --git a/osm_lcm/tests/test_db_descriptors.py b/osm_lcm/tests/test_db_descriptors.py index ed98e11..b41e644 100644 --- a/osm_lcm/tests/test_db_descriptors.py +++ b/osm_lcm/tests/test_db_descriptors.py @@ -170,6 +170,24 @@ db_nslcmops_text = """ operationState: FAILED startTime: 1566823354.414689 statusEnteredTime: 1566824534.5112448 + +- _admin: + created: 1600000000.0000000 + modified: 1600000000.0000000 + projects_read: + - 25b5aebf-3da1-49ed-99de-1d2b4a86d6e4 + projects_write: + - 25b5aebf-3da1-49ed-99de-1d2b4a86d6e4 + worker: 86434c2948e2 + _id: a639fac7-e0bb-4225-ffff-c1f8efcc125e + detailed-status: None + lcmOperationType: terminate + nsInstanceId: f48163a6-c807-47bc-9682-f72caef5af85 + operationParams: {} + operationState: PROCESSING + startTime: 1600000000.0000000 + statusEnteredTime: 1600000000.0000000 + - _admin: created: 1575034637.044651 modified: 1575034637.044651 @@ -219,6 +237,8 @@ db_nsrs_text = """ member-vnf-index: '2' VCA: - application: alf-b-aa + ee_id: f48163a6-c807-47bc-9682-f72caef5af85.alf-b-aa + needed_terminate: True detailed-status: Ready! member-vnf-index: '1' model: f48163a6-c807-47bc-9682-f72caef5af85 @@ -231,6 +251,8 @@ db_nsrs_text = """ vdu_name: null vnfd_id: hackfest3charmed-vnf - application: alf-c-ab + ee_id: f48163a6-c807-47bc-9682-f72caef5af85.alf-c-ab + needed_terminate: True detailed-status: Ready! member-vnf-index: '2' model: f48163a6-c807-47bc-9682-f72caef5af85 @@ -424,10 +446,12 @@ db_nsrs_text = """ - 7ab0d10d-8ce2-4c68-aef6-cc5a437a9c62 """ -db_ro_ns_text = """ +ro_ns_text = """ datacenter_tenant_id: dc5c67fa-c7f2-11e9-b9c0-02420aff0004 description: null name: ALF +classifications: [] +sdn_nets: [] nets: - created: false datacenter_id: dc51ce6c-c7f2-11e9-b9c0-02420aff0004 @@ -675,6 +699,36 @@ vnfs: vnfd_osm_id: f48163a6-c807-47bc-9682-f72caef5af85.1.2 """ +ro_delete_action_text = """ +actions: +- created_at: 1580140763.1099188 + description: DELETE + instance_id: c9fe9908-3180-430d-b633-fca2f68db008 + modified_at: 1580140763.253148 + number_done: 1 + number_failed: 0 + number_tasks: 1 + tenant_id: 0ea38bd0-2729-47a9-ae07-c6ce76115eb2 + uuid: delete + vim_wim_actions: + - action: DELETE + created_at: 1580140763.1099188 + datacenter_vim_id: dc5c67fa-c7f2-11e9-b9c0-02420aff0004 + error_msg: null + extra: '{params: [9b6a2ac4-767e-4ec9-9497-8ba63084c77f, null]}' + instance_action_id: ACTION-1580140763.054037 + item: instance_nets + item_id: 8cb06b72-c71d-4b58-b419-95025fa651d3 + modified_at: 1580140763.1099188 + related: 8cb06b72-c71d-4b58-b419-95025fa651d3 + status: SUPERSEDED + task_index: 0 + vim_id: null + wim_account_id: null + wim_internal_id: null + worker: null +""" + db_vim_accounts_text = """ --- - _admin: @@ -699,15 +753,44 @@ db_vim_accounts_text = """ projects_write: - 25b5aebf-3da1-49ed-99de-1d2b4a86d6e4 _id: ea958ba5-4e58-4405-bf42-6e3be15d4c3a - description: Openstack site 2, based on Mirantis, also called DSS9000-1, with - tenant tid - name: ost2-mrt-tid + description: some description here + name: vim1 schema_version: '1.1' vim_password: 5g0yGX86qIhprX86YTMcpg== vim_tenant_name: osm vim_type: openstack vim_url: http://10.95.87.162:5000/v2.0 vim_user: osm +- _admin: + created: 1566818150.3024442 + current_operation: 0 + deployed: + RO: 9ac17c0d-4265-4333-843b-c3cbd1f93f88 + RO-account: 011895dc-ab34-4c9f-b06f-401a8ffb073b + detailed-status: Done + modified: 1566818150.3024442 + operationalState: ENABLED + operations: + - detailed-status: Done + lcmOperationType: create + operationParams: null + operationState: COMPLETED + startTime: 1566818150.3025382 + statusEnteredTime: 1566818150.3025382 + worker: 86434c2948e2 + projects_read: + - 25b5aebf-3da1-49ed-99de-1d2b4a86d6e4 + projects_write: + - 25b5aebf-3da1-49ed-99de-1d2b4a86d6e4 + _id: 05357241-1a01-416f-9e02-af20f65f51cd + description: No description + name: vim2 + schema_version: '1.1' + vim_password: 5g0yGX86qIhprX86YTMcpg== + vim_tenant_name: osm + vim_type: dumy + vim_url: http://10.95.88.162:5000/v2.0 + vim_user: osm """ db_k8sclusters_text = """ @@ -1210,3 +1293,17 @@ db_nslcmops_scale_text = """ operationState: COMPLETED detailed-status: done """ + +test_ids = { + # contains the ids of ns and operations of every test + "TEST-A": { + "ns": "f48163a6-c807-47bc-9682-f72caef5af85", + "instantiate": "a639fac7-e0bb-4225-8ecb-c1f8efcc125e", + "terminate": "a639fac7-e0bb-4225-ffff-c1f8efcc125e", + }, + "TEST-KDU": { + "ns": "0bcb701c-ee4d-41ab-8ee6-f4156f7f114d", + "instantiate": "cf3aa178-7640-4174-b921-2330e6f2aad6", + "terminate": None, + } +} diff --git a/osm_lcm/tests/test_lcm_helm_conn.py b/osm_lcm/tests/test_lcm_helm_conn.py new file mode 100644 index 0000000..ae92c5e --- /dev/null +++ b/osm_lcm/tests/test_lcm_helm_conn.py @@ -0,0 +1,103 @@ +## +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# For those usages not covered by the Apache License, Version 2.0 please +# contact: alfonso.tiernosepulveda@telefonica.com +## + +import asynctest +import logging + +from osm_lcm import lcm_helm_conn +from osm_lcm.lcm_helm_conn import LCMHelmConn +from osm_common.fslocal import FsLocal +from asynctest.mock import Mock +from osm_common.dbmemory import DbMemory + +__author__ = "Isabel Lloret " + + +class TestLcmHelmConn(asynctest.TestCase): + logging.basicConfig(level=logging.DEBUG) + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + async def setUp(self): + self.db = Mock(DbMemory()) + self.fs = asynctest.Mock(FsLocal()) + self.fs.path = "/app/storage" + vca_config = {} + lcm_helm_conn.K8sHelmConnector = asynctest.Mock(lcm_helm_conn.K8sHelmConnector) + self.helm_conn = LCMHelmConn(self.db, self.fs, loop=self.loop, vca_config=vca_config, log=self.logger) + + @asynctest.fail_on(active_handles=True) + async def test_create_execution_environment(self): + namespace = "testnamespace" + db_dict = {} + artifact_path = "helm_sample_charm" + helm_chart_id = "helm_sample_charm_0001" + self.helm_conn._k8sclusterhelm.install = asynctest.CoroutineMock(return_value=helm_chart_id) + self.db.get_one.return_value = {"_admin": {"helm-chart": {"id": "myk8s_id"}}} + ee_id, _ = await self.helm_conn.create_execution_environment(namespace, db_dict, artifact_path=artifact_path) + self.assertEqual(ee_id, "{}.{}".format("osm", helm_chart_id), + "Check ee_id format: .") + self.helm_conn._k8sclusterhelm.install.assert_called_once_with("myk8s_id", + kdu_model="/app/storage/helm_sample_charm", + namespace="osm", db_dict=db_dict, + params=None, timeout=None) + + @asynctest.fail_on(active_handles=True) + async def test_get_ee_ssh_public__key(self): + ee_id = "osm.helm_sample_charm_0001" + db_dict = {} + lcm_helm_conn.socket.gethostbyname = asynctest.Mock() + mock_pub_key = "ssh-rsapubkey" + self.db.get_one.return_value = {"_admin": {"helm-chart": {"id": "myk8s_id"}}} + self.helm_conn._get_ssh_key = asynctest.CoroutineMock(return_value=mock_pub_key) + pub_key = await self.helm_conn.get_ee_ssh_public__key(ee_id=ee_id, db_dict=db_dict) + self.assertEqual(pub_key, mock_pub_key) + + @asynctest.fail_on(active_handles=True) + async def test_execute_primitive(self): + lcm_helm_conn.socket.gethostbyname = asynctest.Mock() + ee_id = "osm.helm_sample_charm_0001" + primitive_name = "sleep" + params = {} + self.db.get_one.return_value = {"_admin": {"helm-chart": {"id": "myk8s_id"}}} + self.helm_conn._execute_primitive_internal = asynctest.CoroutineMock(return_value=("OK", "test-ok")) + message = await self.helm_conn.exec_primitive(ee_id, primitive_name, params) + self.assertEqual(message, "test-ok") + + @asynctest.fail_on(active_handles=True) + async def test_execute_config_primitive(self): + self.logger.debug("Execute config primitive") + lcm_helm_conn.socket.gethostbyname = asynctest.Mock() + ee_id = "osm.helm_sample_charm_0001" + primitive_name = "config" + params = {"ssh-host-name": "host1"} + self.db.get_one.return_value = {"_admin": {"helm-chart": {"id": "myk8s_id"}}} + self.helm_conn._execute_primitive_internal = asynctest.CoroutineMock(return_value=("OK", "CONFIG OK")) + message = await self.helm_conn.exec_primitive(ee_id, primitive_name, params) + self.assertEqual(message, "CONFIG OK") + + @asynctest.fail_on(active_handles=True) + async def test_delete_execution_environment(self): + ee_id = "osm.helm_sample_charm_0001" + self.db.get_one.return_value = {"_admin": {"helm-chart": {"id": "myk8s_id"}}} + self.helm_conn._k8sclusterhelm.uninstall = asynctest.CoroutineMock() + await self.helm_conn.delete_execution_environment(ee_id) + self.helm_conn._k8sclusterhelm.uninstall.assert_called_once_with("myk8s_id", "helm_sample_charm_0001") + + +if __name__ == '__main__': + asynctest.main() diff --git a/osm_lcm/tests/test_ns.py b/osm_lcm/tests/test_ns.py index 2bc413c..63bfa35 100644 --- a/osm_lcm/tests/test_ns.py +++ b/osm_lcm/tests/test_ns.py @@ -12,24 +12,22 @@ # under the License. # # For those usages not covered by the Apache License, Version 2.0 please -# contact: esousa@whitestack.com or alfonso.tiernosepulveda@telefonica.com +# contact: alfonso.tiernosepulveda@telefonica.com ## import asynctest # pip3 install asynctest --user import asyncio import yaml -# import logging from os import getenv -from osm_lcm.ns import NsLcm +from osm_lcm import ns from osm_common.dbmemory import DbMemory from osm_common.msgkafka import MsgKafka from osm_common.fslocal import FsLocal from osm_lcm.lcm_utils import TaskRegistry -from n2vc.vnf import N2VC -# from n2vc.k8s_helm_conn import K8sHelmConnector +from osm_lcm.ROclient import ROClient from uuid import uuid4 -from asynctest.mock import patch +# from asynctest.mock import patch from osm_lcm.tests import test_db_descriptors as descriptors @@ -51,21 +49,24 @@ It allows, if some testing ENV are supplied, testing without mocking some extern OSMLCM_RO_XXX: configuration of RO """ - -vca_config = { # TODO replace with os.get_env to get other configurations - "host": getenv("OSMLCM_VCA_HOST", "vca"), - "port": getenv("OSMLCM_VCA_PORT", 17070), - "user": getenv("OSMLCM_VCA_USER", "admin"), - "secret": getenv("OSMLCM_VCA_SECRET", "vca"), - "pubkey": getenv("OSMLCM_VCA_PUBKEY", None), - 'cacert': getenv("OSMLCM_VCA_CACERT", None) -} - -ro_config = { - "endpoint_url": "http://{}:{}/openmano".format(getenv("OSMLCM_RO_HOST", "ro"), getenv("OSMLCM_RO_PORT", "9090")), - "tenant": getenv("OSMLCM_RO_TENANT", "osm"), - "logger_name": "lcm.ROclient", - "loglevel": "DEBUG", +lcm_config = { + "timeout": {}, + "VCA": { # TODO replace with os.get_env to get other configurations + "host": getenv("OSMLCM_VCA_HOST", "vca"), + "port": getenv("OSMLCM_VCA_PORT", 17070), + "user": getenv("OSMLCM_VCA_USER", "admin"), + "secret": getenv("OSMLCM_VCA_SECRET", "vca"), + "public_key": getenv("OSMLCM_VCA_PUBKEY", None), + 'ca_cert': getenv("OSMLCM_VCA_CACERT", None), + 'apiproxy': getenv("OSMLCM_VCA_APIPROXY", "192.168.1.1") + }, + "ro_config": { + "uri": "http://{}:{}/openmano".format(getenv("OSMLCM_RO_HOST", "ro"), + getenv("OSMLCM_RO_PORT", "9090")), + "tenant": getenv("OSMLCM_RO_TENANT", "osm"), + "logger_name": "lcm.ROclient", + "loglevel": "DEBUG", + } } @@ -86,7 +87,7 @@ class TestMyNS(asynctest.TestCase): yield "app_name-{}".format(num_calls) num_calls += 1 - def _n2vc_CreateExecutionEnvironment(self, namespace, reuse_ee_id, db_dict): + def _n2vc_CreateExecutionEnvironment(self, namespace, reuse_ee_id, db_dict, *args, **kwargs): k_list = namespace.split(".") ee_id = k_list[1] + "." if len(k_list) >= 2: @@ -97,7 +98,12 @@ class TestMyNS(asynctest.TestCase): return ee_id, {} def _ro_show(self, *args, **kwargs): - ro_ns_desc = yaml.load(descriptors.db_ro_ns_text, Loader=yaml.Loader) + if kwargs.get("delete"): + ro_ns_desc = yaml.load(descriptors.ro_delete_action_text, Loader=yaml.Loader) + while True: + yield ro_ns_desc + + ro_ns_desc = yaml.load(descriptors.ro_ns_text, Loader=yaml.Loader) # if ip address provided, replace descriptor ip_addresses = getenv("OSMLCMTEST_NS_IPADDRESS", "") @@ -132,9 +138,8 @@ class TestMyNS(asynctest.TestCase): def _return_uuid(self, *args, **kwargs): return str(uuid4()) - @patch("osm_lcm.ns.N2VCJujuConnector") - @patch("osm_lcm.ns.K8sHelmConnector") - async def setUp(self, k8s_mock, n2vc_mock): + async def setUp(self): + # Mock DB if not getenv("OSMLCMTEST_DB_NOMOCK"): self.db = DbMemory() @@ -145,7 +150,6 @@ class TestMyNS(asynctest.TestCase): self.db.create_list("k8sclusters", yaml.load(descriptors.db_k8sclusters_text, Loader=yaml.Loader)) self.db.create_list("nslcmops", yaml.load(descriptors.db_nslcmops_text, Loader=yaml.Loader)) self.db.create_list("vnfrs", yaml.load(descriptors.db_vnfrs_text, Loader=yaml.Loader)) - self.db_vim_accounts = yaml.load(descriptors.db_vim_accounts_text, Loader=yaml.Loader) # Mock kafka @@ -165,8 +169,18 @@ class TestMyNS(asynctest.TestCase): self.lcm_tasks.waitfor_related_HA.return_value = None self.lcm_tasks.lookfor_related.return_value = ("", []) + # Mock VCA - K8s + if not getenv("OSMLCMTEST_VCA_K8s_NOMOCK"): + ns.K8sJujuConnector = asynctest.MagicMock(ns.K8sJujuConnector) + ns.K8sHelmConnector = asynctest.MagicMock(ns.K8sHelmConnector) + + if not getenv("OSMLCMTEST_VCA_NOMOCK"): + ns.N2VCJujuConnector = asynctest.MagicMock(ns.N2VCJujuConnector) + ns.N2VCJujuConnectorLCM = asynctest.MagicMock(ns.N2VCJujuConnectorLCM) + ns.LCMHelmConn = asynctest.MagicMock(ns.LCMHelmConn) + # Create NsLCM class - self.my_ns = NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, ro_config, vca_config, self.loop) + self.my_ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, lcm_config, self.loop) self.my_ns._wait_dependent_n2vc = asynctest.CoroutineMock() # Mock logging @@ -176,7 +190,7 @@ class TestMyNS(asynctest.TestCase): # Mock VCA - N2VC if not getenv("OSMLCMTEST_VCA_NOMOCK"): pub_key = getenv("OSMLCMTEST_NS_PUBKEY", "ssh-rsa test-pub-key t@osm.com") - self.my_ns.n2vc = asynctest.Mock(N2VC()) + # self.my_ns.n2vc = asynctest.Mock(N2VC()) self.my_ns.n2vc.GetPublicKey.return_value = getenv("OSMLCM_VCA_PUBKEY", "public_key") # allow several versions of n2vc self.my_ns.n2vc.FormatApplicationName = asynctest.Mock(side_effect=self._n2vc_FormatApplicationName()) @@ -189,17 +203,14 @@ class TestMyNS(asynctest.TestCase): self.my_ns.n2vc.GetPrimitiveStatus = asynctest.CoroutineMock(return_value="completed") self.my_ns.n2vc.GetPrimitiveOutput = asynctest.CoroutineMock(return_value={"result": "ok", "pubkey": pub_key}) + self.my_ns.n2vc.delete_execution_environment = asynctest.CoroutineMock(return_value=None) self.my_ns.n2vc.get_public_key = asynctest.CoroutineMock( return_value=getenv("OSMLCM_VCA_PUBKEY", "public_key")) - - # # Mock VCA - K8s - # if not getenv("OSMLCMTEST_VCA_K8s_NOMOCK"): - # pub_key = getenv("OSMLCMTEST_NS_PUBKEY", "ssh-rsa test-pub-key t@osm.com") - # self.my_ns.k8sclusterhelm = asynctest.Mock(K8sHelmConnector()) + self.my_ns.n2vc.delete_namespace = asynctest.CoroutineMock(return_value=None) # Mock RO if not getenv("OSMLCMTEST_RO_NOMOCK"): - # self.my_ns.RO = asynctest.Mock(ROclient.ROClient(self.loop, **ro_config)) + # self.my_ns.RO = asynctest.Mock(ROclient.ROClient(self.loop, **lcm_config["ro_config"])) # TODO first time should be empty list, following should return a dict self.my_ns.RO.get_list = asynctest.CoroutineMock(self.my_ns.RO.get_list, return_value=[]) self.my_ns.RO.create = asynctest.CoroutineMock(self.my_ns.RO.create, side_effect=self._ro_create()) @@ -207,13 +218,14 @@ class TestMyNS(asynctest.TestCase): self.my_ns.RO.create_action = asynctest.CoroutineMock(self.my_ns.RO.create_action, return_value={"vm-id": {"vim_result": 200, "description": "done"}}) + self.my_ns.RO.delete = asynctest.CoroutineMock(self.my_ns.RO.delete, return_value={"action_id": "del"}) # self.my_ns.wait_vm_up_insert_key_ro = asynctest.CoroutineMock(return_value="ip-address") @asynctest.fail_on(active_handles=True) # all async tasks must be completed async def test_instantiate(self): self.db.set_one = asynctest.Mock() - nsr_id = self.db.get_list("nsrs")[0]["_id"] - nslcmop_id = self.db.get_list("nslcmops")[0]["_id"] + nsr_id = descriptors.test_ids["TEST-A"]["ns"] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] # print("Test instantiate started") # delete deployed information of database @@ -247,14 +259,60 @@ class TestMyNS(asynctest.TestCase): if not getenv("OSMLCMTEST_DB_NOMOCK"): self.assertTrue(self.db.set_one.called, "db.set_one not called") + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}) + self.assertEqual(db_nsr["_admin"].get("nsState"), "INSTANTIATED", "Not instantiated") + for vnfr in db_vnfrs_list: + self.assertEqual(vnfr["_admin"].get("nsState"), "INSTANTIATED", "Not instantiated") + + if not getenv("OSMLCMTEST_VCA_NOMOCK"): + # check intial-primitives called + self.assertTrue(self.my_ns.n2vc.exec_primitive.called, + "Exec primitive not called for initial config primitive") + for _call in self.my_ns.n2vc.exec_primitive.call_args_list: + self.assertIn(_call[1]["primitive_name"], ("config", "touch"), + "called exec primitive with a primitive different than config or touch") # TODO add more checks of called methods # TODO add a terminate + async def test_instantiate_ee_list(self): + # Using modern IM where configuration is in the new format of execution_environment_list + ee_descriptor_id = "charm_simple" + non_used_initial_primitive = { + "name": "not_to_be_called", + "seq": 3, + "execution-environment-ref": "not_used_ee" + } + ee_list = [ + { + "id": ee_descriptor_id, + "juju": {"charm": "simple"}, + + }, + ] + + self.db.set_one( + "vnfds", + q_filter={"_id": "7637bcf8-cf14-42dc-ad70-c66fcf1e6e77"}, + update_dict={"vnf-configuration.execution-environment-list": ee_list, + "vnf-configuration.initial-config-primitive.0.execution-environment-ref": ee_descriptor_id, + "vnf-configuration.initial-config-primitive.1.execution-environment-ref": ee_descriptor_id, + "vnf-configuration.initial-config-primitive.2": non_used_initial_primitive, + "vnf-configuration.config-primitive.0.execution-environment-ref": ee_descriptor_id, + "vnf-configuration.config-primitive.0.execution-environment-primitive": "touch_charm", + }, + unset={"vnf-configuration.juju": None}) + await self.test_instantiate() + # this will check that the initial-congig-primitive 'not_to_be_called' is not called + def test_ns_params_2_RO(self): - vim = self.db.get_list("vim_accounts")[0] - vim_id = vim["_id"] - ro_vim_id = vim["_admin"]["deployed"]["RO"] + vims = self.db.get_list("vim_accounts") + vim_id = vims[0]["_id"] + ro_vim_id = vims[0]["_admin"]["deployed"]["RO"] + vim_id2 = vims[1]["_id"] + ro_vim_id2 = vims[1]["_admin"]["deployed"]["RO"] + ns_params = {"vimAccountId": vim_id} mgmt_interface = {"cp": "cp"} vdu = [{"id": "vdu_id", "interface": [{"external-connection-point-ref": "cp"}]}] @@ -268,13 +326,18 @@ class TestMyNS(asynctest.TestCase): "vnf-configuration": {"config-access": {"ssh-access": {"required": True, "default_user": "U"}}}}, } nsd = {"constituent-vnfd": []} + db_vnfrs = {} for k in vnfd_dict.keys(): - nsd["constituent-vnfd"].append({"vnfd-id-ref": k, "member-vnf-index": k}) + nsd["constituent-vnfd"].append({"vnfd-id-ref": k, "member-vnf-index": "member " + k}) + db_vnfrs["member " + k] = {"vim-account-id": vim_id2 if k == "1" else vim_id} n2vc_key_list = ["key"] - ro_ns_params = self.my_ns.ns_params_2_RO(ns_params, nsd, vnfd_dict, n2vc_key_list) + ro_ns_params = self.my_ns._ns_params_2_RO(ns_params, nsd, vnfd_dict, db_vnfrs, n2vc_key_list) ro_params_expected = {'wim_account': None, "datacenter": ro_vim_id, - "vnfs": {"5": {"vdus": {"vdu_id": {"mgmt_keys": n2vc_key_list}}}}} + "vnfs": { + "member 5": {"vdus": {"vdu_id": {"mgmt_keys": n2vc_key_list}}}, + "member 1": {"datacenter": ro_vim_id2} + }} self.assertEqual(ro_ns_params, ro_params_expected) # Test scale() and related methods @@ -286,30 +349,31 @@ class TestMyNS(asynctest.TestCase): # scale-out/scale-in operations with success/error result # Test scale() with missing 'scaleVnfData', should return operationState = 'FAILED' - nsr_id = self.db.get_list("nsrs")[0]["_id"] - nslcmop_id = self.db.get_list("nslcmops")[0]["_id"] + nsr_id = descriptors.test_ids["TEST-A"]["ns"] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] await self.my_ns.scale(nsr_id, nslcmop_id) expected_value = 'FAILED' return_value = self.db.get_one("nslcmops", {"_id": nslcmop_id}).get("operationState") self.assertEqual(return_value, expected_value) # print("scale_result: {}".format(self.db.get_one("nslcmops", {"_id": nslcmop_id}).get("detailed-status"))) - # Test _reintent_or_skip_suboperation() + # Test _retry_or_skip_suboperation() # Expected result: # - if a suboperation's 'operationState' is marked as 'COMPLETED', SUBOPERATION_STATUS_SKIP is expected # - if marked as anything but 'COMPLETED', the suboperation index is expected - def test_scale_reintent_or_skip_suboperation(self): + def test_scale_retry_or_skip_suboperation(self): # Load an alternative 'nslcmops' YAML for this test - db_nslcmop = self.db.get_list('nslcmops')[0] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] + db_nslcmop = self.db.get_one('nslcmops', {"_id": nslcmop_id}) op_index = 2 # Test when 'operationState' is 'COMPLETED' db_nslcmop['_admin']['operations'][op_index]['operationState'] = 'COMPLETED' - return_value = self.my_ns._reintent_or_skip_suboperation(db_nslcmop, op_index) + return_value = self.my_ns._retry_or_skip_suboperation(db_nslcmop, op_index) expected_value = self.my_ns.SUBOPERATION_STATUS_SKIP self.assertEqual(return_value, expected_value) # Test when 'operationState' is not 'COMPLETED' db_nslcmop['_admin']['operations'][op_index]['operationState'] = None - return_value = self.my_ns._reintent_or_skip_suboperation(db_nslcmop, op_index) + return_value = self.my_ns._retry_or_skip_suboperation(db_nslcmop, op_index) expected_value = op_index self.assertEqual(return_value, expected_value) @@ -317,7 +381,8 @@ class TestMyNS(asynctest.TestCase): # Expected result: index of the found sub-operation, or SUBOPERATION_STATUS_NOT_FOUND if not found def test_scale_find_suboperation(self): # Load an alternative 'nslcmops' YAML for this test - db_nslcmop = self.db.get_list('nslcmops')[0] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] + db_nslcmop = self.db.get_one('nslcmops', {"_id": nslcmop_id}) # Find this sub-operation op_index = 2 vnf_index = db_nslcmop['_admin']['operations'][op_index]['member_vnf_index'] @@ -346,7 +411,8 @@ class TestMyNS(asynctest.TestCase): # Test _update_suboperation_status() def test_scale_update_suboperation_status(self): self.db.set_one = asynctest.Mock() - db_nslcmop = self.db.get_list('nslcmops')[0] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] + db_nslcmop = self.db.get_one('nslcmops', {"_id": nslcmop_id}) op_index = 0 # Force the initial values to be distinct from the updated ones q_filter = {"_id": db_nslcmop["_id"]} @@ -361,7 +427,8 @@ class TestMyNS(asynctest.TestCase): fail_on_empty=False) def test_scale_add_suboperation(self): - db_nslcmop = self.db.get_list('nslcmops')[0] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] + db_nslcmop = self.db.get_one('nslcmops', {"_id": nslcmop_id}) vnf_index = '1' num_ops_before = len(db_nslcmop.get('_admin', {}).get('operations', [])) - 1 vdu_id = None @@ -412,7 +479,8 @@ class TestMyNS(asynctest.TestCase): # - op_index (non-negative number): This is an existing sub-operation, operationState != 'COMPLETED' # - SUBOPERATION_STATUS_SKIP: This is an existing sub-operation, operationState == 'COMPLETED' def test_scale_check_or_add_scale_suboperation(self): - db_nslcmop = self.db.get_list('nslcmops')[0] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] + db_nslcmop = self.db.get_one('nslcmops', {"_id": nslcmop_id}) operationType = 'PRE-SCALE' vnf_index = '1' primitive = 'touch' @@ -470,27 +538,116 @@ class TestMyNS(asynctest.TestCase): self.assertEqual(op_index_skip_RO, self.my_ns.SUBOPERATION_STATUS_SKIP) async def test_deploy_kdus(self): - db_nsr = self.db.get_list("nsrs")[1] - db_vnfr = self.db.get_list("vnfrs")[2] + nsr_id = descriptors.test_ids["TEST-KDU"]["ns"] + nslcmop_id = descriptors.test_ids["TEST-KDU"]["instantiate"] + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + db_vnfr = self.db.get_one("vnfrs", {"nsr-id-ref": nsr_id, "member-vnf-index-ref": "multikdu"}) db_vnfrs = {"multikdu": db_vnfr} - nsr_id = db_nsr["_id"] - # nslcmop_id = self.db.get_list("nslcmops")[1]["_id"] + db_vnfd = self.db.get_one("vnfds", {"_id": db_vnfr["vnfd-id"]}) + db_vnfds = {db_vnfd["_id"]: db_vnfd} + task_register = {} logging_text = "KDU" self.my_ns.k8sclusterhelm.install = asynctest.CoroutineMock(return_value="k8s_id") - await self.my_ns.deploy_kdus(logging_text, nsr_id, db_nsr, db_vnfrs) + self.my_ns.k8sclusterhelm.synchronize_repos = asynctest.CoroutineMock(return_value=("", "")) + self.my_ns.k8sclusterhelm.get_services = asynctest.CoroutineMock(return_value=([])) + await self.my_ns.deploy_kdus(logging_text, nsr_id, nslcmop_id, db_vnfrs, db_vnfds, task_register) + await asyncio.wait(list(task_register.keys()), timeout=100) db_nsr = self.db.get_list("nsrs")[1] self.assertIn("K8s", db_nsr["_admin"]["deployed"], "K8s entry not created at '_admin.deployed'") self.assertIsInstance(db_nsr["_admin"]["deployed"]["K8s"], list, "K8s entry is not of type list") self.assertEqual(len(db_nsr["_admin"]["deployed"]["K8s"]), 2, "K8s entry is not of type list") k8s_instace_info = {"kdu-instance": "k8s_id", "k8scluster-uuid": "73d96432-d692-40d2-8440-e0c73aee209c", - "k8scluster-type": "chart", - "kdu-name": "ldap", "kdu-model": "stable/openldap:1.2.1"} + "k8scluster-type": "helm-chart", + "kdu-name": "ldap", "kdu-model": "stable/openldap:1.2.1", + "member-vnf-index": "multikdu", "namespace": None} self.assertEqual(db_nsr["_admin"]["deployed"]["K8s"][0], k8s_instace_info) k8s_instace_info["kdu-name"] = "mongo" k8s_instace_info["kdu-model"] = "stable/mongodb" self.assertEqual(db_nsr["_admin"]["deployed"]["K8s"][1], k8s_instace_info) + async def test_instantiate_pdu(self): + nsr_id = descriptors.test_ids["TEST-A"]["ns"] + nslcmop_id = descriptors.test_ids["TEST-A"]["instantiate"] + # Modify vnfd/vnfr to change KDU for PDU. Adding keys that NBI will already set + self.db.set_one("vnfrs", {"nsr-id-ref": nsr_id, "member-vnf-index-ref": "1"}, + update_dict={"ip-address": "10.205.1.46", + "vdur.0.pdu-id": "53e1ec21-2464-451e-a8dc-6e311d45b2c8", + "vdur.0.pdu-type": "PDU-TYPE-1", + "vdur.0.ip-address": "10.205.1.46", + }, + unset={"vdur.status": None}) + self.db.set_one("vnfrs", {"nsr-id-ref": nsr_id, "member-vnf-index-ref": "2"}, + update_dict={"ip-address": "10.205.1.47", + "vdur.0.pdu-id": "53e1ec21-2464-451e-a8dc-6e311d45b2c8", + "vdur.0.pdu-type": "PDU-TYPE-1", + "vdur.0.ip-address": "10.205.1.47", + }, + unset={"vdur.status": None}) + + await self.my_ns.instantiate(nsr_id, nslcmop_id) + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + self.assertEqual(db_nsr.get("nsState"), "READY", str(db_nsr.get("errorDescription "))) + self.assertEqual(db_nsr.get("currentOperation"), "IDLE", "currentOperation different than 'IDLE'") + self.assertEqual(db_nsr.get("currentOperationID"), None, "currentOperationID different than None") + self.assertEqual(db_nsr.get("errorDescription "), None, "errorDescription different than None") + self.assertEqual(db_nsr.get("errorDetail"), None, "errorDetail different than None") + + @asynctest.fail_on(active_handles=True) # all async tasks must be completed + async def test_terminate_without_configuration(self): + nsr_id = descriptors.test_ids["TEST-A"]["ns"] + nslcmop_id = descriptors.test_ids["TEST-A"]["terminate"] + # set instantiation task as completed + self.db.set_list("nslcmops", {"nsInstanceId": nsr_id, "_id.ne": nslcmop_id}, + update_dict={"operationState": "COMPLETED"}) + self.my_ns.RO.show = asynctest.CoroutineMock(ROClient.show, side_effect=self._ro_show(delete=nslcmop_id)) + self.db.set_one("nsrs", {"_id": nsr_id}, + update_dict={"_admin.deployed.VCA.0": None, "_admin.deployed.VCA.1": None}) + + await self.my_ns.terminate(nsr_id, nslcmop_id) + db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) + self.assertEqual(db_nslcmop.get("operationState"), 'COMPLETED', db_nslcmop.get("detailed-status")) + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + self.assertEqual(db_nsr.get("nsState"), "NOT_INSTANTIATED", str(db_nsr.get("errorDescription "))) + self.assertEqual(db_nsr["_admin"].get("nsState"), "NOT_INSTANTIATED", str(db_nsr.get("errorDescription "))) + self.assertEqual(db_nsr.get("currentOperation"), "IDLE", "currentOperation different than 'IDLE'") + self.assertEqual(db_nsr.get("currentOperationID"), None, "currentOperationID different than None") + self.assertEqual(db_nsr.get("errorDescription "), None, "errorDescription different than None") + self.assertEqual(db_nsr.get("errorDetail"), None, "errorDetail different than None") + db_vnfrs_list = self.db.get_list("vnfrs", {"nsr-id-ref": nsr_id}) + for vnfr in db_vnfrs_list: + self.assertEqual(vnfr["_admin"].get("nsState"), "NOT_INSTANTIATED", "Not instantiated") + + @asynctest.fail_on(active_handles=True) # all async tasks must be completed + async def test_terminate_primitive(self): + nsr_id = descriptors.test_ids["TEST-A"]["ns"] + nslcmop_id = descriptors.test_ids["TEST-A"]["terminate"] + self.my_ns.RO.show = asynctest.CoroutineMock(ROClient.show, side_effect=self._ro_show(delete=nslcmop_id)) + # set instantiation task as completed + self.db.set_list("nslcmops", {"nsInstanceId": nsr_id, "_id.ne": nslcmop_id}, + update_dict={"operationState": "COMPLETED"}) + + # modify vnfd descriptor to include terminate_primitive + terminate_primitive = [{ + "name": "touch", + "parameter": [{"name": "filename", "value": "terminate_filename"}], + "seq": '1' + }] + db_vnfr = self.db.get_one("vnfrs", {"nsr-id-ref": nsr_id, "member-vnf-index-ref": "1"}) + self.db.set_one("vnfds", {"_id": db_vnfr["vnfd-id"]}, + {"vnf-configuration.terminate-config-primitive": terminate_primitive}) + + await self.my_ns.terminate(nsr_id, nslcmop_id) + db_nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id}) + self.assertEqual(db_nslcmop.get("operationState"), 'COMPLETED', db_nslcmop.get("detailed-status")) + db_nsr = self.db.get_one("nsrs", {"_id": nsr_id}) + self.assertEqual(db_nsr.get("nsState"), "NOT_INSTANTIATED", str(db_nsr.get("errorDescription "))) + self.assertEqual(db_nsr["_admin"].get("nsState"), "NOT_INSTANTIATED", str(db_nsr.get("errorDescription "))) + self.assertEqual(db_nsr.get("currentOperation"), "IDLE", "currentOperation different than 'IDLE'") + self.assertEqual(db_nsr.get("currentOperationID"), None, "currentOperationID different than None") + self.assertEqual(db_nsr.get("errorDescription "), None, "errorDescription different than None") + self.assertEqual(db_nsr.get("errorDetail"), None, "errorDetail different than None") + if __name__ == '__main__': asynctest.main() diff --git a/osm_lcm/tests/test_prometheus.py b/osm_lcm/tests/test_prometheus.py new file mode 100644 index 0000000..064ede8 --- /dev/null +++ b/osm_lcm/tests/test_prometheus.py @@ -0,0 +1,130 @@ +## +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# For those usages not covered by the Apache License, Version 2.0 please +# contact: alfonso.tiernosepulveda@telefonica.com +## + +import asynctest +from osm_lcm.prometheus import Prometheus, initial_prometheus_data +from asynctest.mock import Mock +from osm_common.dbmemory import DbMemory + +__author__ = 'Alfonso Tierno ' + + +class TestPrometheus(asynctest.TestCase): + + async def setUp(self): + config = {'uri': 'http:prometheus:9090', + 'path': '/etc/prometheus'} + self.db = Mock(DbMemory()) + self.p = Prometheus(config, worker_id='1', db=self.db, loop=self.loop) + + @asynctest.fail_on(active_handles=True) + async def test_start(self): + # test with database empty + self.db.get_one.return_value = False + self.p.update = asynctest.CoroutineMock() + await self.p.start() + self.db.create.assert_called_once_with('admin', initial_prometheus_data) + self.p.update.assert_called_once_with() + + # test with database not empty + self.db.create.reset_mock() + self.db.get_one.return_value = initial_prometheus_data + self.p.update.reset_mock() + await self.p.start() + self.db.create.assert_not_called() + self.p.update.assert_called_once_with() + + @asynctest.fail_on(active_handles=True) + async def test_update(self): + self.p.PROMETHEUS_LOCKED_TIME = 1 + number_call_set_one = 0 + + def _db_set_one(*args, **kwargs): + # simulated that database is not locked at first call + nonlocal number_call_set_one + + number_call_set_one += 1 + if number_call_set_one == 1: + return + else: + return {'update': 1} + + def _check_set_one_calls(set_one_calls): + # check the three calls to database set_one + self.assertEqual(len(set_one_calls), 3, 'Not called three times to db.set_one, two blocks, one unblock') + self.assertIn('admin', set_one_calls[0][0], 'db.set_one collection should be admin') + first_used_time = set_one_calls[0][1]['update_dict']['_admin.locked_at'] + second_used_time = set_one_calls[1][1]['update_dict']['_admin.locked_at'] + third_used_time = set_one_calls[2][1]['update_dict']['_admin.locked_at'] + self.assertTrue(first_used_time != 0 and second_used_time != 0, 'blocking locked_at time must not be 0') + self.assertGreater(second_used_time, first_used_time, + 'Every blocking try must contain a new locked_at time') + self.assertEqual(third_used_time, 0, 'For unblocking must be set locked_at=0') + + # check add_jobs + number_call_set_one = 0 + self.db.get_one.return_value = initial_prometheus_data + self.db.set_one.side_effect = _db_set_one + self.p.send_data = asynctest.CoroutineMock(return_value=True) + add_jobs = {'job1': {'job_name': 'job1', 'nsr_id': 'nsr_id'}} + await self.p.update(add_jobs=add_jobs) + set_one_calls = self.db.set_one.call_args_list + _check_set_one_calls(set_one_calls) + update_dict = set_one_calls[2][1]['update_dict'] + unset_dict = set_one_calls[2][1]['unset'] + expected_final_set = { + '_admin.locked_at': 0, + '_admin.locked_by': None, + '_admin.modified_at': set_one_calls[1][1]['update_dict']['_admin.locked_at'], + 'scrape_configs.job1': add_jobs['job1']} + self.assertEqual(update_dict, expected_final_set, 'invalid set and unlock values') + self.assertEqual(unset_dict, None, 'invalid unset and unlock values') + + # check remove_jobs + number_call_set_one = 0 + remove_jobs = ['job1'] + self.db.set_one.reset_mock() + await self.p.update(remove_jobs=remove_jobs) + set_one_calls = self.db.set_one.call_args_list + _check_set_one_calls(set_one_calls) + update_dict = set_one_calls[2][1]['update_dict'] + unset_dict = set_one_calls[2][1]['unset'] + expected_final_set = { + '_admin.locked_at': 0, + '_admin.locked_by': None, + '_admin.modified_at': set_one_calls[1][1]['update_dict']['_admin.locked_at'] + } + self.assertEqual(update_dict, expected_final_set, 'invalid set and unlock values') + self.assertEqual(unset_dict, {'scrape_configs.job1': None}, 'invalid unset and unlock values') + + def test_parse_job(self): + text_to_parse = """ + # yaml format with jinja2 + key1: "parsing var1='{{ var1 }}'" + key2: "parsing var2='{{ var2 }}'" + """ + vars = {'var1': 'VAR1', 'var2': 'VAR2', 'var3': 'VAR3'} + expected = { + 'key1': "parsing var1='VAR1'", + 'key2': "parsing var2='VAR2'" + } + result = self.p.parse_job(text_to_parse, vars) + self.assertEqual(result, expected, 'Error at jinja2 parse') + + +if __name__ == '__main__': + asynctest.main() diff --git a/osm_lcm/vim_sdn.py b/osm_lcm/vim_sdn.py index 931011b..a5eff36 100644 --- a/osm_lcm/vim_sdn.py +++ b/osm_lcm/vim_sdn.py @@ -16,8 +16,8 @@ # under the License. ## -import asyncio import yaml +import asyncio import logging import logging.handlers from osm_lcm import ROclient @@ -27,6 +27,7 @@ from n2vc.k8s_juju_conn import K8sJujuConnector from n2vc.exceptions import K8sException, N2VCException from osm_common.dbbase import DbException from copy import deepcopy +from time import time __author__ = "Alfonso Tierno" @@ -36,7 +37,7 @@ class VimLcm(LcmBase): vim_config_encrypted = {"1.1": ("admin_password", "nsx_password", "vcenter_password"), "default": ("admin_password", "nsx_password", "vcenter_password", "vrops_password")} - def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -46,7 +47,7 @@ class VimLcm(LcmBase): self.logger = logging.getLogger('lcm.vim') self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config + self.ro_config = config["ro_config"] super().__init__(db, msg, fs, self.logger) @@ -61,7 +62,6 @@ class VimLcm(LcmBase): return vim_id = vim_content["_id"] - vim_content.pop("op_id", None) logging_text = "Task vim_create={} ".format(vim_id) self.logger.debug(logging_text + "Enter") @@ -69,8 +69,6 @@ class VimLcm(LcmBase): db_vim_update = {} exc = None RO_sdn_id = None - operationState_HA = '' - detailed_status_HA = '' try: step = "Getting vim-id='{}' from db".format(vim_id) db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) @@ -140,14 +138,14 @@ class VimLcm(LcmBase): db_vim_update["_admin.operationalState"] = "ENABLED" db_vim_update["_admin.detailed-status"] = "Done" # Mark the VIM 'create' HA task as successful - operationState_HA = 'COMPLETED' - detailed_status_HA = 'Done' + operation_state = 'COMPLETED' + operation_details = 'Done' # await asyncio.sleep(15) # TODO remove. This is for test self.logger.debug(logging_text + "Exit Ok VIM account created at RO_vim_account_id={}".format(desc["uuid"])) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -158,16 +156,16 @@ class VimLcm(LcmBase): db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the VIM 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) # Register the VIM 'create' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('vim', 'create', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('vim', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) @@ -183,7 +181,6 @@ class VimLcm(LcmBase): return vim_id = vim_content["_id"] - vim_content.pop("op_id", None) logging_text = "Task vim_edit={} ".format(vim_id) self.logger.debug(logging_text + "Enter") @@ -192,8 +189,6 @@ class VimLcm(LcmBase): RO_sdn_id = None RO_vim_id = None db_vim_update = {} - operationState_HA = '' - detailed_status_HA = '' step = "Getting vim-id='{}' from db".format(vim_id) try: # wait for any previous tasks in process @@ -271,13 +266,13 @@ class VimLcm(LcmBase): await RO.edit("vim_account", RO_vim_id, descriptor=vim_account_RO) db_vim_update["_admin.operationalState"] = "ENABLED" # Mark the VIM 'edit' HA task as successful - operationState_HA = 'COMPLETED' - detailed_status_HA = 'Done' + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok RO_vim_id={}".format(RO_vim_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -288,16 +283,16 @@ class VimLcm(LcmBase): db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the VIM 'edit' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) # Register the VIM 'edit' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('vim', 'edit', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('vim', 'edit', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) @@ -319,8 +314,6 @@ class VimLcm(LcmBase): db_vim = None db_vim_update = {} exc = None - operationState_HA = '' - detailed_status_HA = '' step = "Getting vim from db" try: # wait for any previous tasks in process @@ -355,7 +348,7 @@ class VimLcm(LcmBase): self.logger.debug(logging_text + "Exit Ok") return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -367,11 +360,11 @@ class VimLcm(LcmBase): db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the VIM 'delete' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) - self.lcm_tasks.register_HA('vim', 'delete', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.unlock_HA('vim', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) try: if db_vim and db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) @@ -386,7 +379,7 @@ class WimLcm(LcmBase): # values that are encrypted at wim config because they are passwords wim_config_encrypted = () - def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -396,7 +389,7 @@ class WimLcm(LcmBase): self.logger = logging.getLogger('lcm.vim') self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config + self.ro_config = config["ro_config"] super().__init__(db, msg, fs, self.logger) @@ -410,15 +403,12 @@ class WimLcm(LcmBase): self.lcm_tasks.lock_HA('wim', 'create', op_id) wim_id = wim_content["_id"] - wim_content.pop("op_id", None) logging_text = "Task wim_create={} ".format(wim_id) self.logger.debug(logging_text + "Enter") db_wim = None db_wim_update = {} exc = None - operationState_HA = '' - detailed_status_HA = '' try: step = "Getting wim-id='{}' from db".format(wim_id) db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) @@ -469,13 +459,13 @@ class WimLcm(LcmBase): db_wim_update["_admin.operationalState"] = "ENABLED" db_wim_update["_admin.detailed-status"] = "Done" # Mark the WIM 'create' HA task as successful - operationState_HA = 'COMPLETED' - detailed_status_HA = 'Done' + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok WIM account created at RO_wim_account_id={}".format(desc["uuid"])) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -486,16 +476,16 @@ class WimLcm(LcmBase): db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the WIM 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) # Register the WIM 'create' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('wim', 'create', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('wim', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) @@ -510,7 +500,6 @@ class WimLcm(LcmBase): return wim_id = wim_content["_id"] - wim_content.pop("op_id", None) logging_text = "Task wim_edit={} ".format(wim_id) self.logger.debug(logging_text + "Enter") @@ -519,8 +508,6 @@ class WimLcm(LcmBase): RO_wim_id = None db_wim_update = {} step = "Getting wim-id='{}' from db".format(wim_id) - operationState_HA = '' - detailed_status_HA = '' try: # wait for any previous tasks in process await self.lcm_tasks.waitfor_related_HA('wim', 'edit', op_id) @@ -577,13 +564,13 @@ class WimLcm(LcmBase): await RO.edit("wim_account", RO_wim_id, descriptor=wim_account_RO) db_wim_update["_admin.operationalState"] = "ENABLED" # Mark the WIM 'edit' HA task as successful - operationState_HA = 'COMPLETED' - detailed_status_HA = 'Done' + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok RO_wim_id={}".format(RO_wim_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -594,16 +581,16 @@ class WimLcm(LcmBase): db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the WIM 'edit' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) # Register the WIM 'edit' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('wim', 'edit', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('wim', 'edit', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) @@ -625,8 +612,6 @@ class WimLcm(LcmBase): db_wim_update = {} exc = None step = "Getting wim from db" - operationState_HA = '' - detailed_status_HA = '' try: # wait for any previous tasks in process await self.lcm_tasks.waitfor_related_HA('wim', 'delete', op_id) @@ -660,7 +645,7 @@ class WimLcm(LcmBase): self.logger.debug(logging_text + "Exit Ok") return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -672,11 +657,11 @@ class WimLcm(LcmBase): db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the WIM 'delete' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) - self.lcm_tasks.register_HA('wim', 'delete', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.unlock_HA('wim', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) try: if db_wim and db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) @@ -689,7 +674,7 @@ class WimLcm(LcmBase): class SdnLcm(LcmBase): - def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -699,7 +684,7 @@ class SdnLcm(LcmBase): self.logger = logging.getLogger('lcm.sdn') self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config + self.ro_config = config["ro_config"] super().__init__(db, msg, fs, self.logger) @@ -713,7 +698,6 @@ class SdnLcm(LcmBase): self.lcm_tasks.lock_HA('sdn', 'create', op_id) sdn_id = sdn_content["_id"] - sdn_content.pop("op_id", None) logging_text = "Task sdn_create={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") @@ -721,8 +705,6 @@ class SdnLcm(LcmBase): db_sdn_update = {} RO_sdn_id = None exc = None - operationState_HA = '' - detailed_status_HA = '' try: step = "Getting sdn from db" db_sdn = self.db.get_one("sdns", {"_id": sdn_id}) @@ -748,11 +730,11 @@ class SdnLcm(LcmBase): db_sdn_update["_admin.operationalState"] = "ENABLED" self.logger.debug(logging_text + "Exit Ok RO_sdn_id={}".format(RO_sdn_id)) # Mark the SDN 'create' HA task as successful - operationState_HA = 'COMPLETED' - detailed_status_HA = 'Done' + operation_state = 'COMPLETED' + operation_details = 'Done' return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -763,16 +745,16 @@ class SdnLcm(LcmBase): db_sdn_update["_admin.operationalState"] = "ERROR" db_sdn_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the SDN 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_sdn and db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) # Register the SDN 'create' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('sdn', 'create', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('sdn', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id) @@ -787,15 +769,12 @@ class SdnLcm(LcmBase): return sdn_id = sdn_content["_id"] - sdn_content.pop("op_id", None) logging_text = "Task sdn_edit={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") db_sdn = None db_sdn_update = {} exc = None - operationState_HA = '' - detailed_status_HA = '' step = "Getting sdn from db" try: # wait for any previous tasks in process @@ -819,13 +798,13 @@ class SdnLcm(LcmBase): await RO.edit("sdn", RO_sdn_id, descriptor=sdn_RO) db_sdn_update["_admin.operationalState"] = "ENABLED" # Mark the SDN 'edit' HA task as successful - operationState_HA = 'COMPLETED' - detailed_status_HA = 'Done' + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok RO_sdn_id={}".format(RO_sdn_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -836,16 +815,16 @@ class SdnLcm(LcmBase): db_sdn["_admin.operationalState"] = "ERROR" db_sdn["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the SDN 'edit' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) # Register the SDN 'edit' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('sdn', 'edit', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('sdn', 'edit', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id) @@ -866,8 +845,6 @@ class SdnLcm(LcmBase): db_sdn = None db_sdn_update = {} exc = None - operationState_HA = '' - detailed_status_HA = '' step = "Getting sdn from db" try: # wait for any previous tasks in process @@ -893,7 +870,7 @@ class SdnLcm(LcmBase): self.logger.debug("sdn_delete task sdn_id={} Exit Ok".format(sdn_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -904,11 +881,11 @@ class SdnLcm(LcmBase): db_sdn["_admin.operationalState"] = "ERROR" db_sdn["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the SDN 'delete' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) - self.lcm_tasks.register_HA('sdn', 'delete', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.unlock_HA('sdn', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) try: if db_sdn and db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) @@ -920,8 +897,9 @@ class SdnLcm(LcmBase): class K8sClusterLcm(LcmBase): + timeout_create = 300 - def __init__(self, db, msg, fs, lcm_tasks, vca_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -931,7 +909,7 @@ class K8sClusterLcm(LcmBase): self.logger = logging.getLogger('lcm.k8scluster') self.loop = loop self.lcm_tasks = lcm_tasks - self.vca_config = vca_config + self.vca_config = config["VCA"] self.fs = fs self.db = db @@ -950,32 +928,30 @@ class K8sClusterLcm(LcmBase): fs=self.fs, log=self.logger, db=self.db, - on_update_db=None + loop=self.loop, + on_update_db=None, + vca_config=self.vca_config, ) + self.k8s_map = { + "helm-chart": self.helm_k8scluster, + "juju-bundle": self.juju_k8scluster, + } super().__init__(db, msg, fs, self.logger) async def create(self, k8scluster_content, order_id): - # HA tasks and backward compatibility: - # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. - # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. - # Register 'create' task here for related future HA operations op_id = k8scluster_content.pop('op_id', None) if not self.lcm_tasks.lock_HA('k8scluster', 'create', op_id): return k8scluster_id = k8scluster_content["_id"] - k8scluster_content.pop("op_id", None) logging_text = "Task k8scluster_create={} ".format(k8scluster_id) self.logger.debug(logging_text + "Enter") db_k8scluster = None db_k8scluster_update = {} - exc = None - operationState_HA = '' - detailed_status_HA = '' try: step = "Getting k8scluster-id='{}' from db".format(k8scluster_id) self.logger.debug(logging_text + step) @@ -983,87 +959,97 @@ class K8sClusterLcm(LcmBase): self.db.encrypt_decrypt_fields(db_k8scluster.get("credentials"), 'decrypt', ['password', 'secret'], schema_version=db_k8scluster["schema_version"], salt=db_k8scluster["_id"]) k8s_credentials = yaml.safe_dump(db_k8scluster.get("credentials")) - error_text_list = [] - # helm-chart - k8s_hc_id = None - try: - k8s_hc_id, uninstall_sw = await self.helm_k8scluster.init_env(k8s_credentials) - db_k8scluster_update["_admin.helm-chart.id"] = k8s_hc_id - db_k8scluster_update["_admin.helm-chart.created"] = uninstall_sw - except Exception as e: - error_text_list.append("Failing init helm-chart: {}".format(e)) - db_k8scluster_update["_admin.helm-chart.error_msg"] = str(e) - if isinstance(e, K8sException): - self.logger.error(logging_text + "Failing init helm-chart: {}".format(e)) - else: - self.logger.error(logging_text + "Failing init helm-chart: {}".format(e), exc_info=True) + pending_tasks = [] + task2name = {} + init_target = deep_get(db_k8scluster, ("_admin", "init")) + step = "Launching k8scluster init tasks" + for task_name in ("helm-chart", "juju-bundle"): + if init_target and task_name not in init_target: + continue + task = asyncio.ensure_future(self.k8s_map[task_name].init_env(k8s_credentials, + reuse_cluster_uuid=k8scluster_id)) + pending_tasks.append(task) + task2name[task] = task_name - # Juju/k8s cluster - k8s_jb_id = None - try: - k8s_jb_id, uninstall_sw = await self.juju_k8scluster.init_env(k8s_credentials) - db_k8scluster_update["_admin.juju-bundle.id"] = k8s_jb_id - db_k8scluster_update["_admin.juju-bundle.created"] = uninstall_sw - except Exception as e: - error_text_list.append("Failing init juju-bundle: {}".format(e)) - db_k8scluster_update["_admin.juju-bundle.error_msg"] = str(e) - if isinstance(e, N2VCException): - self.logger.error(logging_text + "Failing init juju-bundle: {}".format(e)) - else: - self.logger.error(logging_text + "Failing init juju-bundle: {}".format(e), exc_info=True) - - step = "Getting the list of repos" - if k8s_hc_id: - self.logger.debug(logging_text + step) - task_list = [] - db_k8srepo_list = self.db.get_list("k8srepos", {"type": "helm-chart"}) - for repo in db_k8srepo_list: - step = "Adding repo {} to cluster: {}".format(repo["name"], k8s_hc_id) - self.logger.debug(logging_text + step) - task = asyncio.ensure_future(self.helm_k8scluster.repo_add(cluster_uuid=k8s_hc_id, - name=repo["name"], url=repo["url"], - repo_type="chart")) - task_list.append(task) - repo_k8scluster_list = deep_get(repo, ("_admin", "cluster-inserted")) or [] - repo_k8scluster_list.append(k8s_hc_id) - self.update_db_2("k8srepos", repo["_id"], {"_admin.cluster-inserted": repo_k8scluster_list}) - - if task_list: - self.logger.debug(logging_text + 'Waiting for terminate tasks of repo_add') - done, pending = await asyncio.wait(task_list, timeout=3600) - if pending: - self.logger.error(logging_text + 'There are pending tasks: {}'.format(pending)) - - # mark as an error if both helm-chart and juju-bundle have been failed - if k8s_hc_id or k8s_jb_id: - db_k8scluster_update["_admin.operationalState"] = "ENABLED" + error_text_list = [] + tasks_name_ok = [] + reached_timeout = False + now = time() + + while pending_tasks: + _timeout = max(1, self.timeout_create - (time() - now)) # ensure not negative with max + step = "Waiting for k8scluster init tasks" + done, pending_tasks = await asyncio.wait(pending_tasks, timeout=_timeout, + return_when=asyncio.FIRST_COMPLETED) + if not done: + # timeout. Set timeout is reached and process pending as if they hase been finished + done = pending_tasks + pending_tasks = None + reached_timeout = True + for task in done: + task_name = task2name[task] + if reached_timeout: + exc = "Timeout" + elif task.cancelled(): + exc = "Cancelled" + else: + exc = task.exception() + + if exc: + error_text_list.append("Failing init {}: {}".format(task_name, exc)) + db_k8scluster_update["_admin.{}.error_msg".format(task_name)] = str(exc) + db_k8scluster_update["_admin.{}.id".format(task_name)] = None + db_k8scluster_update["_admin.{}.operationalState".format(task_name)] = "ERROR" + self.logger.error(logging_text + "{} init fail: {}".format(task_name, exc), + exc_info=not isinstance(exc, (N2VCException, str))) + else: + k8s_id, uninstall_sw = task.result() + tasks_name_ok.append(task_name) + self.logger.debug(logging_text + "{} init success. id={} created={}".format( + task_name, k8s_id, uninstall_sw)) + db_k8scluster_update["_admin.{}.error_msg".format(task_name)] = None + db_k8scluster_update["_admin.{}.id".format(task_name)] = k8s_id + db_k8scluster_update["_admin.{}.created".format(task_name)] = uninstall_sw + db_k8scluster_update["_admin.{}.operationalState".format(task_name)] = "ENABLED" + # update database + step = "Updating database for " + task_name + self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update) + if tasks_name_ok: + operation_details = "ready for " + ", ".join(tasks_name_ok) + operation_state = "COMPLETED" + db_k8scluster_update["_admin.operationalState"] = "ENABLED" if not error_text_list else "DEGRADED" + operation_details += "; " + ";".join(error_text_list) else: db_k8scluster_update["_admin.operationalState"] = "ERROR" - db_k8scluster_update["_admin.detailed-status"] = ";".join(error_text_list) + operation_state = "FAILED" + operation_details = ";".join(error_text_list) + db_k8scluster_update["_admin.detailed-status"] = operation_details + self.logger.debug(logging_text + "Done. Result: " + operation_state) + exc = None except Exception as e: - self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + if isinstance(e, (LcmException, DbException, K8sException, N2VCException, asyncio.CancelledError)): + self.logger.error(logging_text + "Exit Exception {}".format(e)) + else: + self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) exc = e finally: if exc and db_k8scluster: db_k8scluster_update["_admin.operationalState"] = "ERROR" db_k8scluster_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) - - # Mark the k8scluster 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: - if db_k8scluster_update: + if db_k8scluster and db_k8scluster_update: self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update) - # Register the K8scluster 'create' HA task either - # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('k8scluster', 'create', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + # Register the operation and unlock + self.lcm_tasks.unlock_HA('k8scluster', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) - self.lcm_tasks.remove("k8sclusters", k8scluster_id, order_id) + self.lcm_tasks.remove("k8scluster", k8scluster_id, order_id) async def delete(self, k8scluster_content, order_id): @@ -1076,15 +1062,12 @@ class K8sClusterLcm(LcmBase): return k8scluster_id = k8scluster_content["_id"] - k8scluster_content.pop("op_id", None) logging_text = "Task k8scluster_delete={} ".format(k8scluster_id) self.logger.debug(logging_text + "Enter") db_k8scluster = None db_k8scluster_update = {} exc = None - operationState_HA = '' - detailed_status_HA = '' try: step = "Getting k8scluster='{}' from db".format(k8scluster_id) self.logger.debug(logging_text + step) @@ -1092,16 +1075,22 @@ class K8sClusterLcm(LcmBase): k8s_hc_id = deep_get(db_k8scluster, ("_admin", "helm-chart", "id")) k8s_jb_id = deep_get(db_k8scluster, ("_admin", "juju-bundle", "id")) - uninstall_sw = deep_get(db_k8scluster, ("_admin", "helm-chart", "created")) cluster_removed = True + if k8s_jb_id: # delete in reverse order of creation + step = "Removing juju-bundle '{}'".format(k8s_jb_id) + uninstall_sw = deep_get(db_k8scluster, ("_admin", "juju-bundle", "created")) or False + cluster_removed = await self.juju_k8scluster.reset(cluster_uuid=k8s_jb_id, uninstall_sw=uninstall_sw) + db_k8scluster_update["_admin.juju-bundle.id"] = None + db_k8scluster_update["_admin.juju-bundle.operationalState"] = "DISABLED" + if k8s_hc_id: - uninstall_sw = uninstall_sw or False + step = "Removing helm-chart '{}'".format(k8s_hc_id) + uninstall_sw = deep_get(db_k8scluster, ("_admin", "helm-chart", "created")) or False cluster_removed = await self.helm_k8scluster.reset(cluster_uuid=k8s_hc_id, uninstall_sw=uninstall_sw) + db_k8scluster_update["_admin.helm-chart.id"] = None + db_k8scluster_update["_admin.helm-chart.operationalState"] = "DISABLED" - if k8s_jb_id: - uninstall_sw = uninstall_sw or False - cluster_removed = await self.juju_k8scluster.reset(cluster_uuid=k8s_jb_id, uninstall_sw=uninstall_sw) - + # Try to remove from cluster_inserted to clean old versions if k8s_hc_id and cluster_removed: step = "Removing k8scluster='{}' from k8srepos".format(k8scluster_id) self.logger.debug(logging_text + step) @@ -1113,38 +1102,43 @@ class K8sClusterLcm(LcmBase): self.update_db_2("k8srepos", k8srepo["_id"], {"_admin.cluster-inserted": cluster_list}) except Exception as e: self.logger.error("{}: {}".format(step, e)) - self.db.del_one("k8sclusters", {"_id": k8scluster_id}) - else: - raise LcmException("An error happened during the reset of the k8s cluster '{}'".format(k8scluster_id)) - # if not cluster_removed: - # raise Exception("K8scluster was not properly removed") + self.db.del_one("k8sclusters", {"_id": k8scluster_id}) + db_k8scluster_update = None + self.logger.debug(logging_text + "Done") except Exception as e: - self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + if isinstance(e, (LcmException, DbException, K8sException, N2VCException, asyncio.CancelledError)): + self.logger.error(logging_text + "Exit Exception {}".format(e)) + else: + self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) exc = e finally: if exc and db_k8scluster: db_k8scluster_update["_admin.operationalState"] = "ERROR" db_k8scluster_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the WIM 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + else: + operation_state = 'COMPLETED' + operation_details = "deleted" + try: if db_k8scluster_update: self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update) # Register the K8scluster 'delete' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('k8scluster', 'delete', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('k8scluster', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) - self.lcm_tasks.remove("k8sclusters", k8scluster_id, order_id) + self.lcm_tasks.remove("k8scluster", k8scluster_id, order_id) class K8sRepoLcm(LcmBase): - def __init__(self, db, msg, fs, lcm_tasks, vca_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -1154,7 +1148,7 @@ class K8sRepoLcm(LcmBase): self.logger = logging.getLogger('lcm.k8srepo') self.loop = loop self.lcm_tasks = lcm_tasks - self.vca_config = vca_config + self.vca_config = config["VCA"] self.fs = fs self.db = db @@ -1187,56 +1181,33 @@ class K8sRepoLcm(LcmBase): db_k8srepo = None db_k8srepo_update = {} exc = None - operationState_HA = '' - detailed_status_HA = '' + operation_state = 'COMPLETED' + operation_details = '' try: step = "Getting k8srepo-id='{}' from db".format(k8srepo_id) self.logger.debug(logging_text + step) db_k8srepo = self.db.get_one("k8srepos", {"_id": k8srepo_id}) - step = "Getting k8scluster_list from db" - self.logger.debug(logging_text + step) - db_k8scluster_list = self.db.get_list("k8sclusters", {}) - db_k8srepo_update["_admin.cluster-inserted"] = [] - task_list = [] - for k8scluster in db_k8scluster_list: - hc_id = deep_get(k8scluster, ("_admin", "helm-chart", "id")) - if hc_id: - step = "Adding repo to cluster: {}".format(hc_id) - self.logger.debug(logging_text + step) - task = asyncio.ensure_future(self.k8srepo.repo_add(cluster_uuid=hc_id, - name=db_k8srepo["name"], url=db_k8srepo["url"], - repo_type="chart")) - task_list.append(task) - db_k8srepo_update["_admin.cluster-inserted"].append(hc_id) - - done = None - pending = None - if len(task_list) > 0: - self.logger.debug('Waiting for terminate pending tasks...') - done, pending = await asyncio.wait(task_list, timeout=3600) - if not pending: - self.logger.debug('All tasks finished...') - else: - self.logger.info('There are pending tasks: {}'.format(pending)) db_k8srepo_update["_admin.operationalState"] = "ENABLED" except Exception as e: - self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + self.logger.error(logging_text + "Exit Exception {}".format(e), + exc_info=not isinstance(e, (LcmException, DbException, K8sException, N2VCException, + asyncio.CancelledError))) exc = e finally: if exc and db_k8srepo: db_k8srepo_update["_admin.operationalState"] = "ERROR" db_k8srepo_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the WIM 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_k8srepo_update: self.update_db_2("k8srepos", k8srepo_id, db_k8srepo_update) # Register the K8srepo 'create' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('k8srepo', 'create', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('k8srepo', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("k8srepo", k8srepo_id, order_id) @@ -1258,52 +1229,35 @@ class K8sRepoLcm(LcmBase): db_k8srepo = None db_k8srepo_update = {} - operationState_HA = '' - detailed_status_HA = '' + exc = None + operation_state = 'COMPLETED' + operation_details = '' try: step = "Getting k8srepo-id='{}' from db".format(k8srepo_id) self.logger.debug(logging_text + step) db_k8srepo = self.db.get_one("k8srepos", {"_id": k8srepo_id}) - step = "Getting k8scluster_list from db" - self.logger.debug(logging_text + step) - db_k8scluster_list = self.db.get_list("k8sclusters", {}) - - task_list = [] - for k8scluster in db_k8scluster_list: - hc_id = deep_get(k8scluster, ("_admin", "helm-chart", "id")) - if hc_id: - task = asyncio.ensure_future(self.k8srepo.repo_remove(cluster_uuid=hc_id, - name=db_k8srepo["name"])) - task_list.append(task) - done = None - pending = None - if len(task_list) > 0: - self.logger.debug('Waiting for terminate pending tasks...') - done, pending = await asyncio.wait(task_list, timeout=3600) - if not pending: - self.logger.debug('All tasks finished...') - else: - self.logger.info('There are pending tasks: {}'.format(pending)) - self.db.del_one("k8srepos", {"_id": k8srepo_id}) except Exception as e: - self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + self.logger.error(logging_text + "Exit Exception {}".format(e), + exc_info=not isinstance(e, (LcmException, DbException, K8sException, N2VCException, + asyncio.CancelledError))) exc = e finally: if exc and db_k8srepo: db_k8srepo_update["_admin.operationalState"] = "ERROR" db_k8srepo_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) # Mark the WIM 'create' HA task as erroneous - operationState_HA = 'FAILED' - detailed_status_HA = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_k8srepo_update: self.update_db_2("k8srepos", k8srepo_id, db_k8srepo_update) # Register the K8srepo 'delete' HA task either # succesful or erroneous, or do nothing (if legacy NBI) - self.lcm_tasks.register_HA('k8srepo', 'delete', op_id, - operationState=operationState_HA, - detailed_status=detailed_status_HA) + self.lcm_tasks.unlock_HA('k8srepo', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) + self.db.del_one("k8srepos", {"_id": k8srepo_id}) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("k8srepo", k8srepo_id, order_id) diff --git a/requirements.txt b/requirements.txt index ea2d1c2..32a09a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,10 @@ ## pyyaml -aiohttp==0.20.2 +aiohttp>=2.3.10 jinja2 +grpcio-tools +grpclib git+https://osm.etsi.org/gerrit/osm/common.git#egg=osm-common git+https://osm.etsi.org/gerrit/osm/N2VC.git#egg=n2vc + diff --git a/setup.py b/setup.py index b4d614a..a2d8a1b 100644 --- a/setup.py +++ b/setup.py @@ -45,17 +45,15 @@ setup( # data_files=[('/etc/osm/', ['osm_lcm/lcm.cfg']), # ('/etc/systemd/system/', ['osm_lcm/osm-lcm.service']), # ], - dependency_links=[ - 'git+https://osm.etsi.org/gerrit/osm/common.git#egg=osm-common', - 'git+https://osm.etsi.org/gerrit/osm/N2VC.git#egg=n2vc', - ], install_requires=[ # 'pymongo', 'PyYAML', - 'aiohttp==0.20.2', - 'osm-common', - 'n2vc', + 'aiohttp>=2.3.10', + 'osm-common @ git+https://osm.etsi.org/gerrit/osm/common.git#egg=osm-common', + 'n2vc @ git+https://osm.etsi.org/gerrit/osm/N2VC.git#egg=n2vc', 'jinja2', + 'grpcio-tools', + 'grpclib', # TODO this is version installed by 'apt python3-aiohttp' on Ubuntu Sserver 14.04 # version installed by pip 3.3.2 is not compatible. Code should be migrated to this version and use pip3 ], diff --git a/stdeb.cfg b/stdeb.cfg index 36d1fb0..2794a1f 100644 --- a/stdeb.cfg +++ b/stdeb.cfg @@ -1,3 +1,20 @@ +## +# Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +## [DEFAULT] -X-Python3-Version : >= 3.5 -Depends3: python3-osm-common, python3-n2vc, python3-yaml, python3-aiohttp, python3-jinja2 +X-Python3-Version : >= 3.6 +Depends3: python3-osm-common, python3-n2vc, python3-yaml, python3-aiohttp, python3-jinja2, python3-pip diff --git a/tox.ini b/tox.ini index de33e96..7e5cccf 100644 --- a/tox.ini +++ b/tox.ini @@ -14,25 +14,41 @@ # limitations under the License. [tox] -envlist = flake8, unittest -toxworkdir={homedir}/.tox +envlist = cover, flake8, unittest [testenv] +usedevelop = True basepython = python3 install_command = python3 -m pip install -r requirements.txt -U {opts} {packages} -deps = -r{toxinidir}/test-requirements.txt -commands=python3 -m unittest discover -v + +[testenv:cover] +basepython = python3 +deps = + nose2 + nose2-cov + coverage + -rrequirements.txt + -rtest-requirements.txt +commands = + coverage erase + nose2 -C --coverage osm_lcm --plugin nose2.plugins.junitxml -s osm_lcm/tests + coverage report --omit='*tests*' + coverage html -d ./cover --omit='*tests*' + coverage xml -o coverage.xml --omit='*tests*' [testenv:flake8] basepython = python3 -deps = flake8 +deps = + flake8 + -rrequirements.txt commands = flake8 osm_lcm --max-line-length 120 \ - --exclude .svn,CVS,.gz,.git,__pycache__,.tox,local,temp --ignore W291,W293,E226,W504 + --exclude .svn,CVS,.gz,.git,__pycache__,.tox,local,temp,frontend_grpc.py,frontend_pb2.py \ + --ignore W291,W293,E226,W504 [testenv:unittest] basepython = python3 deps = asynctest -commands = python3 -m unittest osm_lcm.tests.test_ns +commands = python3 -m unittest discover osm_lcm/tests -v [testenv:build] basepython = python3 -- 2.17.1