fix 1201: for a KDU with configuration, wait for KDU up instead of deployed at RO
[osm/LCM.git] / osm_lcm / lcm.py
index 3b8da7d..eaf2558 100644 (file)
 # under the License.
 ##
 
+
+# DEBUG WITH PDB
+import os
+import pdb
+
 import asyncio
 import yaml
 import logging
@@ -24,12 +29,15 @@ import logging.handlers
 import getopt
 import sys
 
-from osm_lcm import ROclient, ns, vim_sdn, netslice
-from time import time, sleep
+from osm_lcm import ns, prometheus, vim_sdn, netslice
+from osm_lcm.ng_ro import NgRoException, NgRoClient
+from osm_lcm.ROclient import ROClient, ROClientException
+
+from time import time
 from osm_lcm.lcm_utils import versiontuple, LcmException, TaskRegistry, LcmExceptionExit
 from osm_lcm import version as lcm_version, version_date as lcm_version_date
 
-from osm_common import dbmemory, dbmongo, fslocal, msglocal, msgkafka
+from osm_common import dbmemory, dbmongo, fslocal, fsmongo, msglocal, msgkafka
 from osm_common import version as common_version
 from osm_common.dbbase import DbException
 from osm_common.fsbase import FsException
@@ -38,14 +46,15 @@ from os import environ, path
 from random import choice as random_choice
 from n2vc import version as n2vc_version
 
+if os.getenv('OSMLCM_PDB_DEBUG', None) is not None:
+    pdb.set_trace()
+
 
 __author__ = "Alfonso Tierno"
-min_RO_version = "0.6.3"
+min_RO_version = "6.0.2"
 min_n2vc_version = "0.0.2"
+
 min_common_version = "0.1.19"
-# uncomment if LCM is installed as library and installed, and get them from __init__.py
-# lcm_version = '0.1.41'
-# lcm_version_date = '2019-06-19'
 health_check_file = path.expanduser("~") + "/time_last_ping"   # TODO find better location for this file
 
 
@@ -53,6 +62,8 @@ class Lcm:
 
     ping_interval_pace = 120  # how many time ping is send once is confirmed all is running
     ping_interval_boot = 5    # how many time ping is sent when booting
+    cfg_logger_name = {"message": "lcm.msg", "database": "lcm.db", "storage": "lcm.fs", "tsdb": "lcm.prometheus"}
+    # ^ contains for each section at lcm.cfg the used logger name
 
     def __init__(self, config_file, loop=None):
         """
@@ -60,7 +71,6 @@ class Lcm:
         :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
         :return: None
         """
-
         self.db = None
         self.msg = None
         self.msg_admin = None
@@ -76,14 +86,19 @@ class Lcm:
         # load configuration
         config = self.read_config_file(config_file)
         self.config = config
-        self.ro_config = {
-            "endpoint_url": "http://{}:{}/openmano".format(config["RO"]["host"], config["RO"]["port"]),
+        self.config["ro_config"] = {
+            "ng": config["RO"].get("ng", False),
+            "uri": config["RO"].get("uri"),
             "tenant": config.get("tenant", "osm"),
-            "logger_name": "lcm.ROclient",
-            "loglevel": "ERROR",
+            "logger_name": "lcm.roclient",
+            "loglevel": config["RO"].get("loglevel", "ERROR"),
         }
-
-        self.vca_config = config["VCA"]
+        if not self.config["ro_config"]["uri"]:
+            if not self.config["ro_config"]["ng"]:
+                self.config["ro_config"]["uri"] = "http://{}:{}/openmano".format(config["RO"]["host"],
+                                                                                 config["RO"]["port"])
+            else:
+                self.config["ro_config"]["uri"] = "http://{}:{}/ro".format(config["RO"]["host"], config["RO"]["port"])
 
         self.loop = loop or asyncio.get_event_loop()
 
@@ -107,7 +122,7 @@ class Lcm:
             self.logger.setLevel(config["global"]["loglevel"])
 
         # logging other modules
-        for k1, logname in {"message": "lcm.msg", "database": "lcm.db", "storage": "lcm.fs"}.items():
+        for k1, logname in self.cfg_logger_name.items():
             config[k1]["logger_name"] = logname
             logger_module = logging.getLogger(logname)
             if config[k1].get("logfile"):
@@ -145,10 +160,14 @@ class Lcm:
             if config["storage"]["driver"] == "local":
                 self.fs = fslocal.FsLocal()
                 self.fs.fs_connect(config["storage"])
+            elif config["storage"]["driver"] == "mongo":
+                self.fs = fsmongo.FsMongo()
+                self.fs.fs_connect(config["storage"])
             else:
                 raise LcmException("Invalid configuration param '{}' at '[storage]':'driver'".format(
                     config["storage"]["driver"]))
 
+            # copy message configuration in order to remove 'group_id' for msg_admin
             config_message = config["message"].copy()
             config_message["loop"] = self.loop
             if config_message["driver"] == "local":
@@ -173,25 +192,48 @@ class Lcm:
         # contains created tasks/futures to be able to cancel
         self.lcm_tasks = TaskRegistry(self.worker_id, self.db, self.logger)
 
-        self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.vca_config, self.loop)
-        self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, 
-                                             self.vca_config, self.loop)
-        self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
-        self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
-        self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
+        if self.config.get("tsdb") and self.config["tsdb"].get("driver"):
+            if self.config["tsdb"]["driver"] == "prometheus":
+                self.prometheus = prometheus.Prometheus(self.config["tsdb"], self.worker_id, self.db, self.loop)
+            else:
+                raise LcmException("Invalid configuration param '{}' at '[tsdb]':'driver'".format(
+                    config["tsdb"]["driver"]))
+        else:
+            self.prometheus = None
+        self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop, self.prometheus)
+        self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop,
+                                             self.ns)
+        self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop)
+        self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop)
+        self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop)
+        self.k8scluster = vim_sdn.K8sClusterLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop)
+        self.k8srepo = vim_sdn.K8sRepoLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop)
 
     async def check_RO_version(self):
-        try:
-            RO = ROclient.ROClient(self.loop, **self.ro_config)
-            RO_version = await RO.get_version()
-            if versiontuple(RO_version) < versiontuple(min_RO_version):
-                raise LcmException("Not compatible osm/RO version '{}.{}.{}'. Needed '{}.{}.{}' or higher".format(
-                    *RO_version, *min_RO_version
-                ))
-        except ROclient.ROClientException as e:
-            error_text = "Error while conneting to osm/RO " + str(e)
-            self.logger.critical(error_text, exc_info=True)
-            raise LcmException(error_text)
+        tries = 14
+        last_error = None
+        while True:
+            try:
+                if self.config["ro_config"].get("ng"):
+                    ro_server = NgRoClient(self.loop, **self.config["ro_config"])
+                else:
+                    ro_server = ROClient(self.loop, **self.config["ro_config"])
+                ro_version = await ro_server.get_version()
+                if versiontuple(ro_version) < versiontuple(min_RO_version):
+                    raise LcmException("Not compatible osm/RO version '{}'. Needed '{}' or higher".format(
+                        ro_version, min_RO_version))
+                self.logger.info("Connected to RO version {}".format(ro_version))
+                return
+            except (ROClientException, NgRoException) as e:
+                tries -= 1
+                error_text = "Error while connecting to RO on {}: {}".format(self.config["ro_config"]["uri"], e)
+                if tries <= 0:
+                    self.logger.critical(error_text)
+                    raise LcmException(error_text)
+                if last_error != error_text:
+                    last_error = error_text
+                    self.logger.error(error_text + ". Waiting until {} seconds".format(5*tries))
+                await asyncio.sleep(5)
 
     async def test(self, param=None):
         self.logger.debug("Starting/Ending test task: {}".format(param))
@@ -263,6 +305,33 @@ class Lcm:
                 except Exception as e:
                     self.logger.error("Cannot write into '{}' for healthcheck: {}".format(health_check_file, e))
             return
+        elif topic == "pla":
+            if command == "placement":
+                self.ns.update_nsrs_with_pla_result(params)
+            return
+        elif topic == "k8scluster":
+            if command == "create" or command == "created":
+                k8scluster_id = params.get("_id")
+                task = asyncio.ensure_future(self.k8scluster.create(params, order_id))
+                self.lcm_tasks.register("k8scluster", k8scluster_id, order_id, "k8scluster_create", task)
+                return
+            elif command == "delete" or command == "deleted":
+                k8scluster_id = params.get("_id")
+                task = asyncio.ensure_future(self.k8scluster.delete(params, order_id))
+                self.lcm_tasks.register("k8scluster", k8scluster_id, order_id, "k8scluster_delete", task)
+                return
+        elif topic == "k8srepo":
+            if command == "create" or command == "created":
+                k8srepo_id = params.get("_id")
+                self.logger.debug("k8srepo_id = {}".format(k8srepo_id))
+                task = asyncio.ensure_future(self.k8srepo.create(params, order_id))
+                self.lcm_tasks.register("k8srepo", k8srepo_id, order_id, "k8srepo_create", task)
+                return
+            elif command == "delete" or command == "deleted":
+                k8srepo_id = params.get("_id")
+                task = asyncio.ensure_future(self.k8srepo.delete(params, order_id))
+                self.lcm_tasks.register("k8srepo", k8srepo_id, order_id, "k8srepo_delete", task)
+                return
         elif topic == "ns":
             if command == "instantiate":
                 # self.logger.debug("Deploying NS {}".format(nsr_id))
@@ -351,11 +420,11 @@ class Lcm:
                 return
         elif topic == "vim_account":
             vim_id = params["_id"]
-            if command == "create":
+            if command in ("create", "created"):
                 task = asyncio.ensure_future(self.vim.create(params, order_id))
                 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_create", task)
                 return
-            elif command == "delete":
+            elif command == "delete" or command == "deleted":
                 self.lcm_tasks.cancel(topic, vim_id)
                 task = asyncio.ensure_future(self.vim.delete(params, order_id))
                 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_delete", task)
@@ -364,17 +433,19 @@ class Lcm:
                 print("not implemented show with vim_account")
                 sys.stdout.flush()
                 return
-            elif command == "edit":
+            elif command in ("edit", "edited"):
                 task = asyncio.ensure_future(self.vim.edit(params, order_id))
                 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_edit", task)
                 return
+            elif command == "deleted":
+                return  # TODO cleaning of task just in case should be done
         elif topic == "wim_account":
             wim_id = params["_id"]
-            if command == "create":
+            if command in ("create", "created"):
                 task = asyncio.ensure_future(self.wim.create(params, order_id))
                 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_create", task)
                 return
-            elif command == "delete":
+            elif command == "delete" or command == "deleted":
                 self.lcm_tasks.cancel(topic, wim_id)
                 task = asyncio.ensure_future(self.wim.delete(params, order_id))
                 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_delete", task)
@@ -383,25 +454,29 @@ class Lcm:
                 print("not implemented show with wim_account")
                 sys.stdout.flush()
                 return
-            elif command == "edit":
+            elif command in ("edit", "edited"):
                 task = asyncio.ensure_future(self.wim.edit(params, order_id))
                 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_edit", task)
                 return
+            elif command == "deleted":
+                return  # TODO cleaning of task just in case should be done
         elif topic == "sdn":
             _sdn_id = params["_id"]
-            if command == "create":
+            if command in ("create", "created"):
                 task = asyncio.ensure_future(self.sdn.create(params, order_id))
                 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_create", task)
                 return
-            elif command == "delete":
+            elif command == "delete" or command == "deleted":
                 self.lcm_tasks.cancel(topic, _sdn_id)
                 task = asyncio.ensure_future(self.sdn.delete(params, order_id))
                 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_delete", task)
                 return
-            elif command == "edit":
+            elif command in ("edit", "edited"):
                 task = asyncio.ensure_future(self.sdn.edit(params, order_id))
                 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_edit", task)
                 return
+            elif command == "deleted":
+                return  # TODO cleaning of task just in case should be done
         self.logger.critical("unknown topic {} and command '{}'".format(topic, command))
 
     async def kafka_read(self):
@@ -411,10 +486,10 @@ class Lcm:
         self.first_start = True
         while self.consecutive_errors < 10:
             try:
-                topics = ("ns", "vim_account", "wim_account", "sdn", "nsi")
+                topics = ("ns", "vim_account", "wim_account", "sdn", "nsi", "k8scluster", "k8srepo", "pla")
                 topics_admin = ("admin", )
                 await asyncio.gather(
-                    self.msg.aioread(topics, self.loop, self.kafka_read_callback),
+                    self.msg.aioread(topics, self.loop, self.kafka_read_callback, from_beginning=True),
                     self.msg_admin.aioread(topics_admin, self.loop, self.kafka_read_callback, group_id=False)
                 )
 
@@ -440,6 +515,10 @@ class Lcm:
         # check RO version
         self.loop.run_until_complete(self.check_RO_version())
 
+        # configure tsdb prometheus
+        if self.prometheus:
+            self.loop.run_until_complete(self.prometheus.start())
+
         self.loop.run_until_complete(asyncio.gather(
             self.kafka_read(),
             self.kafka_ping()
@@ -470,27 +549,51 @@ class Lcm:
         # the configparser library is not suitable, because it does not admit comments at the end of line,
         # and not parse integer or boolean
         try:
+            # read file as yaml format
             with open(config_file) as f:
-                conf = yaml.load(f)
+                conf = yaml.load(f, Loader=yaml.Loader)
+            # Ensure all sections are not empty
+            for k in ("global", "timeout", "RO", "VCA", "database", "storage", "message"):
+                if not conf.get(k):
+                    conf[k] = {}
+
+            # read all environ that starts with OSMLCM_
             for k, v in environ.items():
                 if not k.startswith("OSMLCM_"):
                     continue
-                k_items = k.lower().split("_")
-                if len(k_items) < 3:
+                subject, _, item = k[7:].lower().partition("_")
+                if not item:
                     continue
-                if k_items[1] in ("ro", "vca"):
+                if subject in ("ro", "vca"):
                     # put in capital letter
-                    k_items[1] = k_items[1].upper()
-                c = conf
+                    subject = subject.upper()
                 try:
-                    for k_item in k_items[1:-1]:
-                        c = c[k_item]
-                    if k_items[-1] == "port":
-                        c[k_items[-1]] = int(v)
+                    if item == "port" or subject == "timeout":
+                        conf[subject][item] = int(v)
                     else:
-                        c[k_items[-1]] = v
+                        conf[subject][item] = v
                 except Exception as e:
-                    self.logger.warn("skipping environ '{}' on exception '{}'".format(k, e))
+                    self.logger.warning("skipping environ '{}' on exception '{}'".format(k, e))
+
+            # backward compatibility of VCA parameters
+
+            if 'pubkey' in conf["VCA"]:
+                conf["VCA"]['public_key'] = conf["VCA"].pop('pubkey')
+            if 'cacert' in conf["VCA"]:
+                conf["VCA"]['ca_cert'] = conf["VCA"].pop('cacert')
+            if 'apiproxy' in conf["VCA"]:
+                conf["VCA"]['api_proxy'] = conf["VCA"].pop('apiproxy')
+
+            if 'enableosupgrade' in conf["VCA"]:
+                conf["VCA"]['enable_os_upgrade'] = conf["VCA"].pop('enableosupgrade')
+            if isinstance(conf["VCA"].get('enable_os_upgrade'), str):
+                if conf["VCA"]['enable_os_upgrade'].lower() == 'false':
+                    conf["VCA"]['enable_os_upgrade'] = False
+                elif conf["VCA"]['enable_os_upgrade'].lower() == 'true':
+                    conf["VCA"]['enable_os_upgrade'] = True
+
+            if 'aptmirror' in conf["VCA"]:
+                conf["VCA"]['apt_mirror'] = conf["VCA"].pop('aptmirror')
 
             return conf
         except Exception as e:
@@ -520,7 +623,7 @@ class Lcm:
 
 def usage():
     print("""Usage: {} [options]
-        -c|--config [configuration_file]: loads the configuration file (default: ./nbi.cfg)
+        -c|--config [configuration_file]: loads the configuration file (default: ./lcm.cfg)
         --health-check: do not run lcm, but inspect kafka bus to determine if lcm is healthy
         -h|--help: shows this help
         """.format(sys.argv[0]))
@@ -528,26 +631,16 @@ def usage():
     # --log-socket-port PORT: send logs using this port (default: 9022)")
 
 
-def health_check():
-    retry = 2
-    while retry:
-        retry -= 1
-        try:
-            with open(health_check_file, "r") as f:
-                last_received_ping = f.read()
-
-            if time() - float(last_received_ping) < Lcm.ping_interval_pace + 10:
-                exit(0)
-        except Exception:
-            pass
-        if retry:
-            sleep(6)
-    exit(1)
-
-
 if __name__ == '__main__':
+
     try:
+        # print("SYS.PATH='{}'".format(sys.path))
         # load parameters and configuration
+        # -h
+        # -c value
+        # --config value
+        # --help
+        # --health-check
         opts, args = getopt.getopt(sys.argv[1:], "hc:", ["config=", "help", "health-check"])
         # TODO add  "log-socket-host=", "log-socket-port=", "log-file="
         config_file = None
@@ -558,7 +651,8 @@ if __name__ == '__main__':
             elif o in ("-c", "--config"):
                 config_file = a
             elif o == "--health-check":
-                health_check()
+                from osm_lcm.lcm_hc import health_check
+                health_check(health_check_file, Lcm.ping_interval_pace)
             # elif o == "--log-socket-port":
             #     log_socket_port = a
             # elif o == "--log-socket-host":
@@ -567,9 +661,10 @@ if __name__ == '__main__':
             #     log_file = a
             else:
                 assert False, "Unhandled option"
+
         if config_file:
             if not path.isfile(config_file):
-                print("configuration file '{}' not exist".format(config_file), file=sys.stderr)
+                print("configuration file '{}' does not exist".format(config_file), file=sys.stderr)
                 exit(1)
         else:
             for config_file in (__file__[:__file__.rfind(".")] + ".cfg", "./lcm.cfg", "/etc/osm/lcm.cfg"):