X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_lcm%2Flcm.py;h=d8ac91f9597a25d23ba64b0ab14f916e6cf6fc0e;hb=refs%2Fheads%2Ffeature7928;hp=ca89b8ba78d30a457c6d7de120d5c5d986429c5f;hpb=a009e55ba79a19ce8c9b04fac4e5554550d94975;p=osm%2FLCM.git diff --git a/osm_lcm/lcm.py b/osm_lcm/lcm.py index ca89b8b..d8ac91f 100644 --- a/osm_lcm/lcm.py +++ b/osm_lcm/lcm.py @@ -17,35 +17,49 @@ # under the License. ## + +# DEBUG WITH PDB +import os +import pdb + import asyncio import yaml import logging import logging.handlers import getopt import sys -import ROclient -import ns -import vim_sdn -import netslice -from lcm_utils import versiontuple, LcmException, TaskRegistry, LcmExceptionExit -# from osm_lcm import version as lcm_version, version_date as lcm_version_date, ROclient +from osm_lcm import ns +from osm_lcm import vim_sdn +from osm_lcm import netslice +from osm_lcm import ROclient + +from time import time, sleep +from osm_lcm.lcm_utils import versiontuple, LcmException, TaskRegistry, LcmExceptionExit +from osm_lcm import version as lcm_version, version_date as lcm_version_date + from osm_common import dbmemory, dbmongo, fslocal, msglocal, msgkafka from osm_common import version as common_version from osm_common.dbbase import DbException from osm_common.fsbase import FsException from osm_common.msgbase import MsgException from os import environ, path +from random import choice as random_choice from n2vc import version as n2vc_version +if os.getenv('OSMLCM_PDB_DEBUG', None) is not None: + pdb.set_trace() + __author__ = "Alfonso Tierno" -min_RO_version = [0, 6, 3] +min_RO_version = "6.0.2" min_n2vc_version = "0.0.2" -min_common_version = "0.1.11" + +min_common_version = "0.1.19" # uncomment if LCM is installed as library and installed, and get them from __init__.py -lcm_version = '0.1.33' -lcm_version_date = '2019-01-31' +# lcm_version = '0.1.41' +# lcm_version_date = '2019-06-19' +health_check_file = path.expanduser("~") + "/time_last_ping" # TODO find better location for this file class Lcm: @@ -62,15 +76,16 @@ class Lcm: self.db = None self.msg = None + self.msg_admin = None self.fs = None self.pings_not_received = 1 self.consecutive_errors = 0 self.first_start = False - # contains created tasks/futures to be able to cancel - self.lcm_tasks = TaskRegistry() # logging self.logger = logging.getLogger('lcm') + # get id + self.worker_id = self.get_process_id() # load configuration config = self.read_config_file(config_file) self.config = config @@ -147,14 +162,21 @@ class Lcm: raise LcmException("Invalid configuration param '{}' at '[storage]':'driver'".format( config["storage"]["driver"])) + # copy message configuration in order to remove 'group_id' for msg_admin config_message = config["message"].copy() config_message["loop"] = self.loop if config_message["driver"] == "local": self.msg = msglocal.MsgLocal() self.msg.connect(config_message) + self.msg_admin = msglocal.MsgLocal() + config_message.pop("group_id", None) + self.msg_admin.connect(config_message) elif config_message["driver"] == "kafka": self.msg = msgkafka.MsgKafka() self.msg.connect(config_message) + self.msg_admin = msgkafka.MsgKafka() + config_message.pop("group_id", None) + self.msg_admin.connect(config_message) else: raise LcmException("Invalid configuration param '{}' at '[message]':'driver'".format( config["message"]["driver"])) @@ -162,25 +184,40 @@ class Lcm: self.logger.critical(str(e), exc_info=True) raise LcmException(str(e)) + # contains created tasks/futures to be able to cancel + self.lcm_tasks = TaskRegistry(self.worker_id, self.db, self.logger) + self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.vca_config, self.loop) self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.vca_config, self.loop) self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop) self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop) self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop) + self.k8scluster = vim_sdn.K8sClusterLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.vca_config, self.loop) + self.k8srepo = vim_sdn.K8sRepoLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.vca_config, self.loop) async def check_RO_version(self): - try: - RO = ROclient.ROClient(self.loop, **self.ro_config) - RO_version = await RO.get_version() - if RO_version < min_RO_version: - raise LcmException("Not compatible osm/RO version '{}.{}.{}'. Needed '{}.{}.{}' or higher".format( - *RO_version, *min_RO_version - )) - except ROclient.ROClientException as e: - error_text = "Error while conneting to osm/RO " + str(e) - self.logger.critical(error_text, exc_info=True) - raise LcmException(error_text) + tries = 14 + last_error = None + while True: + try: + ro_server = ROclient.ROClient(self.loop, **self.ro_config) + ro_version = await ro_server.get_version() + if versiontuple(ro_version) < versiontuple(min_RO_version): + raise LcmException("Not compatible osm/RO version '{}'. Needed '{}' or higher".format( + ro_version, min_RO_version)) + self.logger.info("Connected to RO version {}".format(ro_version)) + return + except ROclient.ROClientException as e: + tries -= 1 + error_text = "Error while connecting to RO on {}: {}".format(self.ro_config["endpoint_url"], e) + if tries <= 0: + self.logger.critical(error_text) + raise LcmException(error_text) + if last_error != error_text: + last_error = error_text + self.logger.error(error_text + ". Waiting until {} seconds".format(5*tries)) + await asyncio.sleep(5) async def test(self, param=None): self.logger.debug("Starting/Ending test task: {}".format(param)) @@ -193,7 +230,10 @@ class Lcm: self.pings_not_received = 1 while True: try: - await self.msg.aiowrite("admin", "ping", {"from": "lcm", "to": "lcm"}, self.loop) + await self.msg_admin.aiowrite( + "admin", "ping", + {"from": "lcm", "to": "lcm", "worker_id": self.worker_id, "version": lcm_version}, + self.loop) # time between pings are low when it is not received and at starting wait_time = self.ping_interval_boot if not kafka_has_received else self.ping_interval_pace if not self.pings_not_received: @@ -214,7 +254,7 @@ class Lcm: raise consecutive_errors += 1 self.logger.error("Task kafka_read retrying after Exception {}".format(e)) - wait_time = 1 if not first_start else 5 + wait_time = 2 if not first_start else 5 await asyncio.sleep(wait_time, loop=self.loop) def kafka_read_callback(self, topic, command, params): @@ -240,10 +280,40 @@ class Lcm: if topic == "admin": if command == "ping" and params["to"] == "lcm" and params["from"] == "lcm": + if params.get("worker_id") != self.worker_id: + return self.pings_not_received = 0 + try: + with open(health_check_file, "w") as f: + f.write(str(time())) + except Exception as e: + self.logger.error("Cannot write into '{}' for healthcheck: {}".format(health_check_file, e)) return + elif topic == "k8scluster": + if command == "create" or command == "created": + k8scluster_id = params.get("_id") + task = asyncio.ensure_future(self.k8scluster.create(params, order_id)) + self.lcm_tasks.register("k8scluster", k8scluster_id, order_id, "k8scluster_create", task) + return + elif command == "delete" or command == "deleted": + k8scluster_id = params.get("_id") + task = asyncio.ensure_future(self.k8scluster.delete(params, order_id)) + self.lcm_tasks.register("k8scluster", k8scluster_id, order_id, "k8scluster_delete", task) + return + elif topic == "k8srepo": + if command == "create" or command == "created": + k8srepo_id = params.get("_id") + self.logger.debug("k8srepo_id = {}".format(k8srepo_id)) + task = asyncio.ensure_future(self.k8srepo.create(params, order_id)) + self.lcm_tasks.register("k8srepo", k8srepo_id, order_id, "k8srepo_create", task) + return + elif command == "delete" or command == "deleted": + k8srepo_id = params.get("_id") + task = asyncio.ensure_future(self.k8srepo.delete(params, order_id)) + self.lcm_tasks.register("k8srepo", k8srepo_id, order_id, "k8srepo_delete", task) + return elif topic == "ns": - if command == "instantiate": + if command == "instantiate" or command == "instantiated": # self.logger.debug("Deploying NS {}".format(nsr_id)) nslcmop = params nslcmop_id = nslcmop["_id"] @@ -251,7 +321,7 @@ class Lcm: task = asyncio.ensure_future(self.ns.instantiate(nsr_id, nslcmop_id)) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_instantiate", task) return - elif command == "terminate": + elif command == "terminate" or command == "terminated": # self.logger.debug("Deleting NS {}".format(nsr_id)) nslcmop = params nslcmop_id = nslcmop["_id"] @@ -294,7 +364,7 @@ class Lcm: elif command in ("terminated", "instantiated", "scaled", "actioned"): # "scaled-cooldown-time" return elif topic == "nsi": # netslice LCM processes (instantiate, terminate, etc) - if command == "instantiate": + if command == "instantiate" or command == "instantiated": # self.logger.debug("Instantiating Network Slice {}".format(nsilcmop["netsliceInstanceId"])) nsilcmop = params nsilcmop_id = nsilcmop["_id"] # slice operation id @@ -302,7 +372,7 @@ class Lcm: task = asyncio.ensure_future(self.netslice.instantiate(nsir_id, nsilcmop_id)) self.lcm_tasks.register("nsi", nsir_id, nsilcmop_id, "nsi_instantiate", task) return - elif command == "terminate": + elif command == "terminate" or command == "terminated": # self.logger.debug("Terminating Network Slice NS {}".format(nsilcmop["netsliceInstanceId"])) nsilcmop = params nsilcmop_id = nsilcmop["_id"] # slice operation id @@ -330,68 +400,72 @@ class Lcm: return elif topic == "vim_account": vim_id = params["_id"] - if command == "create": + if command == "create" or command == "created": task = asyncio.ensure_future(self.vim.create(params, order_id)) self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_create", task) return - elif command == "delete": + elif command == "delete" or command == "deleted": self.lcm_tasks.cancel(topic, vim_id) - task = asyncio.ensure_future(self.vim.delete(vim_id, order_id)) + task = asyncio.ensure_future(self.vim.delete(params, order_id)) self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_delete", task) return elif command == "show": print("not implemented show with vim_account") sys.stdout.flush() return - elif command == "edit": + elif command == "edit" or command == "edited": task = asyncio.ensure_future(self.vim.edit(params, order_id)) self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_edit", task) return elif topic == "wim_account": wim_id = params["_id"] - if command == "create": + if command == "create" or command == "created": task = asyncio.ensure_future(self.wim.create(params, order_id)) self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_create", task) return - elif command == "delete": + elif command == "delete" or command == "deleted": self.lcm_tasks.cancel(topic, wim_id) - task = asyncio.ensure_future(self.wim.delete(wim_id, order_id)) + task = asyncio.ensure_future(self.wim.delete(params, order_id)) self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_delete", task) return elif command == "show": print("not implemented show with wim_account") sys.stdout.flush() return - elif command == "edit": + elif command == "edit" or command == "edited": task = asyncio.ensure_future(self.wim.edit(params, order_id)) self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_edit", task) return elif topic == "sdn": _sdn_id = params["_id"] - if command == "create": + if command == "create" or command == "created": task = asyncio.ensure_future(self.sdn.create(params, order_id)) self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_create", task) return - elif command == "delete": + elif command == "delete" or command == "deleted": self.lcm_tasks.cancel(topic, _sdn_id) - task = asyncio.ensure_future(self.sdn.delete(_sdn_id, order_id)) + task = asyncio.ensure_future(self.sdn.delete(params, order_id)) self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_delete", task) return - elif command == "edit": + elif command == "edit" or command == "edited": task = asyncio.ensure_future(self.sdn.edit(params, order_id)) self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_edit", task) return self.logger.critical("unknown topic {} and command '{}'".format(topic, command)) async def kafka_read(self): - self.logger.debug("Task kafka_read Enter") + self.logger.debug("Task kafka_read Enter with worker_id={}".format(self.worker_id)) # future = asyncio.Future() self.consecutive_errors = 0 self.first_start = True while self.consecutive_errors < 10: try: - topics = ("admin", "ns", "vim_account", "wim_account", "sdn", "nsi") - await self.msg.aioread(topics, self.loop, self.kafka_read_callback) + topics = ("ns", "vim_account", "wim_account", "sdn", "nsi", "k8scluster", "k8srepo") + topics_admin = ("admin", ) + await asyncio.gather( + self.msg.aioread(topics, self.loop, self.kafka_read_callback), + self.msg_admin.aioread(topics_admin, self.loop, self.kafka_read_callback, group_id=False) + ) except LcmExceptionExit: self.logger.debug("Bye!") @@ -410,27 +484,6 @@ class Lcm: # self.logger.debug("Task kafka_read terminating") self.logger.debug("Task kafka_read exit") - def health_check(self): - - global exit_code - task = None - exit_code = 1 - - def health_check_callback(topic, command, params): - global exit_code - print("receiving callback {} {} {}".format(topic, command, params)) - if topic == "admin" and command == "ping" and params["to"] == "lcm" and params["from"] == "lcm": - # print("received LCM ping") - exit_code = 0 - task.cancel() - - try: - task = asyncio.ensure_future(self.msg.aioread(("admin",), self.loop, health_check_callback)) - self.loop.run_until_complete(task) - except Exception: - pass - exit(exit_code) - def start(self): # check RO version @@ -456,6 +509,8 @@ class Lcm: self.db.db_disconnect() if self.msg: self.msg.disconnect() + if self.msg_admin: + self.msg_admin.disconnect() if self.fs: self.fs.fs_disconnect() @@ -491,10 +546,30 @@ class Lcm: self.logger.critical("At config file '{}': {}".format(config_file, e)) exit(1) + @staticmethod + def get_process_id(): + """ + Obtain a unique ID for this process. If running from inside docker, it will get docker ID. If not it + will provide a random one + :return: Obtained ID + """ + # Try getting docker id. If fails, get pid + try: + with open("/proc/self/cgroup", "r") as f: + text_id_ = f.readline() + _, _, text_id = text_id_.rpartition("/") + text_id = text_id.replace('\n', '')[:12] + if text_id: + return text_id + except Exception: + pass + # Return a random id + return ''.join(random_choice("0123456789abcdef") for _ in range(12)) + def usage(): print("""Usage: {} [options] - -c|--config [configuration_file]: loads the configuration file (default: ./nbi.cfg) + -c|--config [configuration_file]: loads the configuration file (default: ./lcm.cfg) --health-check: do not run lcm, but inspect kafka bus to determine if lcm is healthy -h|--help: shows this help """.format(sys.argv[0])) @@ -502,13 +577,35 @@ def usage(): # --log-socket-port PORT: send logs using this port (default: 9022)") +def health_check(): + retry = 2 + while retry: + retry -= 1 + try: + with open(health_check_file, "r") as f: + last_received_ping = f.read() + + if time() - float(last_received_ping) < Lcm.ping_interval_pace + 10: + exit(0) + except Exception: + pass + if retry: + sleep(6) + exit(1) + + if __name__ == '__main__': + try: # load parameters and configuration + # -h + # -c value + # --config value + # --help + # --health-check opts, args = getopt.getopt(sys.argv[1:], "hc:", ["config=", "help", "health-check"]) # TODO add "log-socket-host=", "log-socket-port=", "log-file=" config_file = None - health_check = None for o, a in opts: if o in ("-h", "--help"): usage() @@ -516,7 +613,7 @@ if __name__ == '__main__': elif o in ("-c", "--config"): config_file = a elif o == "--health-check": - health_check = True + health_check() # elif o == "--log-socket-port": # log_socket_port = a # elif o == "--log-socket-host": @@ -525,9 +622,10 @@ if __name__ == '__main__': # log_file = a else: assert False, "Unhandled option" + if config_file: if not path.isfile(config_file): - print("configuration file '{}' not exist".format(config_file), file=sys.stderr) + print("configuration file '{}' does not exist".format(config_file), file=sys.stderr) exit(1) else: for config_file in (__file__[:__file__.rfind(".")] + ".cfg", "./lcm.cfg", "/etc/osm/lcm.cfg"): @@ -537,10 +635,7 @@ if __name__ == '__main__': print("No configuration file 'lcm.cfg' found neither at local folder nor at /etc/osm/", file=sys.stderr) exit(1) lcm = Lcm(config_file) - if health_check: - lcm.health_check() - else: - lcm.start() + lcm.start() except (LcmException, getopt.GetoptError) as e: print(str(e), file=sys.stderr) # usage()