8f66055883d3ef2be8446db22e05732c2bd335a0
[osm/LCM.git] / osm_lcm / lcm.py
1 #!/usr/bin/python3
2 # -*- coding: utf-8 -*-
3
4 ##
5 # Copyright 2018 Telefonica S.A.
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License"); you may
8 # not use this file except in compliance with the License. You may obtain
9 # a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16 # License for the specific language governing permissions and limitations
17 # under the License.
18 ##
19
20 import asyncio
21 import yaml
22 import logging
23 import logging.handlers
24 import getopt
25 import sys
26 import ROclient
27 import ns
28 import vim_sdn
29 import netslice
30 from time import time, sleep
31 from lcm_utils import versiontuple, LcmException, TaskRegistry, LcmExceptionExit
32
33 # from osm_lcm import version as lcm_version, version_date as lcm_version_date, ROclient
34 from osm_common import dbmemory, dbmongo, fslocal, msglocal, msgkafka
35 from osm_common import version as common_version
36 from osm_common.dbbase import DbException
37 from osm_common.fsbase import FsException
38 from osm_common.msgbase import MsgException
39 from os import environ, path
40 from random import choice as random_choice
41 from n2vc import version as n2vc_version
42
43
44 __author__ = "Alfonso Tierno"
45 min_RO_version = [0, 6, 3]
46 min_n2vc_version = "0.0.2"
47 min_common_version = "0.1.19"
48 # uncomment if LCM is installed as library and installed, and get them from __init__.py
49 lcm_version = '0.1.41'
50 lcm_version_date = '2019-06-19'
51 health_check_file = path.expanduser("~") + "/time_last_ping" # TODO find better location for this file
52
53
54 class Lcm:
55
56 ping_interval_pace = 120 # how many time ping is send once is confirmed all is running
57 ping_interval_boot = 5 # how many time ping is sent when booting
58
59 def __init__(self, config_file, loop=None):
60 """
61 Init, Connect to database, filesystem storage, and messaging
62 :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
63 :return: None
64 """
65
66 self.db = None
67 self.msg = None
68 self.msg_admin = None
69 self.fs = None
70 self.pings_not_received = 1
71 self.consecutive_errors = 0
72 self.first_start = False
73
74 # contains created tasks/futures to be able to cancel
75 self.lcm_tasks = TaskRegistry()
76 # logging
77 self.logger = logging.getLogger('lcm')
78 # get id
79 self.worker_id = self.get_process_id()
80 # load configuration
81 config = self.read_config_file(config_file)
82 self.config = config
83 self.ro_config = {
84 "endpoint_url": "http://{}:{}/openmano".format(config["RO"]["host"], config["RO"]["port"]),
85 "tenant": config.get("tenant", "osm"),
86 "logger_name": "lcm.ROclient",
87 "loglevel": "ERROR",
88 }
89
90 self.vca_config = config["VCA"]
91
92 self.loop = loop or asyncio.get_event_loop()
93
94 # logging
95 log_format_simple = "%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)s %(message)s"
96 log_formatter_simple = logging.Formatter(log_format_simple, datefmt='%Y-%m-%dT%H:%M:%S')
97 config["database"]["logger_name"] = "lcm.db"
98 config["storage"]["logger_name"] = "lcm.fs"
99 config["message"]["logger_name"] = "lcm.msg"
100 if config["global"].get("logfile"):
101 file_handler = logging.handlers.RotatingFileHandler(config["global"]["logfile"],
102 maxBytes=100e6, backupCount=9, delay=0)
103 file_handler.setFormatter(log_formatter_simple)
104 self.logger.addHandler(file_handler)
105 if not config["global"].get("nologging"):
106 str_handler = logging.StreamHandler()
107 str_handler.setFormatter(log_formatter_simple)
108 self.logger.addHandler(str_handler)
109
110 if config["global"].get("loglevel"):
111 self.logger.setLevel(config["global"]["loglevel"])
112
113 # logging other modules
114 for k1, logname in {"message": "lcm.msg", "database": "lcm.db", "storage": "lcm.fs"}.items():
115 config[k1]["logger_name"] = logname
116 logger_module = logging.getLogger(logname)
117 if config[k1].get("logfile"):
118 file_handler = logging.handlers.RotatingFileHandler(config[k1]["logfile"],
119 maxBytes=100e6, backupCount=9, delay=0)
120 file_handler.setFormatter(log_formatter_simple)
121 logger_module.addHandler(file_handler)
122 if config[k1].get("loglevel"):
123 logger_module.setLevel(config[k1]["loglevel"])
124 self.logger.critical("starting osm/lcm version {} {}".format(lcm_version, lcm_version_date))
125
126 # check version of N2VC
127 # TODO enhance with int conversion or from distutils.version import LooseVersion
128 # or with list(map(int, version.split(".")))
129 if versiontuple(n2vc_version) < versiontuple(min_n2vc_version):
130 raise LcmException("Not compatible osm/N2VC version '{}'. Needed '{}' or higher".format(
131 n2vc_version, min_n2vc_version))
132 # check version of common
133 if versiontuple(common_version) < versiontuple(min_common_version):
134 raise LcmException("Not compatible osm/common version '{}'. Needed '{}' or higher".format(
135 common_version, min_common_version))
136
137 try:
138 # TODO check database version
139 if config["database"]["driver"] == "mongo":
140 self.db = dbmongo.DbMongo()
141 self.db.db_connect(config["database"])
142 elif config["database"]["driver"] == "memory":
143 self.db = dbmemory.DbMemory()
144 self.db.db_connect(config["database"])
145 else:
146 raise LcmException("Invalid configuration param '{}' at '[database]':'driver'".format(
147 config["database"]["driver"]))
148
149 if config["storage"]["driver"] == "local":
150 self.fs = fslocal.FsLocal()
151 self.fs.fs_connect(config["storage"])
152 else:
153 raise LcmException("Invalid configuration param '{}' at '[storage]':'driver'".format(
154 config["storage"]["driver"]))
155
156 config_message = config["message"].copy()
157 config_message["loop"] = self.loop
158 if config_message["driver"] == "local":
159 self.msg = msglocal.MsgLocal()
160 self.msg.connect(config_message)
161 self.msg_admin = msglocal.MsgLocal()
162 config_message.pop("group_id", None)
163 self.msg_admin.connect(config_message)
164 elif config_message["driver"] == "kafka":
165 self.msg = msgkafka.MsgKafka()
166 self.msg.connect(config_message)
167 self.msg_admin = msgkafka.MsgKafka()
168 config_message.pop("group_id", None)
169 self.msg_admin.connect(config_message)
170 else:
171 raise LcmException("Invalid configuration param '{}' at '[message]':'driver'".format(
172 config["message"]["driver"]))
173 except (DbException, FsException, MsgException) as e:
174 self.logger.critical(str(e), exc_info=True)
175 raise LcmException(str(e))
176
177 self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.vca_config, self.loop)
178 self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config,
179 self.vca_config, self.loop)
180 self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
181 self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
182 self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
183
184 async def check_RO_version(self):
185 try:
186 RO = ROclient.ROClient(self.loop, **self.ro_config)
187 RO_version = await RO.get_version()
188 if RO_version < min_RO_version:
189 raise LcmException("Not compatible osm/RO version '{}.{}.{}'. Needed '{}.{}.{}' or higher".format(
190 *RO_version, *min_RO_version
191 ))
192 except ROclient.ROClientException as e:
193 error_text = "Error while conneting to osm/RO " + str(e)
194 self.logger.critical(error_text, exc_info=True)
195 raise LcmException(error_text)
196
197 async def test(self, param=None):
198 self.logger.debug("Starting/Ending test task: {}".format(param))
199
200 async def kafka_ping(self):
201 self.logger.debug("Task kafka_ping Enter")
202 consecutive_errors = 0
203 first_start = True
204 kafka_has_received = False
205 self.pings_not_received = 1
206 while True:
207 try:
208 await self.msg_admin.aiowrite(
209 "admin", "ping",
210 {"from": "lcm", "to": "lcm", "worker_id": self.worker_id, "version": lcm_version},
211 self.loop)
212 # time between pings are low when it is not received and at starting
213 wait_time = self.ping_interval_boot if not kafka_has_received else self.ping_interval_pace
214 if not self.pings_not_received:
215 kafka_has_received = True
216 self.pings_not_received += 1
217 await asyncio.sleep(wait_time, loop=self.loop)
218 if self.pings_not_received > 10:
219 raise LcmException("It is not receiving pings from Kafka bus")
220 consecutive_errors = 0
221 first_start = False
222 except LcmException:
223 raise
224 except Exception as e:
225 # if not first_start is the first time after starting. So leave more time and wait
226 # to allow kafka starts
227 if consecutive_errors == 8 if not first_start else 30:
228 self.logger.error("Task kafka_read task exit error too many errors. Exception: {}".format(e))
229 raise
230 consecutive_errors += 1
231 self.logger.error("Task kafka_read retrying after Exception {}".format(e))
232 wait_time = 2 if not first_start else 5
233 await asyncio.sleep(wait_time, loop=self.loop)
234
235 def kafka_read_callback(self, topic, command, params):
236 order_id = 1
237
238 if topic != "admin" and command != "ping":
239 self.logger.debug("Task kafka_read receives {} {}: {}".format(topic, command, params))
240 self.consecutive_errors = 0
241 self.first_start = False
242 order_id += 1
243 if command == "exit":
244 raise LcmExceptionExit
245 elif command.startswith("#"):
246 return
247 elif command == "echo":
248 # just for test
249 print(params)
250 sys.stdout.flush()
251 return
252 elif command == "test":
253 asyncio.Task(self.test(params), loop=self.loop)
254 return
255
256 if topic == "admin":
257 if command == "ping" and params["to"] == "lcm" and params["from"] == "lcm":
258 if params.get("worker_id") != self.worker_id:
259 return
260 self.pings_not_received = 0
261 try:
262 with open(health_check_file, "w") as f:
263 f.write(str(time()))
264 except Exception as e:
265 self.logger.error("Cannot write into '{}' for healthcheck: {}".format(health_check_file, e))
266 return
267 elif topic == "ns":
268 if command == "instantiate":
269 # self.logger.debug("Deploying NS {}".format(nsr_id))
270 nslcmop = params
271 nslcmop_id = nslcmop["_id"]
272 nsr_id = nslcmop["nsInstanceId"]
273 task = asyncio.ensure_future(self.ns.instantiate(nsr_id, nslcmop_id))
274 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_instantiate", task)
275 return
276 elif command == "terminate":
277 # self.logger.debug("Deleting NS {}".format(nsr_id))
278 nslcmop = params
279 nslcmop_id = nslcmop["_id"]
280 nsr_id = nslcmop["nsInstanceId"]
281 self.lcm_tasks.cancel(topic, nsr_id)
282 task = asyncio.ensure_future(self.ns.terminate(nsr_id, nslcmop_id))
283 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_terminate", task)
284 return
285 elif command == "action":
286 # self.logger.debug("Update NS {}".format(nsr_id))
287 nslcmop = params
288 nslcmop_id = nslcmop["_id"]
289 nsr_id = nslcmop["nsInstanceId"]
290 task = asyncio.ensure_future(self.ns.action(nsr_id, nslcmop_id))
291 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_action", task)
292 return
293 elif command == "scale":
294 # self.logger.debug("Update NS {}".format(nsr_id))
295 nslcmop = params
296 nslcmop_id = nslcmop["_id"]
297 nsr_id = nslcmop["nsInstanceId"]
298 task = asyncio.ensure_future(self.ns.scale(nsr_id, nslcmop_id))
299 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_scale", task)
300 return
301 elif command == "show":
302 nsr_id = params
303 try:
304 db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
305 print("nsr:\n _id={}\n operational-status: {}\n config-status: {}"
306 "\n detailed-status: {}\n deploy: {}\n tasks: {}"
307 "".format(nsr_id, db_nsr["operational-status"], db_nsr["config-status"],
308 db_nsr["detailed-status"],
309 db_nsr["_admin"]["deployed"], self.lcm_ns_tasks.get(nsr_id)))
310 except Exception as e:
311 print("nsr {} not found: {}".format(nsr_id, e))
312 sys.stdout.flush()
313 return
314 elif command == "deleted":
315 return # TODO cleaning of task just in case should be done
316 elif command in ("terminated", "instantiated", "scaled", "actioned"): # "scaled-cooldown-time"
317 return
318 elif topic == "nsi": # netslice LCM processes (instantiate, terminate, etc)
319 if command == "instantiate":
320 # self.logger.debug("Instantiating Network Slice {}".format(nsilcmop["netsliceInstanceId"]))
321 nsilcmop = params
322 nsilcmop_id = nsilcmop["_id"] # slice operation id
323 nsir_id = nsilcmop["netsliceInstanceId"] # slice record id
324 task = asyncio.ensure_future(self.netslice.instantiate(nsir_id, nsilcmop_id))
325 self.lcm_tasks.register("nsi", nsir_id, nsilcmop_id, "nsi_instantiate", task)
326 return
327 elif command == "terminate":
328 # self.logger.debug("Terminating Network Slice NS {}".format(nsilcmop["netsliceInstanceId"]))
329 nsilcmop = params
330 nsilcmop_id = nsilcmop["_id"] # slice operation id
331 nsir_id = nsilcmop["netsliceInstanceId"] # slice record id
332 self.lcm_tasks.cancel(topic, nsir_id)
333 task = asyncio.ensure_future(self.netslice.terminate(nsir_id, nsilcmop_id))
334 self.lcm_tasks.register("nsi", nsir_id, nsilcmop_id, "nsi_terminate", task)
335 return
336 elif command == "show":
337 nsir_id = params
338 try:
339 db_nsir = self.db.get_one("nsirs", {"_id": nsir_id})
340 print("nsir:\n _id={}\n operational-status: {}\n config-status: {}"
341 "\n detailed-status: {}\n deploy: {}\n tasks: {}"
342 "".format(nsir_id, db_nsir["operational-status"], db_nsir["config-status"],
343 db_nsir["detailed-status"],
344 db_nsir["_admin"]["deployed"], self.lcm_netslice_tasks.get(nsir_id)))
345 except Exception as e:
346 print("nsir {} not found: {}".format(nsir_id, e))
347 sys.stdout.flush()
348 return
349 elif command == "deleted":
350 return # TODO cleaning of task just in case should be done
351 elif command in ("terminated", "instantiated", "scaled", "actioned"): # "scaled-cooldown-time"
352 return
353 elif topic == "vim_account":
354 vim_id = params["_id"]
355 if command == "create":
356 task = asyncio.ensure_future(self.vim.create(params, order_id))
357 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_create", task)
358 return
359 elif command == "delete":
360 self.lcm_tasks.cancel(topic, vim_id)
361 task = asyncio.ensure_future(self.vim.delete(vim_id, order_id))
362 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_delete", task)
363 return
364 elif command == "show":
365 print("not implemented show with vim_account")
366 sys.stdout.flush()
367 return
368 elif command == "edit":
369 task = asyncio.ensure_future(self.vim.edit(params, order_id))
370 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_edit", task)
371 return
372 elif topic == "wim_account":
373 wim_id = params["_id"]
374 if command == "create":
375 task = asyncio.ensure_future(self.wim.create(params, order_id))
376 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_create", task)
377 return
378 elif command == "delete":
379 self.lcm_tasks.cancel(topic, wim_id)
380 task = asyncio.ensure_future(self.wim.delete(wim_id, order_id))
381 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_delete", task)
382 return
383 elif command == "show":
384 print("not implemented show with wim_account")
385 sys.stdout.flush()
386 return
387 elif command == "edit":
388 task = asyncio.ensure_future(self.wim.edit(params, order_id))
389 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_edit", task)
390 return
391 elif topic == "sdn":
392 _sdn_id = params["_id"]
393 if command == "create":
394 task = asyncio.ensure_future(self.sdn.create(params, order_id))
395 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_create", task)
396 return
397 elif command == "delete":
398 self.lcm_tasks.cancel(topic, _sdn_id)
399 task = asyncio.ensure_future(self.sdn.delete(_sdn_id, order_id))
400 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_delete", task)
401 return
402 elif command == "edit":
403 task = asyncio.ensure_future(self.sdn.edit(params, order_id))
404 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_edit", task)
405 return
406 self.logger.critical("unknown topic {} and command '{}'".format(topic, command))
407
408 async def kafka_read(self):
409 self.logger.debug("Task kafka_read Enter with worker_id={}".format(self.worker_id))
410 # future = asyncio.Future()
411 self.consecutive_errors = 0
412 self.first_start = True
413 while self.consecutive_errors < 10:
414 try:
415 topics = ("ns", "vim_account", "wim_account", "sdn", "nsi")
416 topics_admin = ("admin", )
417 await asyncio.gather(
418 self.msg.aioread(topics, self.loop, self.kafka_read_callback),
419 self.msg_admin.aioread(topics_admin, self.loop, self.kafka_read_callback, group_id=False)
420 )
421
422 except LcmExceptionExit:
423 self.logger.debug("Bye!")
424 break
425 except Exception as e:
426 # if not first_start is the first time after starting. So leave more time and wait
427 # to allow kafka starts
428 if self.consecutive_errors == 8 if not self.first_start else 30:
429 self.logger.error("Task kafka_read task exit error too many errors. Exception: {}".format(e))
430 raise
431 self.consecutive_errors += 1
432 self.logger.error("Task kafka_read retrying after Exception {}".format(e))
433 wait_time = 2 if not self.first_start else 5
434 await asyncio.sleep(wait_time, loop=self.loop)
435
436 # self.logger.debug("Task kafka_read terminating")
437 self.logger.debug("Task kafka_read exit")
438
439 def start(self):
440
441 # check RO version
442 self.loop.run_until_complete(self.check_RO_version())
443
444 self.loop.run_until_complete(asyncio.gather(
445 self.kafka_read(),
446 self.kafka_ping()
447 ))
448 # TODO
449 # self.logger.debug("Terminating cancelling creation tasks")
450 # self.lcm_tasks.cancel("ALL", "create")
451 # timeout = 200
452 # while self.is_pending_tasks():
453 # self.logger.debug("Task kafka_read terminating. Waiting for tasks termination")
454 # await asyncio.sleep(2, loop=self.loop)
455 # timeout -= 2
456 # if not timeout:
457 # self.lcm_tasks.cancel("ALL", "ALL")
458 self.loop.close()
459 self.loop = None
460 if self.db:
461 self.db.db_disconnect()
462 if self.msg:
463 self.msg.disconnect()
464 if self.msg_admin:
465 self.msg_admin.disconnect()
466 if self.fs:
467 self.fs.fs_disconnect()
468
469 def read_config_file(self, config_file):
470 # TODO make a [ini] + yaml inside parser
471 # the configparser library is not suitable, because it does not admit comments at the end of line,
472 # and not parse integer or boolean
473 try:
474 with open(config_file) as f:
475 conf = yaml.load(f)
476 for k, v in environ.items():
477 if not k.startswith("OSMLCM_"):
478 continue
479 k_items = k.lower().split("_")
480 if len(k_items) < 3:
481 continue
482 if k_items[1] in ("ro", "vca"):
483 # put in capital letter
484 k_items[1] = k_items[1].upper()
485 c = conf
486 try:
487 for k_item in k_items[1:-1]:
488 c = c[k_item]
489 if k_items[-1] == "port":
490 c[k_items[-1]] = int(v)
491 else:
492 c[k_items[-1]] = v
493 except Exception as e:
494 self.logger.warn("skipping environ '{}' on exception '{}'".format(k, e))
495
496 return conf
497 except Exception as e:
498 self.logger.critical("At config file '{}': {}".format(config_file, e))
499 exit(1)
500
501 @staticmethod
502 def get_process_id():
503 """
504 Obtain a unique ID for this process. If running from inside docker, it will get docker ID. If not it
505 will provide a random one
506 :return: Obtained ID
507 """
508 # Try getting docker id. If fails, get pid
509 try:
510 with open("/proc/self/cgroup", "r") as f:
511 text_id_ = f.readline()
512 _, _, text_id = text_id_.rpartition("/")
513 text_id = text_id.replace('\n', '')[:12]
514 if text_id:
515 return text_id
516 except Exception:
517 pass
518 # Return a random id
519 return ''.join(random_choice("0123456789abcdef") for _ in range(12))
520
521
522 def usage():
523 print("""Usage: {} [options]
524 -c|--config [configuration_file]: loads the configuration file (default: ./nbi.cfg)
525 --health-check: do not run lcm, but inspect kafka bus to determine if lcm is healthy
526 -h|--help: shows this help
527 """.format(sys.argv[0]))
528 # --log-socket-host HOST: send logs to this host")
529 # --log-socket-port PORT: send logs using this port (default: 9022)")
530
531
532 def health_check():
533 retry = 2
534 while retry:
535 retry -= 1
536 try:
537 with open(health_check_file, "r") as f:
538 last_received_ping = f.read()
539
540 if time() - float(last_received_ping) < Lcm.ping_interval_pace + 10:
541 exit(0)
542 except Exception:
543 pass
544 if retry:
545 sleep(6)
546 exit(1)
547
548
549 if __name__ == '__main__':
550 try:
551 # load parameters and configuration
552 opts, args = getopt.getopt(sys.argv[1:], "hc:", ["config=", "help", "health-check"])
553 # TODO add "log-socket-host=", "log-socket-port=", "log-file="
554 config_file = None
555 for o, a in opts:
556 if o in ("-h", "--help"):
557 usage()
558 sys.exit()
559 elif o in ("-c", "--config"):
560 config_file = a
561 elif o == "--health-check":
562 health_check()
563 # elif o == "--log-socket-port":
564 # log_socket_port = a
565 # elif o == "--log-socket-host":
566 # log_socket_host = a
567 # elif o == "--log-file":
568 # log_file = a
569 else:
570 assert False, "Unhandled option"
571 if config_file:
572 if not path.isfile(config_file):
573 print("configuration file '{}' not exist".format(config_file), file=sys.stderr)
574 exit(1)
575 else:
576 for config_file in (__file__[:__file__.rfind(".")] + ".cfg", "./lcm.cfg", "/etc/osm/lcm.cfg"):
577 if path.isfile(config_file):
578 break
579 else:
580 print("No configuration file 'lcm.cfg' found neither at local folder nor at /etc/osm/", file=sys.stderr)
581 exit(1)
582 lcm = Lcm(config_file)
583 lcm.start()
584 except (LcmException, getopt.GetoptError) as e:
585 print(str(e), file=sys.stderr)
586 # usage()
587 exit(1)