3b8da7d982cf1d9ce615235b501899b36bfddec2
[osm/LCM.git] / osm_lcm / lcm.py
1 #!/usr/bin/python3
2 # -*- coding: utf-8 -*-
3
4 ##
5 # Copyright 2018 Telefonica S.A.
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License"); you may
8 # not use this file except in compliance with the License. You may obtain
9 # a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16 # License for the specific language governing permissions and limitations
17 # under the License.
18 ##
19
20 import asyncio
21 import yaml
22 import logging
23 import logging.handlers
24 import getopt
25 import sys
26
27 from osm_lcm import ROclient, ns, vim_sdn, netslice
28 from time import time, sleep
29 from osm_lcm.lcm_utils import versiontuple, LcmException, TaskRegistry, LcmExceptionExit
30 from osm_lcm import version as lcm_version, version_date as lcm_version_date
31
32 from osm_common import dbmemory, dbmongo, fslocal, msglocal, msgkafka
33 from osm_common import version as common_version
34 from osm_common.dbbase import DbException
35 from osm_common.fsbase import FsException
36 from osm_common.msgbase import MsgException
37 from os import environ, path
38 from random import choice as random_choice
39 from n2vc import version as n2vc_version
40
41
42 __author__ = "Alfonso Tierno"
43 min_RO_version = "0.6.3"
44 min_n2vc_version = "0.0.2"
45 min_common_version = "0.1.19"
46 # uncomment if LCM is installed as library and installed, and get them from __init__.py
47 # lcm_version = '0.1.41'
48 # lcm_version_date = '2019-06-19'
49 health_check_file = path.expanduser("~") + "/time_last_ping" # TODO find better location for this file
50
51
52 class Lcm:
53
54 ping_interval_pace = 120 # how many time ping is send once is confirmed all is running
55 ping_interval_boot = 5 # how many time ping is sent when booting
56
57 def __init__(self, config_file, loop=None):
58 """
59 Init, Connect to database, filesystem storage, and messaging
60 :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
61 :return: None
62 """
63
64 self.db = None
65 self.msg = None
66 self.msg_admin = None
67 self.fs = None
68 self.pings_not_received = 1
69 self.consecutive_errors = 0
70 self.first_start = False
71
72 # logging
73 self.logger = logging.getLogger('lcm')
74 # get id
75 self.worker_id = self.get_process_id()
76 # load configuration
77 config = self.read_config_file(config_file)
78 self.config = config
79 self.ro_config = {
80 "endpoint_url": "http://{}:{}/openmano".format(config["RO"]["host"], config["RO"]["port"]),
81 "tenant": config.get("tenant", "osm"),
82 "logger_name": "lcm.ROclient",
83 "loglevel": "ERROR",
84 }
85
86 self.vca_config = config["VCA"]
87
88 self.loop = loop or asyncio.get_event_loop()
89
90 # logging
91 log_format_simple = "%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)s %(message)s"
92 log_formatter_simple = logging.Formatter(log_format_simple, datefmt='%Y-%m-%dT%H:%M:%S')
93 config["database"]["logger_name"] = "lcm.db"
94 config["storage"]["logger_name"] = "lcm.fs"
95 config["message"]["logger_name"] = "lcm.msg"
96 if config["global"].get("logfile"):
97 file_handler = logging.handlers.RotatingFileHandler(config["global"]["logfile"],
98 maxBytes=100e6, backupCount=9, delay=0)
99 file_handler.setFormatter(log_formatter_simple)
100 self.logger.addHandler(file_handler)
101 if not config["global"].get("nologging"):
102 str_handler = logging.StreamHandler()
103 str_handler.setFormatter(log_formatter_simple)
104 self.logger.addHandler(str_handler)
105
106 if config["global"].get("loglevel"):
107 self.logger.setLevel(config["global"]["loglevel"])
108
109 # logging other modules
110 for k1, logname in {"message": "lcm.msg", "database": "lcm.db", "storage": "lcm.fs"}.items():
111 config[k1]["logger_name"] = logname
112 logger_module = logging.getLogger(logname)
113 if config[k1].get("logfile"):
114 file_handler = logging.handlers.RotatingFileHandler(config[k1]["logfile"],
115 maxBytes=100e6, backupCount=9, delay=0)
116 file_handler.setFormatter(log_formatter_simple)
117 logger_module.addHandler(file_handler)
118 if config[k1].get("loglevel"):
119 logger_module.setLevel(config[k1]["loglevel"])
120 self.logger.critical("starting osm/lcm version {} {}".format(lcm_version, lcm_version_date))
121
122 # check version of N2VC
123 # TODO enhance with int conversion or from distutils.version import LooseVersion
124 # or with list(map(int, version.split(".")))
125 if versiontuple(n2vc_version) < versiontuple(min_n2vc_version):
126 raise LcmException("Not compatible osm/N2VC version '{}'. Needed '{}' or higher".format(
127 n2vc_version, min_n2vc_version))
128 # check version of common
129 if versiontuple(common_version) < versiontuple(min_common_version):
130 raise LcmException("Not compatible osm/common version '{}'. Needed '{}' or higher".format(
131 common_version, min_common_version))
132
133 try:
134 # TODO check database version
135 if config["database"]["driver"] == "mongo":
136 self.db = dbmongo.DbMongo()
137 self.db.db_connect(config["database"])
138 elif config["database"]["driver"] == "memory":
139 self.db = dbmemory.DbMemory()
140 self.db.db_connect(config["database"])
141 else:
142 raise LcmException("Invalid configuration param '{}' at '[database]':'driver'".format(
143 config["database"]["driver"]))
144
145 if config["storage"]["driver"] == "local":
146 self.fs = fslocal.FsLocal()
147 self.fs.fs_connect(config["storage"])
148 else:
149 raise LcmException("Invalid configuration param '{}' at '[storage]':'driver'".format(
150 config["storage"]["driver"]))
151
152 config_message = config["message"].copy()
153 config_message["loop"] = self.loop
154 if config_message["driver"] == "local":
155 self.msg = msglocal.MsgLocal()
156 self.msg.connect(config_message)
157 self.msg_admin = msglocal.MsgLocal()
158 config_message.pop("group_id", None)
159 self.msg_admin.connect(config_message)
160 elif config_message["driver"] == "kafka":
161 self.msg = msgkafka.MsgKafka()
162 self.msg.connect(config_message)
163 self.msg_admin = msgkafka.MsgKafka()
164 config_message.pop("group_id", None)
165 self.msg_admin.connect(config_message)
166 else:
167 raise LcmException("Invalid configuration param '{}' at '[message]':'driver'".format(
168 config["message"]["driver"]))
169 except (DbException, FsException, MsgException) as e:
170 self.logger.critical(str(e), exc_info=True)
171 raise LcmException(str(e))
172
173 # contains created tasks/futures to be able to cancel
174 self.lcm_tasks = TaskRegistry(self.worker_id, self.db, self.logger)
175
176 self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.vca_config, self.loop)
177 self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config,
178 self.vca_config, self.loop)
179 self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
180 self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
181 self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.ro_config, self.loop)
182
183 async def check_RO_version(self):
184 try:
185 RO = ROclient.ROClient(self.loop, **self.ro_config)
186 RO_version = await RO.get_version()
187 if versiontuple(RO_version) < versiontuple(min_RO_version):
188 raise LcmException("Not compatible osm/RO version '{}.{}.{}'. Needed '{}.{}.{}' or higher".format(
189 *RO_version, *min_RO_version
190 ))
191 except ROclient.ROClientException as e:
192 error_text = "Error while conneting to osm/RO " + str(e)
193 self.logger.critical(error_text, exc_info=True)
194 raise LcmException(error_text)
195
196 async def test(self, param=None):
197 self.logger.debug("Starting/Ending test task: {}".format(param))
198
199 async def kafka_ping(self):
200 self.logger.debug("Task kafka_ping Enter")
201 consecutive_errors = 0
202 first_start = True
203 kafka_has_received = False
204 self.pings_not_received = 1
205 while True:
206 try:
207 await self.msg_admin.aiowrite(
208 "admin", "ping",
209 {"from": "lcm", "to": "lcm", "worker_id": self.worker_id, "version": lcm_version},
210 self.loop)
211 # time between pings are low when it is not received and at starting
212 wait_time = self.ping_interval_boot if not kafka_has_received else self.ping_interval_pace
213 if not self.pings_not_received:
214 kafka_has_received = True
215 self.pings_not_received += 1
216 await asyncio.sleep(wait_time, loop=self.loop)
217 if self.pings_not_received > 10:
218 raise LcmException("It is not receiving pings from Kafka bus")
219 consecutive_errors = 0
220 first_start = False
221 except LcmException:
222 raise
223 except Exception as e:
224 # if not first_start is the first time after starting. So leave more time and wait
225 # to allow kafka starts
226 if consecutive_errors == 8 if not first_start else 30:
227 self.logger.error("Task kafka_read task exit error too many errors. Exception: {}".format(e))
228 raise
229 consecutive_errors += 1
230 self.logger.error("Task kafka_read retrying after Exception {}".format(e))
231 wait_time = 2 if not first_start else 5
232 await asyncio.sleep(wait_time, loop=self.loop)
233
234 def kafka_read_callback(self, topic, command, params):
235 order_id = 1
236
237 if topic != "admin" and command != "ping":
238 self.logger.debug("Task kafka_read receives {} {}: {}".format(topic, command, params))
239 self.consecutive_errors = 0
240 self.first_start = False
241 order_id += 1
242 if command == "exit":
243 raise LcmExceptionExit
244 elif command.startswith("#"):
245 return
246 elif command == "echo":
247 # just for test
248 print(params)
249 sys.stdout.flush()
250 return
251 elif command == "test":
252 asyncio.Task(self.test(params), loop=self.loop)
253 return
254
255 if topic == "admin":
256 if command == "ping" and params["to"] == "lcm" and params["from"] == "lcm":
257 if params.get("worker_id") != self.worker_id:
258 return
259 self.pings_not_received = 0
260 try:
261 with open(health_check_file, "w") as f:
262 f.write(str(time()))
263 except Exception as e:
264 self.logger.error("Cannot write into '{}' for healthcheck: {}".format(health_check_file, e))
265 return
266 elif topic == "ns":
267 if command == "instantiate":
268 # self.logger.debug("Deploying NS {}".format(nsr_id))
269 nslcmop = params
270 nslcmop_id = nslcmop["_id"]
271 nsr_id = nslcmop["nsInstanceId"]
272 task = asyncio.ensure_future(self.ns.instantiate(nsr_id, nslcmop_id))
273 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_instantiate", task)
274 return
275 elif command == "terminate":
276 # self.logger.debug("Deleting NS {}".format(nsr_id))
277 nslcmop = params
278 nslcmop_id = nslcmop["_id"]
279 nsr_id = nslcmop["nsInstanceId"]
280 self.lcm_tasks.cancel(topic, nsr_id)
281 task = asyncio.ensure_future(self.ns.terminate(nsr_id, nslcmop_id))
282 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_terminate", task)
283 return
284 elif command == "action":
285 # self.logger.debug("Update NS {}".format(nsr_id))
286 nslcmop = params
287 nslcmop_id = nslcmop["_id"]
288 nsr_id = nslcmop["nsInstanceId"]
289 task = asyncio.ensure_future(self.ns.action(nsr_id, nslcmop_id))
290 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_action", task)
291 return
292 elif command == "scale":
293 # self.logger.debug("Update NS {}".format(nsr_id))
294 nslcmop = params
295 nslcmop_id = nslcmop["_id"]
296 nsr_id = nslcmop["nsInstanceId"]
297 task = asyncio.ensure_future(self.ns.scale(nsr_id, nslcmop_id))
298 self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "ns_scale", task)
299 return
300 elif command == "show":
301 nsr_id = params
302 try:
303 db_nsr = self.db.get_one("nsrs", {"_id": nsr_id})
304 print("nsr:\n _id={}\n operational-status: {}\n config-status: {}"
305 "\n detailed-status: {}\n deploy: {}\n tasks: {}"
306 "".format(nsr_id, db_nsr["operational-status"], db_nsr["config-status"],
307 db_nsr["detailed-status"],
308 db_nsr["_admin"]["deployed"], self.lcm_ns_tasks.get(nsr_id)))
309 except Exception as e:
310 print("nsr {} not found: {}".format(nsr_id, e))
311 sys.stdout.flush()
312 return
313 elif command == "deleted":
314 return # TODO cleaning of task just in case should be done
315 elif command in ("terminated", "instantiated", "scaled", "actioned"): # "scaled-cooldown-time"
316 return
317 elif topic == "nsi": # netslice LCM processes (instantiate, terminate, etc)
318 if command == "instantiate":
319 # self.logger.debug("Instantiating Network Slice {}".format(nsilcmop["netsliceInstanceId"]))
320 nsilcmop = params
321 nsilcmop_id = nsilcmop["_id"] # slice operation id
322 nsir_id = nsilcmop["netsliceInstanceId"] # slice record id
323 task = asyncio.ensure_future(self.netslice.instantiate(nsir_id, nsilcmop_id))
324 self.lcm_tasks.register("nsi", nsir_id, nsilcmop_id, "nsi_instantiate", task)
325 return
326 elif command == "terminate":
327 # self.logger.debug("Terminating Network Slice NS {}".format(nsilcmop["netsliceInstanceId"]))
328 nsilcmop = params
329 nsilcmop_id = nsilcmop["_id"] # slice operation id
330 nsir_id = nsilcmop["netsliceInstanceId"] # slice record id
331 self.lcm_tasks.cancel(topic, nsir_id)
332 task = asyncio.ensure_future(self.netslice.terminate(nsir_id, nsilcmop_id))
333 self.lcm_tasks.register("nsi", nsir_id, nsilcmop_id, "nsi_terminate", task)
334 return
335 elif command == "show":
336 nsir_id = params
337 try:
338 db_nsir = self.db.get_one("nsirs", {"_id": nsir_id})
339 print("nsir:\n _id={}\n operational-status: {}\n config-status: {}"
340 "\n detailed-status: {}\n deploy: {}\n tasks: {}"
341 "".format(nsir_id, db_nsir["operational-status"], db_nsir["config-status"],
342 db_nsir["detailed-status"],
343 db_nsir["_admin"]["deployed"], self.lcm_netslice_tasks.get(nsir_id)))
344 except Exception as e:
345 print("nsir {} not found: {}".format(nsir_id, e))
346 sys.stdout.flush()
347 return
348 elif command == "deleted":
349 return # TODO cleaning of task just in case should be done
350 elif command in ("terminated", "instantiated", "scaled", "actioned"): # "scaled-cooldown-time"
351 return
352 elif topic == "vim_account":
353 vim_id = params["_id"]
354 if command == "create":
355 task = asyncio.ensure_future(self.vim.create(params, order_id))
356 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_create", task)
357 return
358 elif command == "delete":
359 self.lcm_tasks.cancel(topic, vim_id)
360 task = asyncio.ensure_future(self.vim.delete(params, order_id))
361 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_delete", task)
362 return
363 elif command == "show":
364 print("not implemented show with vim_account")
365 sys.stdout.flush()
366 return
367 elif command == "edit":
368 task = asyncio.ensure_future(self.vim.edit(params, order_id))
369 self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_edit", task)
370 return
371 elif topic == "wim_account":
372 wim_id = params["_id"]
373 if command == "create":
374 task = asyncio.ensure_future(self.wim.create(params, order_id))
375 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_create", task)
376 return
377 elif command == "delete":
378 self.lcm_tasks.cancel(topic, wim_id)
379 task = asyncio.ensure_future(self.wim.delete(params, order_id))
380 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_delete", task)
381 return
382 elif command == "show":
383 print("not implemented show with wim_account")
384 sys.stdout.flush()
385 return
386 elif command == "edit":
387 task = asyncio.ensure_future(self.wim.edit(params, order_id))
388 self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_edit", task)
389 return
390 elif topic == "sdn":
391 _sdn_id = params["_id"]
392 if command == "create":
393 task = asyncio.ensure_future(self.sdn.create(params, order_id))
394 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_create", task)
395 return
396 elif command == "delete":
397 self.lcm_tasks.cancel(topic, _sdn_id)
398 task = asyncio.ensure_future(self.sdn.delete(params, order_id))
399 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_delete", task)
400 return
401 elif command == "edit":
402 task = asyncio.ensure_future(self.sdn.edit(params, order_id))
403 self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_edit", task)
404 return
405 self.logger.critical("unknown topic {} and command '{}'".format(topic, command))
406
407 async def kafka_read(self):
408 self.logger.debug("Task kafka_read Enter with worker_id={}".format(self.worker_id))
409 # future = asyncio.Future()
410 self.consecutive_errors = 0
411 self.first_start = True
412 while self.consecutive_errors < 10:
413 try:
414 topics = ("ns", "vim_account", "wim_account", "sdn", "nsi")
415 topics_admin = ("admin", )
416 await asyncio.gather(
417 self.msg.aioread(topics, self.loop, self.kafka_read_callback),
418 self.msg_admin.aioread(topics_admin, self.loop, self.kafka_read_callback, group_id=False)
419 )
420
421 except LcmExceptionExit:
422 self.logger.debug("Bye!")
423 break
424 except Exception as e:
425 # if not first_start is the first time after starting. So leave more time and wait
426 # to allow kafka starts
427 if self.consecutive_errors == 8 if not self.first_start else 30:
428 self.logger.error("Task kafka_read task exit error too many errors. Exception: {}".format(e))
429 raise
430 self.consecutive_errors += 1
431 self.logger.error("Task kafka_read retrying after Exception {}".format(e))
432 wait_time = 2 if not self.first_start else 5
433 await asyncio.sleep(wait_time, loop=self.loop)
434
435 # self.logger.debug("Task kafka_read terminating")
436 self.logger.debug("Task kafka_read exit")
437
438 def start(self):
439
440 # check RO version
441 self.loop.run_until_complete(self.check_RO_version())
442
443 self.loop.run_until_complete(asyncio.gather(
444 self.kafka_read(),
445 self.kafka_ping()
446 ))
447 # TODO
448 # self.logger.debug("Terminating cancelling creation tasks")
449 # self.lcm_tasks.cancel("ALL", "create")
450 # timeout = 200
451 # while self.is_pending_tasks():
452 # self.logger.debug("Task kafka_read terminating. Waiting for tasks termination")
453 # await asyncio.sleep(2, loop=self.loop)
454 # timeout -= 2
455 # if not timeout:
456 # self.lcm_tasks.cancel("ALL", "ALL")
457 self.loop.close()
458 self.loop = None
459 if self.db:
460 self.db.db_disconnect()
461 if self.msg:
462 self.msg.disconnect()
463 if self.msg_admin:
464 self.msg_admin.disconnect()
465 if self.fs:
466 self.fs.fs_disconnect()
467
468 def read_config_file(self, config_file):
469 # TODO make a [ini] + yaml inside parser
470 # the configparser library is not suitable, because it does not admit comments at the end of line,
471 # and not parse integer or boolean
472 try:
473 with open(config_file) as f:
474 conf = yaml.load(f)
475 for k, v in environ.items():
476 if not k.startswith("OSMLCM_"):
477 continue
478 k_items = k.lower().split("_")
479 if len(k_items) < 3:
480 continue
481 if k_items[1] in ("ro", "vca"):
482 # put in capital letter
483 k_items[1] = k_items[1].upper()
484 c = conf
485 try:
486 for k_item in k_items[1:-1]:
487 c = c[k_item]
488 if k_items[-1] == "port":
489 c[k_items[-1]] = int(v)
490 else:
491 c[k_items[-1]] = v
492 except Exception as e:
493 self.logger.warn("skipping environ '{}' on exception '{}'".format(k, e))
494
495 return conf
496 except Exception as e:
497 self.logger.critical("At config file '{}': {}".format(config_file, e))
498 exit(1)
499
500 @staticmethod
501 def get_process_id():
502 """
503 Obtain a unique ID for this process. If running from inside docker, it will get docker ID. If not it
504 will provide a random one
505 :return: Obtained ID
506 """
507 # Try getting docker id. If fails, get pid
508 try:
509 with open("/proc/self/cgroup", "r") as f:
510 text_id_ = f.readline()
511 _, _, text_id = text_id_.rpartition("/")
512 text_id = text_id.replace('\n', '')[:12]
513 if text_id:
514 return text_id
515 except Exception:
516 pass
517 # Return a random id
518 return ''.join(random_choice("0123456789abcdef") for _ in range(12))
519
520
521 def usage():
522 print("""Usage: {} [options]
523 -c|--config [configuration_file]: loads the configuration file (default: ./nbi.cfg)
524 --health-check: do not run lcm, but inspect kafka bus to determine if lcm is healthy
525 -h|--help: shows this help
526 """.format(sys.argv[0]))
527 # --log-socket-host HOST: send logs to this host")
528 # --log-socket-port PORT: send logs using this port (default: 9022)")
529
530
531 def health_check():
532 retry = 2
533 while retry:
534 retry -= 1
535 try:
536 with open(health_check_file, "r") as f:
537 last_received_ping = f.read()
538
539 if time() - float(last_received_ping) < Lcm.ping_interval_pace + 10:
540 exit(0)
541 except Exception:
542 pass
543 if retry:
544 sleep(6)
545 exit(1)
546
547
548 if __name__ == '__main__':
549 try:
550 # load parameters and configuration
551 opts, args = getopt.getopt(sys.argv[1:], "hc:", ["config=", "help", "health-check"])
552 # TODO add "log-socket-host=", "log-socket-port=", "log-file="
553 config_file = None
554 for o, a in opts:
555 if o in ("-h", "--help"):
556 usage()
557 sys.exit()
558 elif o in ("-c", "--config"):
559 config_file = a
560 elif o == "--health-check":
561 health_check()
562 # elif o == "--log-socket-port":
563 # log_socket_port = a
564 # elif o == "--log-socket-host":
565 # log_socket_host = a
566 # elif o == "--log-file":
567 # log_file = a
568 else:
569 assert False, "Unhandled option"
570 if config_file:
571 if not path.isfile(config_file):
572 print("configuration file '{}' not exist".format(config_file), file=sys.stderr)
573 exit(1)
574 else:
575 for config_file in (__file__[:__file__.rfind(".")] + ".cfg", "./lcm.cfg", "/etc/osm/lcm.cfg"):
576 if path.isfile(config_file):
577 break
578 else:
579 print("No configuration file 'lcm.cfg' found neither at local folder nor at /etc/osm/", file=sys.stderr)
580 exit(1)
581 lcm = Lcm(config_file)
582 lcm.start()
583 except (LcmException, getopt.GetoptError) as e:
584 print(str(e), file=sys.stderr)
585 # usage()
586 exit(1)