HA for vim, wim and sdn
[osm/LCM.git] / osm_lcm / lcm_utils.py
1 # -*- coding: utf-8 -*-
2
3 ##
4 # Copyright 2018 Telefonica S.A.
5 #
6 # Licensed under the Apache License, Version 2.0 (the "License"); you may
7 # not use this file except in compliance with the License. You may obtain
8 # a copy of the License at
9 #
10 # http://www.apache.org/licenses/LICENSE-2.0
11 #
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 # License for the specific language governing permissions and limitations
16 # under the License.
17 ##
18
19 import asyncio
20 from collections import OrderedDict
21 # from osm_common.dbbase import DbException
22
23 __author__ = "Alfonso Tierno"
24
25
class LcmException(Exception):
    """Root of the LCM exception hierarchy defined in this module."""
28
29
class LcmExceptionNoMgmtIP(LcmException):
    """
    LcmException subtype. Judging by its name it signals a missing management IP address; the places where it
    is raised are outside this file.
    """
32
33
class LcmExceptionExit(LcmException):
    """
    LcmException subtype. Judging by its name it is used to request an exit; the places where it is raised
    are outside this file.
    """
36
37
def versiontuple(v):
    """
    Convert a dot separated version string into a tuple that can be compared with other versions.

    Every dot separated component is left-padded with zeros up to 16 characters, so that the plain string
    comparison of the components orders numbers properly ("10" goes after "9").
    A package version is something like 4.0.1.post11+gb3f024d.dirty-1, where 4.0.1 is the git tag, postXX is
    the number of commits from this tag, and +XXXXXXX is the git commit short id. The longest component has
    16 characters with up to 999 commits.
    :param v: version string, with its components separated by dots
    :return: tuple of zero-padded strings, one per component
    """
    return tuple(component.zfill(16) for component in v.split("."))
47
48
49 # LcmBase must be listed before TaskRegistry, as it is a dependency.
class LcmBase:
    """Common base that keeps the shared handlers (database, message bus, file storage, logger)."""

    def __init__(self, db, msg, fs, logger):
        """
        Store the handlers received; nothing else is done here.
        :param db: database connection
        :param msg: stored as self.msg (presumably the message bus handler)
        :param fs: stored as self.fs (presumably the file storage handler)
        :param logger: stored as self.logger
        """
        self.logger = logger
        self.fs = fs
        self.msg = msg
        self.db = db

    def update_db_2(self, item, _id, _desc):
        """
        Write the pending changes of _desc into the database and, on success, empty _desc.
        Nothing is sent to the database when _desc is empty or None.
        :param item: first argument passed to db.set_one (the database table/collection)
        :param _id: _id of the entry to update
        :param _desc: dictionary with the content to update. It is passed as it is to db.set_one and cleared
            in place afterwards, so that the caller can reuse it to accumulate the next changes
        :return: None. Any exception raised by the database layer is propagated to the caller
        """
        if _desc:
            self.db.set_one(item, {"_id": _id}, _desc)
            # reached only if set_one did not raise: these changes are not pending anymore
            _desc.clear()
76
77
class TaskRegistry(LcmBase):
    """
    Implements a registry of tasks, needed for later cancellation, to look for related tasks that must be
    completed before, etc. The in-memory registry (self.task_registry) is a four level dict:
        First level is the topic: "ns", "nsi", "vim_account", "wim_account", "sdn"
        Second level is the _id of the related item
        Third level is the operation id (an OrderedDict, so that the registration order is kept)
        Fourth level is a descriptive name, the value is the task object. The only methods called on a task
            are done() and cancel()

    The HA (High-Availability) methods (suffix "_HA") are used when more than one LCM instance is running.
    They do not use the in-memory registry but the external DB, and their topics are "ns", "nsi", "vim",
    "wim" and "sdn" (see topic2dbtable_dict). LcmBase is used as base class to be able to reuse
    LcmBase.update_db_2()
    The DB registry uses the following fields to distinguish a task:
        - op_type: operation type ("nslcmops" or "nsilcmops")
        - op_id: operation ID
        - worker: the worker ID for this process
    """

    # NS/NSI: "services"   VIM/WIM/SDN: "accounts"
    topic_service_list = ['ns', 'nsi']
    topic_account_list = ['vim', 'wim', 'sdn']

    # Map topic to InstanceID
    topic2instid_dict = {
        'ns': 'nsInstanceId',
        'nsi': 'netsliceInstanceId'}

    # Map topic to DB table name
    topic2dbtable_dict = {
        'ns': 'nslcmops',
        'nsi': 'nsilcmops',
        'vim': 'vim_accounts',
        'wim': 'wim_accounts',
        'sdn': 'sdns'}

    def __init__(self, worker_id=None, db=None, logger=None):
        """
        :param worker_id: identifier of this LCM instance, written in the DB when an HA task is locked
        :param db: database connection, used only by the HA methods
        :param logger: logger, used only by the HA methods
        """
        # The registry has neither message bus nor file storage; going through the base initializer
        # leaves those attributes defined (as None) instead of missing
        super().__init__(db, None, None, logger)
        self.task_registry = {
            "ns": {},
            "nsi": {},
            "vim_account": {},
            "wim_account": {},
            "sdn": {},
        }
        self.worker_id = worker_id

    def register(self, topic, _id, op_id, task_name, task):
        """
        Register a new task
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param op_id: id of the operation of the related item
        :param task_name: Task descriptive name, as create, instantiate, terminate. Must be unique in this op_id,
            otherwise the task previously registered with the same name is replaced
        :param task: Task class
        :return: none
        """
        operations = self.task_registry[topic].setdefault(_id, OrderedDict())
        operations.setdefault(op_id, {})[task_name] = task

    def remove(self, topic, _id, op_id, task_name=None):
        """
        When task is ended, it should be removed. It ignores missing tasks. It also removes every operation of
        this _id whose tasks are all done
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param op_id: id of the operation of the related item
        :param task_name: Task descriptive name. If none it deletes all tasks with same _id and op_id
        :return: None
        """
        operations = self.task_registry[topic].get(_id)
        if not operations:
            return
        if not task_name:
            operations.pop(op_id, None)
        elif operations.get(op_id):
            operations[op_id].pop(task_name, None)

        # delete done tasks: an operation is dropped when none of its tasks is pending (or it has no tasks)
        for op_id_ in list(operations):
            if all(task.done() for task in operations[op_id_].values()):
                del operations[op_id_]
        if not operations:
            del self.task_registry[topic][_id]

    def lookfor_related(self, topic, _id, my_op_id=None):
        """
        Look in the in-memory registry for the pending tasks of the operation registered just before my_op_id
        for the same item. Operations are walked from the newest to the oldest: all of them are skipped until
        my_op_id is found (my_op_id included), and only the next one is inspected. When my_op_id is not
        provided, the newest operation is the one inspected.
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param my_op_id: id of the operation that is looking for related tasks. Can be None
        :return: tuple (names of the pending tasks joined by ", ", list of pending tasks). It is ("", []) when
            there is nothing pending
        """
        task_list = []
        task_name_list = []
        if _id not in self.task_registry[topic]:
            return "", task_list
        for op_id in reversed(self.task_registry[topic][_id]):
            if my_op_id:
                if my_op_id == op_id:
                    my_op_id = None  # so that the next task is taken
                continue

            for task_name, task in self.task_registry[topic][_id][op_id].items():
                if not task.done():
                    task_list.append(task)
                    task_name_list.append(task_name)
            # only one operation is inspected, even if all its tasks are done
            break
        return ", ".join(task_name_list), task_list

    def cancel(self, topic, _id, target_op_id=None, target_task_name=None):
        """
        Cancel all active tasks of a concrete ns, nsi, vim_account, wim_account, sdn identified for _id.
        If target_op_id is supplied only this operation is cancelled, and the same with target_task_name
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param target_op_id: if provided, only the tasks of this operation are cancelled
        :param target_task_name: if provided, only the tasks with this name are cancelled
        :return: None. Tasks are not removed from the registry
        """
        operations = self.task_registry[topic].get(_id)
        if not operations:
            return
        for op_id in reversed(operations):
            if target_op_id and target_op_id != op_id:
                continue
            for task_name, task in operations[op_id].items():
                if target_task_name and target_task_name != task_name:
                    continue
                task.cancel()

    def _is_service_type_HA(self, topic):
        """Is topic NS/NSI?"""
        return topic in self.topic_service_list

    def _is_account_type_HA(self, topic):
        """Is topic VIM/WIM/SDN?"""
        return topic in self.topic_account_list

    def _get_account_and_op_HA(self, op_id):
        """
        Split the op_id of a VIM/WIM/SDN operation at its last ':'
        Input: op_id, example: 'abc123def:3'   Output: account_id='abc123def', op_index='3'
        :param op_id: Account ID + ':' + Operation Index
        :return: tuple (account_id, op_index). op_index is returned as a string containing only digits.
            It is (None, None) if op_id is empty or it does not follow the expected format
        """
        if not op_id:
            return (None, None)
        account_id, _, op_index = op_id.rpartition(':')
        if not account_id or not op_index.isdigit():
            return (None, None)
        return account_id, op_index

    def _get_instance_id_HA(self, topic, op_type, op_id):
        """
        Get '_id' for any topic and operation
        :return: the '_id' to look for in the DB table of the topic; None if it cannot be obtained
        """
        # Special operation 'ANY', for SDN account associated to a VIM account: op_id as '_id'
        # NS/NSI: Use op_id as '_id'
        if op_type == 'ANY' or self._is_service_type_HA(topic):
            return op_id
        # VIM/SDN/WIM: Split op_id to get Account ID and Operation Index, use Account ID as '_id'
        if self._is_account_type_HA(topic):
            account_id, _ = self._get_account_and_op_HA(op_id)
            return account_id
        return None

    def _get_waitfor_filter_HA(self, db_lcmop, topic, op_type, op_id):
        """
        Set DB _filter for querying any related process state
        :param db_lcmop: DB content of this operation (NS/NSI) or of this account (VIM/WIM/SDN)
        :return: filter that selects the entries in 'PROCESSING' state and, unless op_type is 'ANY', started
            before this operation. Empty dict for any other topic
        """
        # Special operation 'ANY', for SDN account associated to a VIM account: op_id as '_id'
        # In this special case, the timestamp is ignored
        if op_type == 'ANY':
            return {'operationState': 'PROCESSING'}

        # Otherwise, get 'startTime' timestamp for this operation
        # NS/NSI
        if self._is_service_type_HA(topic):
            instance_id_label = self.topic2instid_dict.get(topic)
            return {instance_id_label: db_lcmop.get(instance_id_label),
                    'operationState': 'PROCESSING',
                    'startTime.lt': db_lcmop.get("startTime")}
        # VIM/WIM/SDN
        if self._is_account_type_HA(topic):
            _, op_index = self._get_account_and_op_HA(op_id)
            this_op = db_lcmop['_admin']['operations'][int(op_index)]
            return {'operationState': 'PROCESSING',
                    'startTime.lt': this_op.get('startTime', None)}
        return {}

    def _get_dbparams_for_lock_HA(self, topic, op_type, op_id):
        """
        Get DB params for any topic and operation
        :return: tuple (q_filter, update_dict) to be used with db.set_one for locking the task. The filter
            only matches while no worker is annotated for the operation. It is (None, None) for a
            VIM/WIM/SDN op_id that cannot be split, and ({}, {}) for any other topic
        """
        # NS/NSI
        if self._is_service_type_HA(topic):
            return {'_id': op_id, '_admin.worker': None}, {'_admin.worker': self.worker_id}
        # VIM/WIM/SDN
        if self._is_account_type_HA(topic):
            account_id, op_index = self._get_account_and_op_HA(op_id)
            if not account_id:
                return None, None
            if op_type == 'create':
                # Creating a VIM/WIM/SDN account implies setting '_admin.current_operation' = 0
                op_index = 0
            worker_key = '_admin.operations.{}.worker'.format(op_index)
            return ({'_id': account_id, worker_key: None},
                    {worker_key: self.worker_id, '_admin.current_operation': op_index})
        return {}, {}

    def lock_HA(self, topic, op_type, op_id):
        """
        Lock a task, if possible, to indicate to the HA system that
        the task will be executed in this LCM instance.
        :param topic: Can be "ns", "nsi", "vim", "wim", or "sdn"
        :param op_type: Operation type, can be "nslcmops", "nsilcmops", "create", "edit", "delete"
        :param op_id: NS, NSI: Operation ID  VIM,WIM,SDN: Account ID + ':' + Operation Index
        :return:
            True=lock was successful => execute the task (not registered by any other LCM instance)
            False=lock failed => do NOT execute the task (already registered by another LCM instance)

        HA tasks and backward compatibility:
        If topic is "account type" (VIM/WIM/SDN) and op_id is None, 'op_id' was not provided by NBI.
        This means that the running NBI instance does not support HA.
        In such a case this method should always return True, to always execute
        the task in this instance of LCM, without querying the DB.
        """

        # Backward compatibility for VIM/WIM/SDN without op_id
        if self._is_account_type_HA(topic) and op_id is None:
            return True

        # Try to lock this task
        db_table_name = self.topic2dbtable_dict.get(topic)
        q_filter, update_dict = self._get_dbparams_for_lock_HA(topic, op_type, op_id)
        db_lock_task = self.db.set_one(db_table_name,
                                       q_filter=q_filter,
                                       update_dict=update_dict,
                                       fail_on_empty=False)
        if db_lock_task is None:
            self.logger.debug("Task {} operation={} already locked by another worker".format(topic, op_id))
            return False

        # Set 'detailed-status' to 'In progress' for VIM/WIM/SDN operations
        if self._is_account_type_HA(topic):
            account_id, op_index = self._get_account_and_op_HA(op_id)
            self.db.set_one(db_table_name,
                            q_filter={'_id': account_id},
                            update_dict={'_admin.operations.{}.detailed-status'.format(op_index): 'In progress'},
                            fail_on_empty=False)
        return True

    def register_HA(self, topic, op_type, op_id, operationState, detailed_status):
        """
        Register a task, done when finished a VIM/WIM/SDN 'create' operation.
        :param topic: Can be "vim", "wim", or "sdn". Nothing is done for any other topic
        :param op_type: Operation type, can be "create", "edit", "delete". Not used by this method
        :param op_id: Account ID + ':' + Operation Index. Nothing is done if it is None
        :param operationState: value written in the 'operationState' of the operation
        :param detailed_status: value written in the 'detailed-status' of the operation
        :return: nothing
        """

        # Backward compatibility
        if not self._is_account_type_HA(topic) or op_id is None:
            return

        # Get Account ID and Operation Index
        account_id, op_index = self._get_account_and_op_HA(op_id)
        db_table_name = self.topic2dbtable_dict.get(topic)

        # If this is a 'delete' operation, the account may have been deleted (SUCCESS) or may still exist (FAILED)
        # If the account exist, register the HA task (fail_on_empty=False tolerates the deleted account).
        # Update DB for HA tasks
        op_prefix = '_admin.operations.{}.'.format(op_index)
        self.db.set_one(db_table_name,
                        q_filter={'_id': account_id},
                        update_dict={op_prefix + 'operationState': operationState,
                                     op_prefix + 'detailed-status': detailed_status},
                        fail_on_empty=False)

    async def waitfor_related_HA(self, topic, op_type, op_id=None):
        """
        Wait for any pending related HA tasks, polling the DB until there are none.
        :param topic: Can be "ns", "nsi", "vim", "wim", or "sdn". It returns immediately for any other topic
        :param op_type: Operation type. With the special value 'ANY' (SDN account associated to a VIM account)
            op_id is the account '_id', timestamps are ignored and 'detailed-status' is not updated
        :param op_id: NS, NSI: Operation ID  VIM,WIM,SDN: Account ID + ':' + Operation Index
            It returns immediately if it is None
        :return: None
        :raises LcmException: if there are related tasks still pending after the timeout
        """

        # Backward compatibility: without op_id, or with a topic that has no DB table, there is nothing that
        # can be looked for in the DB. Each condition is enough on its own for returning
        if op_id is None or not (self._is_service_type_HA(topic) or self._is_account_type_HA(topic)):
            return

        # Get DB table name
        db_table_name = self.topic2dbtable_dict.get(topic)

        # Get instance ID
        _id = self._get_instance_id_HA(topic, op_type, op_id)
        db_lcmop = self.db.get_one(db_table_name,
                                   {"_id": _id},
                                   fail_on_empty=False)
        if not db_lcmop:
            return

        # Set DB _filter for querying any related process state
        _filter = self._get_waitfor_filter_HA(db_lcmop, topic, op_type, op_id)

        # Key of the 'detailed-status' to update while waiting. It does not change between polls
        if self._is_service_type_HA(topic):
            # NS/NSI
            status_key = 'detailed-status'
        else:
            # VIM/WIM/SDN
            _, op_index = self._get_account_and_op_HA(op_id)
            status_key = '_admin.operations.{}.detailed-status'.format(op_index)

        # For HA, get list of tasks from DB instead of from dictionary (in-memory) variable.
        timeout_wait_for_task = 3600   # Max time (seconds) to wait for a related task to finish
        # A too long polling interval slows things down considerably
        interval_wait_for_task = 10    # Interval in seconds for polling related tasks
        time_left = timeout_wait_for_task
        old_num_related_tasks = 0
        while True:
            # Get related tasks (operations within the same instance as this) which are
            # still running (operationState='PROCESSING') and which were started before this task.
            # In the case of op_type='ANY', get any related tasks with operationState='PROCESSING', ignore timestamps.
            db_waitfor_related_task = self.db.get_list(db_table_name,
                                                       q_filter=_filter)
            new_num_related_tasks = len(db_waitfor_related_task)
            # If there are no related tasks, there is nothing to wait for, so return.
            if not new_num_related_tasks:
                return
            # If number of pending related tasks have changed,
            # update the 'detailed-status' field and log the change.
            # Do NOT update the 'detailed-status' for SDNC-associated-to-VIM operations ('ANY').
            if op_type != 'ANY' and new_num_related_tasks != old_num_related_tasks:
                step = "Waiting for {} related tasks to be completed.".format(new_num_related_tasks)
                self.logger.debug("Task {} operation={} {}".format(topic, _id, step))
                self.db.set_one(db_table_name,
                                q_filter={'_id': _id},
                                update_dict={status_key: step},
                                fail_on_empty=False)
                old_num_related_tasks = new_num_related_tasks
            time_left -= interval_wait_for_task
            if time_left < 0:
                raise LcmException(
                    "Timeout ({}) when waiting for related tasks to be completed".format(
                        timeout_wait_for_task))
            await asyncio.sleep(interval_wait_for_task)