Prepare LCM tasks for HA
[osm/LCM.git] / osm_lcm / lcm_utils.py
1 # -*- coding: utf-8 -*-
2
3 ##
4 # Copyright 2018 Telefonica S.A.
5 #
6 # Licensed under the Apache License, Version 2.0 (the "License"); you may
7 # not use this file except in compliance with the License. You may obtain
8 # a copy of the License at
9 #
10 # http://www.apache.org/licenses/LICENSE-2.0
11 #
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 # License for the specific language governing permissions and limitations
16 # under the License.
17 ##
18
19 import asyncio
20 from collections import OrderedDict
21 # from osm_common.dbbase import DbException
22
23 __author__ = "Alfonso Tierno"
24
25
class LcmException(Exception):
    """Base exception of this module; the other Lcm* exceptions in this file derive from it."""
28
29
class LcmExceptionNoMgmtIP(LcmException):
    """
    Specialised LcmException.

    NOTE(review): judging by its name it signals a missing management IP address; the raising code is not
    part of this file, confirm against callers.
    """
32
33
class LcmExceptionExit(LcmException):
    """
    Specialised LcmException.

    NOTE(review): presumably raised to request that LCM stops; the raising code is not part of this file,
    confirm against callers.
    """
36
37
def versiontuple(v):
    """
    Turn a dot separated version string into a tuple that can be compared with another one.

    Every dot separated component is left-padded with zeros up to 16 characters, so that comparing the
    resulting strings orders numeric components properly ("10" after "9").
    A package version looks like 4.0.1.post11+gb3f024d.dirty-1, where 4.0.1 is the git tag, postXX is the
    number of commits since this tag and +XXXXXXX is the git commit short id; 16 characters are enough for
    such a component with up to 999 commits.

    :param v: version string with dot separated components
    :return: tuple of zero-padded strings, one per component
    """
    return tuple(component.zfill(16) for component in v.split("."))
47
48
49 # LcmBase must be listed before TaskRegistry, as it is a dependency.
class LcmBase:
    """Base class that keeps the db, msg, fs and logger objects handed to it and offers a db update helper."""

    def __init__(self, db, msg, fs, logger):
        """
        Store the shared helper objects as attributes of the same name.

        :param db: database connection
        :param msg: kept as self.msg (presumably the message bus client - TODO confirm)
        :param fs: kept as self.fs (presumably the file storage client - TODO confirm)
        :param logger: kept as self.logger
        """
        self.db, self.msg, self.fs, self.logger = db, msg, fs, logger

    def update_db_2(self, item, _id, _desc):
        """
        Write the content of _desc into the database entry 'item' whose "_id" is _id.
        Nothing is done when _desc is empty. After a successful write _desc is emptied in place, so that the
        caller can keep accumulating new changes in the same dictionary.

        :param item: first argument passed to db.set_one (presumably the collection/table name - confirm)
        :param _id: value used for the {"_id": _id} filter
        :param _desc: dictionary with the content to update
        :return: None. There is no exception handling here, anything raised by db.set_one reaches the caller
        """
        if _desc:
            self.db.set_one(item, {"_id": _id}, _desc)
            _desc.clear()
76
77
class TaskRegistry(LcmBase):
    """
    Implements a registry of tasks, needed for later cancellation, for looking up related tasks that must be
    completed before a new one, etc. It stores a four level dict
        First level is the topic: ns, nsi, vim_account, wim_account, sdn
        Second level is the _id
        Third level is the operation id
        Fourth level is a descriptive name, the value is the task class

    The HA (High-Availability) methods are used when more than one LCM instance is running.
    To register the current task in the external DB, LcmBase is used as base class, to be able
    to reuse LcmBase.update_db_2()
    The DB registry uses the following fields to distinguish a task:
    - op_type: operation type ("nslcmops" or "nsilcmops")
    - op_id:   operation ID
    - worker:  the worker ID for this process
    """

    # For each HA-capable topic, name of the operation record field that holds the id of the instance
    instance_id_label_dict = {'ns': 'nsInstanceId', 'nsi': 'netsliceInstanceId'}

    def __init__(self, worker_id=None, db=None, logger=None):
        """
        :param worker_id: identifier of this LCM process, written to the DB by lock_HA()
        :param db: database connection, needed only by the HA methods
        :param logger: logger, needed only by the HA methods
        """
        # Initialize the base class too, so that every attribute it defines exists.
        # No msg nor fs objects are received here, hence they are set to None.
        super().__init__(db, None, None, logger)
        self.task_registry = {
            "ns": {},
            "nsi": {},
            "vim_account": {},
            "wim_account": {},
            "sdn": {},
        }
        self.worker_id = worker_id

    def register(self, topic, _id, op_id, task_name, task):
        """
        Register a new task
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param op_id: id of the operation of the related item
        :param task_name: Task descriptive name, as create, instantiate, terminate. Must be unique in this op_id
        :param task: Task class
        :return: none
        """
        # Operations of one item are kept in an OrderedDict to remember the order in which they were registered
        item_ops = self.task_registry[topic].setdefault(_id, OrderedDict())
        item_ops.setdefault(op_id, {})[task_name] = task

    def remove(self, topic, _id, op_id, task_name=None):
        """
        When task is ended, it should be removed. It ignores missing tasks. It also removes tasks done with this _id
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param op_id: id of the operation of the related item
        :param task_name: Task descriptive name. If none it deletes all tasks with same _id and op_id
        :return: None
        """
        item_ops = self.task_registry[topic].get(_id)
        if not item_ops:
            return
        if not task_name:
            item_ops.pop(op_id, None)
        elif item_ops.get(op_id):
            item_ops[op_id].pop(task_name, None)

        # delete the operations of this item whose tasks are all done (or that have no task left).
        # Iterate over a copy of the keys because entries are deleted inside the loop
        for op_id_ in list(item_ops):
            if all(task.done() for task in item_ops[op_id_].values()):
                del item_ops[op_id_]
        if not item_ops:
            del self.task_registry[topic][_id]

    def lookfor_related(self, topic, _id, my_op_id=None):
        """
        Look for the not finished tasks of the operation registered just before 'my_op_id' for the same item.
        If my_op_id is not provided, the last registered operation is the one inspected.
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param my_op_id: id of the operation asking; this one and the ones registered after it are skipped
        :return: tuple (task names joined with ", ", list of tasks). It is ("", []) if there is nothing pending
        """
        task_list = []
        task_name_list = []
        if _id not in self.task_registry[topic]:
            return "", task_list
        # Walk the operations from the newest to the oldest one
        for op_id in reversed(self.task_registry[topic][_id]):
            if my_op_id:
                if my_op_id == op_id:
                    my_op_id = None  # so that the next task is taken
                continue

            for task_name, task in self.task_registry[topic][_id][op_id].items():
                if not task.done():
                    task_list.append(task)
                    task_name_list.append(task_name)
            # Only one operation (the previous one) is inspected
            break
        return ", ".join(task_name_list), task_list

    def cancel(self, topic, _id, target_op_id=None, target_task_name=None):
        """
        Cancel all active tasks of a concrete ns, nsi, vim_account, sdn identified for _id. If op_id is supplied only
        this is cancelled, and the same with task_name
        :param topic: Can be "ns", "nsi", "vim_account", "wim_account", "sdn"
        :param _id: _id of the related item
        :param target_op_id: if provided, only the tasks of this operation are cancelled
        :param target_task_name: if provided, only the tasks with this name are cancelled
        :return: None
        """
        item_ops = self.task_registry[topic].get(_id)
        if not item_ops:
            return
        for op_id in reversed(item_ops):
            if target_op_id and target_op_id != op_id:
                continue
            for task_name, task in item_ops[op_id].items():
                if target_task_name and target_task_name != task_name:
                    continue
                task.cancel()

    def lock_HA(self, topic, op_type, op_id):
        """
        Lock a task, if possible, to indicate to the HA system that
        the task will be executed in this LCM instance.
        :param topic: Can be "ns", "nsi"
        :param op_type: Operation type, can be "nslcmops", "nsilcmops"
        :param op_id: id of the operation of the related item
        :return:
            True=lock successful => execute the task (not registered by any other LCM instance)
            False=lock failed => do NOT execute the task (already registered by another LCM instance)
        """

        # Find-and-update in a single DB call: the record only matches while it has no worker assigned.
        # With fail_on_empty=False, set_one is expected to return None when nothing matched
        db_lock_task = self.db.set_one(op_type,
                                       q_filter={'_id': op_id, '_admin.worker': None},
                                       update_dict={'_admin.worker': self.worker_id},
                                       fail_on_empty=False)

        if db_lock_task is None:
            self.logger.debug("Task {} operation={} already locked by another worker".format(topic, op_id))
            return False
        return True

    async def waitfor_related_HA(self, topic, op_type, op_id=None):
        """
        Wait for any pending related HA tasks: operations over the same NS or NSI instance that are still in
        'PROCESSING' state and that were started before this one. The DB is polled until there is none left.
        :param topic: Can be "ns", "nsi"
        :param op_type: Operation type, can be "nslcmops", "nsilcmops"
        :param op_id: id of the operation that wants to wait. Nothing is done if it is not found at DB
        :return: None. LcmException is raised if the related tasks are not completed on time
        """

        # InstanceId label
        instance_id_label = self.instance_id_label_dict.get(topic)

        # Get 'startTime' timestamp for this operation
        db_lcmop = self.db.get_one(op_type,
                                   {"_id": op_id},
                                   fail_on_empty=False)
        if not db_lcmop:
            return
        starttime_this_op = db_lcmop.get("startTime")
        instance_id = db_lcmop.get(instance_id_label)

        # For HA, get list of tasks from DB instead of from dictionary (in-memory) variable.
        timeout_wait_for_task = 3600   # Max time (seconds) to wait for a related task to finish
        # A too long polling interval slows things down considerably
        interval_wait_for_task = 10    # Interval in seconds for polling related tasks
        time_left = timeout_wait_for_task
        old_num_related_tasks = 0
        while True:
            # Get related tasks (operations within the same NS or NSI instance) which are
            # still running (operationState='PROCESSING') and which were started before this task.
            _filter = {instance_id_label: instance_id,
                       'operationState': 'PROCESSING',
                       'startTime.lt': starttime_this_op}
            db_waitfor_related_task = self.db.get_list(op_type,
                                                       q_filter=_filter)
            new_num_related_tasks = len(db_waitfor_related_task)
            if not new_num_related_tasks:
                # There are no related tasks, no need to wait, so return.
                return
            # If number of pending related tasks have changed,
            # update the 'detailed-status' field and log the change.
            if new_num_related_tasks != old_num_related_tasks:
                detailed_status = "Waiting for {} related tasks to be completed.".format(new_num_related_tasks)
                db_lcmops_update = {"detailed-status": detailed_status}
                self.logger.debug("Task {} operation={} {}".format(topic, op_id, detailed_status))
                self.update_db_2(op_type, op_id, db_lcmops_update)
                old_num_related_tasks = new_num_related_tasks
            # The time budget is consumed before sleeping; give up once it becomes negative
            time_left -= interval_wait_for_task
            if time_left < 0:
                raise LcmException(
                    "Timeout ({}) when waiting for related tasks to be completed".format(
                        timeout_wait_for_task))
            await asyncio.sleep(interval_wait_for_task)