From: kuuse Date: Wed, 10 Jul 2019 11:52:45 +0000 (+0200) Subject: HA for vim, wim and sdn X-Git-Tag: v6.0.2rc1~6 X-Git-Url: https://osm.etsi.org/gitweb/?a=commitdiff_plain;h=refs%2Fchanges%2F67%2F7767%2F8;p=osm%2FLCM.git HA for vim, wim and sdn Change-Id: I14f9321519a07df32b5053f86d0e635b0abed4b9 Signed-off-by: kuuse --- diff --git a/osm_lcm/lcm.py b/osm_lcm/lcm.py index 27a44cb..57e8173 100644 --- a/osm_lcm/lcm.py +++ b/osm_lcm/lcm.py @@ -359,7 +359,7 @@ class Lcm: return elif command == "delete": self.lcm_tasks.cancel(topic, vim_id) - task = asyncio.ensure_future(self.vim.delete(vim_id, order_id)) + task = asyncio.ensure_future(self.vim.delete(params, order_id)) self.lcm_tasks.register("vim_account", vim_id, order_id, "vim_delete", task) return elif command == "show": @@ -378,7 +378,7 @@ class Lcm: return elif command == "delete": self.lcm_tasks.cancel(topic, wim_id) - task = asyncio.ensure_future(self.wim.delete(wim_id, order_id)) + task = asyncio.ensure_future(self.wim.delete(params, order_id)) self.lcm_tasks.register("wim_account", wim_id, order_id, "wim_delete", task) return elif command == "show": @@ -397,7 +397,7 @@ class Lcm: return elif command == "delete": self.lcm_tasks.cancel(topic, _sdn_id) - task = asyncio.ensure_future(self.sdn.delete(_sdn_id, order_id)) + task = asyncio.ensure_future(self.sdn.delete(params, order_id)) self.lcm_tasks.register("sdn", _sdn_id, order_id, "sdn_delete", task) return elif command == "edit": diff --git a/osm_lcm/lcm_utils.py b/osm_lcm/lcm_utils.py index b4d0455..bb53d7c 100644 --- a/osm_lcm/lcm_utils.py +++ b/osm_lcm/lcm_utils.py @@ -93,7 +93,22 @@ class TaskRegistry(LcmBase): - worker: the worker ID for this process """ - instance_id_label_dict = {'ns': 'nsInstanceId', 'nsi': 'netsliceInstanceId'} + # NS/NSI: "services" VIM/WIM/SDN: "accounts" + topic_service_list = ['ns', 'nsi'] + topic_account_list = ['vim', 'wim', 'sdn'] + + # Map topic to InstanceID + topic2instid_dict = { + 'ns': 'nsInstanceId', + 'nsi': 'netsliceInstanceId'} + + # Map topic to DB table name + topic2dbtable_dict = { + 'ns': 'nslcmops', + 'nsi': 'nsilcmops', + 'vim': 'vim_accounts', + 'wim': 'wim_accounts', + 'sdn': 'sdns'} def __init__(self, worker_id=None, db=None, logger=None): self.task_registry = { @@ -187,46 +202,184 @@ class TaskRegistry(LcmBase): # if result: # self.logger.debug("{} _id={} order_id={} task={} cancelled".format(topic, _id, op_id, task_name)) + # Is topic NS/NSI? + def _is_service_type_HA(self, topic): + return topic in self.topic_service_list + + # Is topic VIM/WIM/SDN? + def _is_account_type_HA(self, topic): + return topic in self.topic_account_list + + # Input: op_id, example: 'abc123def:3' Output: account_id='abc123def', op_index=3 + def _get_account_and_op_HA(self, op_id): + if not op_id: + return (None, None) + account_id, _, op_index = op_id.rpartition(':') + if not account_id: + return (None, None) + if not op_index.isdigit(): + return (None, None) + return account_id, op_index + + # Get '_id' for any topic and operation + def _get_instance_id_HA(self, topic, op_type, op_id): + _id = None + # Special operation 'ANY', for SDN account associated to a VIM account: op_id as '_id' + if op_type == 'ANY': + _id = op_id + # NS/NSI: Use op_id as '_id' + elif self._is_service_type_HA(topic): + _id = op_id + # VIM/SDN/WIM: Split op_id to get Account ID and Operation Index, use Account ID as '_id' + elif self._is_account_type_HA(topic): + _id, _ = self._get_account_and_op_HA(op_id) + return _id + + # Set DB _filter for querying any related process state + def _get_waitfor_filter_HA(self, db_lcmop, topic, op_type, op_id): + _filter = {} + # Special operation 'ANY', for SDN account associated to a VIM account: op_id as '_id' + # In this special case, the timestamp is ignored + if op_type == 'ANY': + _filter = {'operationState': 'PROCESSING'} + # Otherwise, get 'startTime' timestamp for this operation + else: + # NS/NSI + if self._is_service_type_HA(topic): + starttime_this_op = db_lcmop.get("startTime") + instance_id_label = self.topic2instid_dict.get(topic) + instance_id = db_lcmop.get(instance_id_label) + _filter = {instance_id_label: instance_id, + 'operationState': 'PROCESSING', + 'startTime.lt': starttime_this_op} + # VIM/WIM/SDN + elif self._is_account_type_HA(topic): + _, op_index = self._get_account_and_op_HA(op_id) + _ops = db_lcmop['_admin']['operations'] + _this_op = _ops[int(op_index)] + starttime_this_op = _this_op.get('startTime', None) + _filter = {'operationState': 'PROCESSING', + 'startTime.lt': starttime_this_op} + return _filter + + # Get DB params for any topic and operation + def _get_dbparams_for_lock_HA(self, topic, op_type, op_id): + q_filter = {} + update_dict = {} + # NS/NSI + if self._is_service_type_HA(topic): + q_filter = {'_id': op_id, '_admin.worker': None} + update_dict = {'_admin.worker': self.worker_id} + # VIM/WIM/SDN + elif self._is_account_type_HA(topic): + account_id, op_index = self._get_account_and_op_HA(op_id) + if not account_id: + return None, None + if op_type == 'create': + # Creating a VIM/WIM/SDN account implies setting '_admin.current_operation' = 0 + op_index = 0 + q_filter = {'_id': account_id, "_admin.operations.{}.worker".format(op_index): None} + update_dict = {'_admin.operations.{}.worker'.format(op_index): self.worker_id, + '_admin.current_operation': op_index} + return q_filter, update_dict + def lock_HA(self, topic, op_type, op_id): """ - Lock an task, if possible, to indicate to the HA system that + Lock a task, if possible, to indicate to the HA system that the task will be executed in this LCM instance. - :param topic: Can be "ns", "nsi" - :param op_type: Operation type, can be "nslcmops", "nsilcmops" - :param op_id: id of the operation of the related item + :param topic: Can be "ns", "nsi", "vim", "wim", or "sdn" + :param op_type: Operation type, can be "nslcmops", "nsilcmops", "create", "edit", "delete" + :param op_id: NS, NSI: Operation ID VIM,WIM,SDN: Account ID + ':' + Operation Index :return: - True=lock successful => execute the task (not registered by any other LCM instance) + True=lock was successful => execute the task (not registered by any other LCM instance) False=lock failed => do NOT execute the task (already registered by another LCM instance) + + HA tasks and backward compatibility: + If topic is "account type" (VIM/WIM/SDN) and op_id is None, 'op_id' was not provided by NBI. + This means that the running NBI instance does not support HA. + In such a case this method should always return True, to always execute + the task in this instance of LCM, without querying the DB. """ - db_lock_task = self.db.set_one(op_type, - q_filter={'_id': op_id, '_admin.worker': None}, - update_dict={'_admin.worker': self.worker_id}, - fail_on_empty=False) + # Backward compatibility for VIM/WIM/SDN without op_id + if self._is_account_type_HA(topic) and op_id is None: + return True + # Try to lock this task + db_table_name = self.topic2dbtable_dict.get(topic) + q_filter, update_dict = self._get_dbparams_for_lock_HA(topic, op_type, op_id) + db_lock_task = self.db.set_one(db_table_name, + q_filter=q_filter, + update_dict=update_dict, + fail_on_empty=False) if db_lock_task is None: self.logger.debug("Task {} operation={} already locked by another worker".format(topic, op_id)) return False else: + # Set 'detailed-status' to 'In progress' for VIM/WIM/SDN operations + if self._is_account_type_HA(topic): + detailed_status = 'In progress' + account_id, op_index = self._get_account_and_op_HA(op_id) + q_filter = {'_id': account_id} + update_dict = {'_admin.operations.{}.detailed-status'.format(op_index): detailed_status} + self.db.set_one(db_table_name, + q_filter=q_filter, + update_dict=update_dict, + fail_on_empty=False) return True + def register_HA(self, topic, op_type, op_id, operationState, detailed_status): + """ + Register a task, done when finished a VIM/WIM/SDN 'create' operation. + :param topic: Can be "vim", "wim", or "sdn" + :param op_type: Operation type, can be "create", "edit", "delete" + :param op_id: Account ID + ':' + Operation Index + :return: nothing + """ + + # Backward compatibility + if not self._is_account_type_HA(topic) or (self._is_account_type_HA(topic) and op_id is None): + return + + # Get Account ID and Operation Index + account_id, op_index = self._get_account_and_op_HA(op_id) + db_table_name = self.topic2dbtable_dict.get(topic) + + # If this is a 'delete' operation, the account may have been deleted (SUCCESS) or may still exist (FAILED) + # If the account exist, register the HA task. + # Update DB for HA tasks + q_filter = {'_id': account_id} + update_dict = {'_admin.operations.{}.operationState'.format(op_index): operationState, + '_admin.operations.{}.detailed-status'.format(op_index): detailed_status} + self.db.set_one(db_table_name, + q_filter=q_filter, + update_dict=update_dict, + fail_on_empty=False) + return + async def waitfor_related_HA(self, topic, op_type, op_id=None): """ Wait for any pending related HA tasks """ - # InstanceId label - instance_id_label = self.instance_id_label_dict.get(topic) + # Backward compatibility + if not (self._is_service_type_HA(topic) or self._is_account_type_HA(topic)) and (op_id is None): + return + + # Get DB table name + db_table_name = self.topic2dbtable_dict.get(topic) - # Get 'startTime' timestamp for this operation - step = "Getting timestamp for op_id={} from db".format(op_id) - db_lcmop = self.db.get_one(op_type, - {"_id": op_id}, + # Get instance ID + _id = self._get_instance_id_HA(topic, op_type, op_id) + _filter = {"_id": _id} + db_lcmop = self.db.get_one(db_table_name, + _filter, fail_on_empty=False) if not db_lcmop: return - starttime_this_op = db_lcmop.get("startTime") - instance_id = db_lcmop.get(instance_id_label) + + # Set DB _filter for querying any related process state + _filter = self._get_waitfor_filter_HA(db_lcmop, topic, op_type, op_id) # For HA, get list of tasks from DB instead of from dictionary (in-memory) variable. timeout_wait_for_task = 3600 # Max time (seconds) to wait for a related task to finish @@ -235,26 +388,34 @@ class TaskRegistry(LcmBase): time_left = timeout_wait_for_task old_num_related_tasks = 0 while True: - # Get related tasks (operations within the same NS or NSI instance) which are + # Get related tasks (operations within the same instance as this) which are # still running (operationState='PROCESSING') and which were started before this task. - _filter = {instance_id_label: instance_id, - 'operationState': 'PROCESSING', - 'startTime.lt': starttime_this_op} - db_waitfor_related_task = self.db.get_list(op_type, + # In the case of op_type='ANY', get any related tasks with operationState='PROCESSING', ignore timestamps. + db_waitfor_related_task = self.db.get_list(db_table_name, q_filter=_filter) new_num_related_tasks = len(db_waitfor_related_task) + # If there are no related tasks, there is nothing to wait for, so return. if not new_num_related_tasks: - # There are no related tasks, no need to wait, so return. return # If number of pending related tasks have changed, # update the 'detailed-status' field and log the change. - if new_num_related_tasks != old_num_related_tasks: - db_lcmops_update = {} - step = db_lcmops_update["detailed-status"] = \ - "Waiting for {} related tasks to be completed.".format( - new_num_related_tasks) - self.logger.debug("Task {} operation={} {}".format(topic, op_id, step)) - self.update_db_2(op_type, op_id, db_lcmops_update) + # Do NOT update the 'detailed-status' for SDNC-associated-to-VIM operations ('ANY'). + if (op_type != 'ANY') and (new_num_related_tasks != old_num_related_tasks): + step = "Waiting for {} related tasks to be completed.".format(new_num_related_tasks) + update_dict = {} + q_filter = {'_id': _id} + # NS/NSI + if self._is_service_type_HA(topic): + update_dict = {'detailed-status': step} + # VIM/WIM/SDN + elif self._is_account_type_HA(topic): + _, op_index = self._get_account_and_op_HA(op_id) + update_dict = {'_admin.operations.{}.detailed-status'.format(op_index): step} + self.logger.debug("Task {} operation={} {}".format(topic, _id, step)) + self.db.set_one(db_table_name, + q_filter=q_filter, + update_dict=update_dict, + fail_on_empty=False) old_num_related_tasks = new_num_related_tasks time_left -= interval_wait_for_task if time_left < 0: diff --git a/osm_lcm/vim_sdn.py b/osm_lcm/vim_sdn.py index 3fb24aa..1d782a5 100644 --- a/osm_lcm/vim_sdn.py +++ b/osm_lcm/vim_sdn.py @@ -16,7 +16,6 @@ # under the License. ## -import asyncio import logging import logging.handlers import ROclient @@ -47,20 +46,37 @@ class VimLcm(LcmBase): super().__init__(db, msg, fs, self.logger) async def create(self, vim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. + # Register 'create' task here for related future HA operations + op_id = vim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('vim', 'create', op_id): + return + vim_id = vim_content["_id"] vim_content.pop("op_id", None) logging_text = "Task vim_create={} ".format(vim_id) self.logger.debug(logging_text + "Enter") + db_vim = None db_vim_update = {} exc = None RO_sdn_id = None + operationState_HA = '' + detailed_status_HA = '' try: step = "Getting vim-id='{}' from db".format(vim_id) db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) if vim_content.get("config") and vim_content["config"].get("sdn-controller"): step = "Getting sdn-controller-id='{}' from db".format(vim_content["config"]["sdn-controller"]) db_sdn = self.db.get_one("sdns", {"_id": vim_content["config"]["sdn-controller"]}) + + # If the VIM account has an associated SDN account, also + # wait for any previous tasks in process for the SDN + await self.lcm_tasks.waitfor_related_HA('sdn', 'ANY', db_sdn["_id"]) + if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get("RO"): RO_sdn_id = db_sdn["_admin"]["deployed"]["RO"] else: @@ -118,6 +134,9 @@ class VimLcm(LcmBase): db_vim_update["_admin.deployed.RO-account"] = desc["uuid"] db_vim_update["_admin.operationalState"] = "ENABLED" db_vim_update["_admin.detailed-status"] = "Done" + # Mark the VIM 'create' HA task as successful + operationState_HA = 'COMPLETED' + detailed_status_HA = 'Done' # await asyncio.sleep(15) # TODO remove. This is for test self.logger.debug(logging_text + "Exit Ok VIM account created at RO_vim_account_id={}".format(desc["uuid"])) @@ -133,52 +152,58 @@ class VimLcm(LcmBase): if exc and db_vim: db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the VIM 'create' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) try: if db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) + # Register the VIM 'create' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.register_HA('vim', 'create', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("vim_account", vim_id, order_id) async def edit(self, vim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = vim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('vim', 'edit', op_id): + return + vim_id = vim_content["_id"] vim_content.pop("op_id", None) logging_text = "Task vim_edit={} ".format(vim_id) self.logger.debug(logging_text + "Enter") + db_vim = None exc = None RO_sdn_id = None RO_vim_id = None db_vim_update = {} + operationState_HA = '' + detailed_status_HA = '' step = "Getting vim-id='{}' from db".format(vim_id) try: - db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('vim', 'edit', op_id) - # look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("vim_account", vim_id, order_id) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - # TODO write this to database - _, pending = await asyncio.wait(task_dependency, timeout=3600) - if pending: - raise LcmException("Timeout waiting related tasks to be completed") + db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) if db_vim.get("_admin") and db_vim["_admin"].get("deployed") and db_vim["_admin"]["deployed"].get("RO"): if vim_content.get("config") and vim_content["config"].get("sdn-controller"): step = "Getting sdn-controller-id='{}' from db".format(vim_content["config"]["sdn-controller"]) db_sdn = self.db.get_one("sdns", {"_id": vim_content["config"]["sdn-controller"]}) - # look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("sdn", db_sdn["_id"]) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - # TODO write this to database - _, pending = await asyncio.wait(task_dependency, timeout=3600) - if pending: - raise LcmException("Timeout waiting related tasks to be completed") + # If the VIM account has an associated SDN account, also + # wait for any previous tasks in process for the SDN + await self.lcm_tasks.waitfor_related_HA('sdn', 'ANY', db_sdn["_id"]) if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get( "RO"): @@ -240,6 +265,9 @@ class VimLcm(LcmBase): # vim_thread. RO will remove and relaunch a new thread for this vim_account await RO.edit("vim_account", RO_vim_id, descriptor=vim_account_RO) db_vim_update["_admin.operationalState"] = "ENABLED" + # Mark the VIM 'edit' HA task as successful + operationState_HA = 'COMPLETED' + detailed_status_HA = 'Done' self.logger.debug(logging_text + "Exit Ok RO_vim_id={}".format(RO_vim_id)) return @@ -254,22 +282,45 @@ class VimLcm(LcmBase): if exc and db_vim: db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the VIM 'edit' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) try: if db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) + # Register the VIM 'edit' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.register_HA('vim', 'edit', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("vim_account", vim_id, order_id) - async def delete(self, vim_id, order_id): + async def delete(self, vim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = vim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('vim', 'delete', op_id): + return + + vim_id = vim_content["_id"] logging_text = "Task vim_delete={} ".format(vim_id) self.logger.debug(logging_text + "Enter") + db_vim = None db_vim_update = {} exc = None + operationState_HA = '' + detailed_status_HA = '' step = "Getting vim from db" try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('vim', 'delete', op_id) + db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) if db_vim.get("_admin") and db_vim["_admin"].get("deployed") and db_vim["_admin"]["deployed"].get("RO"): RO_vim_id = db_vim["_admin"]["deployed"]["RO"] @@ -293,7 +344,7 @@ class VimLcm(LcmBase): raise else: # nothing to delete - self.logger.error(logging_text + "Nohing to remove at RO") + self.logger.error(logging_text + "Nothing to remove at RO") self.db.del_one("vim_accounts", {"_id": vim_id}) db_vim = None self.logger.debug(logging_text + "Exit Ok") @@ -310,9 +361,17 @@ class VimLcm(LcmBase): if exc and db_vim: db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the VIM 'delete' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.register_HA('vim', 'delete', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) try: if db_vim and db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) + # If the VIM 'delete' HA task was succesful, the DB entry has been deleted, + # which means that there is nowhere to register this task, so do nothing here. except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("vim_account", vim_id, order_id) @@ -337,13 +396,24 @@ class WimLcm(LcmBase): super().__init__(db, msg, fs, self.logger) async def create(self, wim_content, order_id): + + # HA tasks and backward compatibility: + # If 'wim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. + # Register 'create' task here for related future HA operations + op_id = wim_content.pop('op_id', None) + self.lcm_tasks.lock_HA('wim', 'create', op_id) + wim_id = wim_content["_id"] wim_content.pop("op_id", None) logging_text = "Task wim_create={} ".format(wim_id) self.logger.debug(logging_text + "Enter") + db_wim = None db_wim_update = {} exc = None + operationState_HA = '' + detailed_status_HA = '' try: step = "Getting wim-id='{}' from db".format(wim_id) db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) @@ -393,6 +463,9 @@ class WimLcm(LcmBase): db_wim_update["_admin.deployed.RO-account"] = desc["uuid"] db_wim_update["_admin.operationalState"] = "ENABLED" db_wim_update["_admin.detailed-status"] = "Done" + # Mark the WIM 'create' HA task as successful + operationState_HA = 'COMPLETED' + detailed_status_HA = 'Done' self.logger.debug(logging_text + "Exit Ok WIM account created at RO_wim_account_id={}".format(desc["uuid"])) return @@ -407,35 +480,47 @@ class WimLcm(LcmBase): if exc and db_wim: db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'create' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) try: if db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) + # Register the WIM 'create' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.register_HA('wim', 'create', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) async def edit(self, wim_content, order_id): + + # HA tasks and backward compatibility: + # If 'wim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = wim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('wim', 'edit', op_id): + return + wim_id = wim_content["_id"] wim_content.pop("op_id", None) logging_text = "Task wim_edit={} ".format(wim_id) self.logger.debug(logging_text + "Enter") + db_wim = None exc = None RO_wim_id = None db_wim_update = {} step = "Getting wim-id='{}' from db".format(wim_id) + operationState_HA = '' + detailed_status_HA = '' try: - db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('wim', 'edit', op_id) - # look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("wim_account", wim_id, order_id) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - # TODO write this to database - _, pending = await asyncio.wait(task_dependency, timeout=3600) - if pending: - raise LcmException("Timeout waiting related tasks to be completed") + db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) if db_wim.get("_admin") and db_wim["_admin"].get("deployed") and db_wim["_admin"]["deployed"].get("RO"): @@ -486,6 +571,9 @@ class WimLcm(LcmBase): # wim_thread. RO will remove and relaunch a new thread for this wim_account await RO.edit("wim_account", RO_wim_id, descriptor=wim_account_RO) db_wim_update["_admin.operationalState"] = "ENABLED" + # Mark the WIM 'edit' HA task as successful + operationState_HA = 'COMPLETED' + detailed_status_HA = 'Done' self.logger.debug(logging_text + "Exit Ok RO_wim_id={}".format(RO_wim_id)) return @@ -500,21 +588,44 @@ class WimLcm(LcmBase): if exc and db_wim: db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'edit' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) try: if db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) + # Register the WIM 'edit' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.register_HA('wim', 'edit', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) - async def delete(self, wim_id, order_id): + async def delete(self, wim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = wim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('wim', 'delete', op_id): + return + + wim_id = wim_content["_id"] logging_text = "Task wim_delete={} ".format(wim_id) self.logger.debug(logging_text + "Enter") + db_wim = None db_wim_update = {} exc = None step = "Getting wim from db" + operationState_HA = '' + detailed_status_HA = '' try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('wim', 'delete', op_id) + db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) if db_wim.get("_admin") and db_wim["_admin"].get("deployed") and db_wim["_admin"]["deployed"].get("RO"): RO_wim_id = db_wim["_admin"]["deployed"]["RO"] @@ -555,9 +666,17 @@ class WimLcm(LcmBase): if exc and db_wim: db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'delete' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.register_HA('wim', 'delete', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) try: if db_wim and db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) + # If the WIM 'delete' HA task was succesful, the DB entry has been deleted, + # which means that there is nowhere to register this task, so do nothing here. except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) @@ -580,14 +699,25 @@ class SdnLcm(LcmBase): super().__init__(db, msg, fs, self.logger) async def create(self, sdn_content, order_id): + + # HA tasks and backward compatibility: + # If 'sdn_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. + # Register 'create' task here for related future HA operations + op_id = sdn_content.pop('op_id', None) + self.lcm_tasks.lock_HA('sdn', 'create', op_id) + sdn_id = sdn_content["_id"] sdn_content.pop("op_id", None) logging_text = "Task sdn_create={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") + db_sdn = None db_sdn_update = {} RO_sdn_id = None exc = None + operationState_HA = '' + detailed_status_HA = '' try: step = "Getting sdn from db" db_sdn = self.db.get_one("sdns", {"_id": sdn_id}) @@ -612,6 +742,9 @@ class SdnLcm(LcmBase): db_sdn_update["_admin.deployed.RO"] = RO_sdn_id db_sdn_update["_admin.operationalState"] = "ENABLED" self.logger.debug(logging_text + "Exit Ok RO_sdn_id={}".format(RO_sdn_id)) + # Mark the SDN 'create' HA task as successful + operationState_HA = 'COMPLETED' + detailed_status_HA = 'Done' return except (ROclient.ROClientException, DbException) as e: @@ -624,23 +757,45 @@ class SdnLcm(LcmBase): if exc and db_sdn: db_sdn_update["_admin.operationalState"] = "ERROR" db_sdn_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the SDN 'create' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) try: if db_sdn and db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) + # Register the SDN 'create' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.register_HA('sdn', 'create', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id) async def edit(self, sdn_content, order_id): + + # HA tasks and backward compatibility: + # If 'sdn_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = sdn_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('sdn', 'edit', op_id): + return + sdn_id = sdn_content["_id"] sdn_content.pop("op_id", None) logging_text = "Task sdn_edit={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") + db_sdn = None db_sdn_update = {} exc = None + operationState_HA = '' + detailed_status_HA = '' step = "Getting sdn from db" try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('sdn', 'edit', op_id) + db_sdn = self.db.get_one("sdns", {"_id": sdn_id}) RO_sdn_id = None if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get("RO"): @@ -658,6 +813,9 @@ class SdnLcm(LcmBase): if sdn_RO: await RO.edit("sdn", RO_sdn_id, descriptor=sdn_RO) db_sdn_update["_admin.operationalState"] = "ENABLED" + # Mark the SDN 'edit' HA task as successful + operationState_HA = 'COMPLETED' + detailed_status_HA = 'Done' self.logger.debug(logging_text + "Exit Ok RO_sdn_id={}".format(RO_sdn_id)) return @@ -672,21 +830,44 @@ class SdnLcm(LcmBase): if exc and db_sdn: db_sdn["_admin.operationalState"] = "ERROR" db_sdn["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the SDN 'edit' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) try: if db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) + # Register the SDN 'edit' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.register_HA('sdn', 'edit', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id) - async def delete(self, sdn_id, order_id): + async def delete(self, sdn_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = sdn_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('sdn', 'delete', op_id): + return + + sdn_id = sdn_content["_id"] logging_text = "Task sdn_delete={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") + db_sdn = None db_sdn_update = {} exc = None + operationState_HA = '' + detailed_status_HA = '' step = "Getting sdn from db" try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('sdn', 'delete', op_id) + db_sdn = self.db.get_one("sdns", {"_id": sdn_id}) if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get("RO"): RO_sdn_id = db_sdn["_admin"]["deployed"]["RO"] @@ -717,9 +898,17 @@ class SdnLcm(LcmBase): if exc and db_sdn: db_sdn["_admin.operationalState"] = "ERROR" db_sdn["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the SDN 'delete' HA task as erroneous + operationState_HA = 'FAILED' + detailed_status_HA = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.register_HA('sdn', 'delete', op_id, + operationState=operationState_HA, + detailed_status=detailed_status_HA) try: if db_sdn and db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) + # If the SDN 'delete' HA task was succesful, the DB entry has been deleted, + # which means that there is nowhere to register this task, so do nothing here. except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id)