X-Git-Url: https://osm.etsi.org/gitweb/?p=osm%2FLCM.git;a=blobdiff_plain;f=osm_lcm%2Fvim_sdn.py;h=a44aa9bd48f137aa9e9d647a64b7eeb2d9981900;hp=3fb24aa594529dd8daafeb2bd05877db0f60a229;hb=2357f4e54515ce965fcc5723d274c12ec12c01c7;hpb=2d9f6f537a05514b93dea9fd54a8001a17afcc53 diff --git a/osm_lcm/vim_sdn.py b/osm_lcm/vim_sdn.py index 3fb24aa..a44aa9b 100644 --- a/osm_lcm/vim_sdn.py +++ b/osm_lcm/vim_sdn.py @@ -16,13 +16,19 @@ # under the License. ## +import yaml import asyncio import logging import logging.handlers -import ROclient -from lcm_utils import LcmException, LcmBase +from osm_lcm import ROclient +from osm_lcm.lcm_utils import LcmException, LcmBase, deep_get +from n2vc.k8s_helm_conn import K8sHelmConnector +from n2vc.k8s_helm3_conn import K8sHelm3Connector +from n2vc.k8s_juju_conn import K8sJujuConnector +from n2vc.exceptions import K8sException, N2VCException from osm_common.dbbase import DbException from copy import deepcopy +from time import time __author__ = "Alfonso Tierno" @@ -32,7 +38,7 @@ class VimLcm(LcmBase): vim_config_encrypted = {"1.1": ("admin_password", "nsx_password", "vcenter_password"), "default": ("admin_password", "nsx_password", "vcenter_password", "vrops_password")} - def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -42,15 +48,24 @@ class VimLcm(LcmBase): self.logger = logging.getLogger('lcm.vim') self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config + self.ro_config = config["ro_config"] super().__init__(db, msg, fs, self.logger) async def create(self, vim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. 
+ # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. + # Register 'create' task here for related future HA operations + op_id = vim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('vim', 'create', op_id): + return + vim_id = vim_content["_id"] - vim_content.pop("op_id", None) logging_text = "Task vim_create={} ".format(vim_id) self.logger.debug(logging_text + "Enter") + db_vim = None db_vim_update = {} exc = None @@ -61,6 +76,11 @@ class VimLcm(LcmBase): if vim_content.get("config") and vim_content["config"].get("sdn-controller"): step = "Getting sdn-controller-id='{}' from db".format(vim_content["config"]["sdn-controller"]) db_sdn = self.db.get_one("sdns", {"_id": vim_content["config"]["sdn-controller"]}) + + # If the VIM account has an associated SDN account, also + # wait for any previous tasks in process for the SDN + await self.lcm_tasks.waitfor_related_HA('sdn', 'ANY', db_sdn["_id"]) + if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get("RO"): RO_sdn_id = db_sdn["_admin"]["deployed"]["RO"] else: @@ -118,12 +138,14 @@ class VimLcm(LcmBase): db_vim_update["_admin.deployed.RO-account"] = desc["uuid"] db_vim_update["_admin.operationalState"] = "ENABLED" db_vim_update["_admin.detailed-status"] = "Done" + # Mark the VIM 'create' HA task as successful + operation_state = 'COMPLETED' + operation_details = 'Done' - # await asyncio.sleep(15) # TODO remove. 
This is for test self.logger.debug(logging_text + "Exit Ok VIM account created at RO_vim_account_id={}".format(desc["uuid"])) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -133,19 +155,35 @@ class VimLcm(LcmBase): if exc and db_vim: db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the VIM 'create' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) + # Register the VIM 'create' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('vim', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("vim_account", vim_id, order_id) async def edit(self, vim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. 
+ # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = vim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('vim', 'edit', op_id): + return + vim_id = vim_content["_id"] - vim_content.pop("op_id", None) logging_text = "Task vim_edit={} ".format(vim_id) self.logger.debug(logging_text + "Enter") + db_vim = None exc = None RO_sdn_id = None @@ -153,32 +191,19 @@ class VimLcm(LcmBase): db_vim_update = {} step = "Getting vim-id='{}' from db".format(vim_id) try: - db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('vim', 'edit', op_id) - # look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("vim_account", vim_id, order_id) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - # TODO write this to database - _, pending = await asyncio.wait(task_dependency, timeout=3600) - if pending: - raise LcmException("Timeout waiting related tasks to be completed") + db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) if db_vim.get("_admin") and db_vim["_admin"].get("deployed") and db_vim["_admin"]["deployed"].get("RO"): if vim_content.get("config") and vim_content["config"].get("sdn-controller"): step = "Getting sdn-controller-id='{}' from db".format(vim_content["config"]["sdn-controller"]) db_sdn = self.db.get_one("sdns", {"_id": vim_content["config"]["sdn-controller"]}) - # look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("sdn", db_sdn["_id"]) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - # TODO write this to database - _, pending = await asyncio.wait(task_dependency, timeout=3600) - if pending: - raise LcmException("Timeout waiting related tasks to be completed") + # If 
the VIM account has an associated SDN account, also + # wait for any previous tasks in process for the SDN + await self.lcm_tasks.waitfor_related_HA('sdn', 'ANY', db_sdn["_id"]) if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get( "RO"): @@ -202,7 +227,7 @@ class VimLcm(LcmBase): vim_RO.pop("vim_password", None) if RO_sdn_id: vim_RO["config"]["sdn-controller"] = RO_sdn_id - # TODO make a deep update of sdn-port-mapping + # TODO make a deep update of sdn-port-mapping if vim_RO: await RO.edit("vim", RO_vim_id, descriptor=vim_RO) @@ -240,11 +265,14 @@ class VimLcm(LcmBase): # vim_thread. RO will remove and relaunch a new thread for this vim_account await RO.edit("vim_account", RO_vim_id, descriptor=vim_account_RO) db_vim_update["_admin.operationalState"] = "ENABLED" + # Mark the VIM 'edit' HA task as successful + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok RO_vim_id={}".format(RO_vim_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -254,52 +282,73 @@ class VimLcm(LcmBase): if exc and db_vim: db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the VIM 'edit' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) + # Register the VIM 'edit' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('vim', 'edit', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("vim_account", 
vim_id, order_id) - async def delete(self, vim_id, order_id): + async def delete(self, vim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = vim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('vim', 'delete', op_id): + return + + vim_id = vim_content["_id"] logging_text = "Task vim_delete={} ".format(vim_id) self.logger.debug(logging_text + "Enter") + db_vim = None db_vim_update = {} exc = None step = "Getting vim from db" try: - db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) - if db_vim.get("_admin") and db_vim["_admin"].get("deployed") and db_vim["_admin"]["deployed"].get("RO"): - RO_vim_id = db_vim["_admin"]["deployed"]["RO"] - RO = ROclient.ROClient(self.loop, **self.ro_config) - step = "Detaching vim from RO tenant" - try: - await RO.detach("vim_account", RO_vim_id) - except ROclient.ROClientException as e: - if e.http_code == 404: # not found - self.logger.debug(logging_text + "RO_vim_id={} already detached".format(RO_vim_id)) - else: - raise - - step = "Deleting vim from RO" - try: - await RO.delete("vim", RO_vim_id) - except ROclient.ROClientException as e: - if e.http_code == 404: # not found - self.logger.debug(logging_text + "RO_vim_id={} already deleted".format(RO_vim_id)) - else: - raise - else: - # nothing to delete - self.logger.error(logging_text + "Nohing to remove at RO") + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('vim', 'delete', op_id) + if not self.ro_config.get("ng"): + db_vim = self.db.get_one("vim_accounts", {"_id": vim_id}) + if db_vim.get("_admin") and db_vim["_admin"].get("deployed") and db_vim["_admin"]["deployed"].get("RO"): + RO_vim_id = db_vim["_admin"]["deployed"]["RO"] + RO = ROclient.ROClient(self.loop, **self.ro_config) + step = "Detaching vim from RO tenant" + try: + await 
RO.detach("vim_account", RO_vim_id) + except ROclient.ROClientException as e: + if e.http_code == 404: # not found + self.logger.debug(logging_text + "RO_vim_id={} already detached".format(RO_vim_id)) + else: + raise + + step = "Deleting vim from RO" + try: + await RO.delete("vim", RO_vim_id) + except ROclient.ROClientException as e: + if e.http_code == 404: # not found + self.logger.debug(logging_text + "RO_vim_id={} already deleted".format(RO_vim_id)) + else: + raise + else: + # nothing to delete + self.logger.debug(logging_text + "Nothing to remove at RO") self.db.del_one("vim_accounts", {"_id": vim_id}) db_vim = None self.logger.debug(logging_text + "Exit Ok") return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -310,9 +359,17 @@ class VimLcm(LcmBase): if exc and db_vim: db_vim_update["_admin.operationalState"] = "ERROR" db_vim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the VIM 'delete' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.unlock_HA('vim', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) try: if db_vim and db_vim_update: self.update_db_2("vim_accounts", vim_id, db_vim_update) + # If the VIM 'delete' HA task was succesful, the DB entry has been deleted, + # which means that there is nowhere to register this task, so do nothing here. 
except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("vim_account", vim_id, order_id) @@ -322,7 +379,7 @@ class WimLcm(LcmBase): # values that are encrypted at wim config because they are passwords wim_config_encrypted = () - def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -332,15 +389,23 @@ class WimLcm(LcmBase): self.logger = logging.getLogger('lcm.vim') self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config + self.ro_config = config["ro_config"] super().__init__(db, msg, fs, self.logger) async def create(self, wim_content, order_id): + + # HA tasks and backward compatibility: + # If 'wim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. 
+ # Register 'create' task here for related future HA operations + op_id = wim_content.pop('op_id', None) + self.lcm_tasks.lock_HA('wim', 'create', op_id) + wim_id = wim_content["_id"] - wim_content.pop("op_id", None) logging_text = "Task wim_create={} ".format(wim_id) self.logger.debug(logging_text + "Enter") + db_wim = None db_wim_update = {} exc = None @@ -393,11 +458,14 @@ class WimLcm(LcmBase): db_wim_update["_admin.deployed.RO-account"] = desc["uuid"] db_wim_update["_admin.operationalState"] = "ENABLED" db_wim_update["_admin.detailed-status"] = "Done" + # Mark the WIM 'create' HA task as successful + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok WIM account created at RO_wim_account_id={}".format(desc["uuid"])) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -407,35 +475,44 @@ class WimLcm(LcmBase): if exc and db_wim: db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'create' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) + # Register the WIM 'create' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('wim', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) async def edit(self, wim_content, order_id): + + # HA tasks and backward compatibility: + # If 'wim_content' does not include 'op_id', we a running a legacy NBI version. 
+ # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = wim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('wim', 'edit', op_id): + return + wim_id = wim_content["_id"] - wim_content.pop("op_id", None) logging_text = "Task wim_edit={} ".format(wim_id) self.logger.debug(logging_text + "Enter") + db_wim = None exc = None RO_wim_id = None db_wim_update = {} step = "Getting wim-id='{}' from db".format(wim_id) try: - db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('wim', 'edit', op_id) - # look if previous tasks in process - task_name, task_dependency = self.lcm_tasks.lookfor_related("wim_account", wim_id, order_id) - if task_dependency: - step = "Waiting for related tasks to be completed: {}".format(task_name) - self.logger.debug(logging_text + step) - # TODO write this to database - _, pending = await asyncio.wait(task_dependency, timeout=3600) - if pending: - raise LcmException("Timeout waiting related tasks to be completed") + db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) if db_wim.get("_admin") and db_wim["_admin"].get("deployed") and db_wim["_admin"]["deployed"].get("RO"): @@ -486,11 +563,14 @@ class WimLcm(LcmBase): # wim_thread. 
RO will remove and relaunch a new thread for this wim_account await RO.edit("wim_account", RO_wim_id, descriptor=wim_account_RO) db_wim_update["_admin.operationalState"] = "ENABLED" + # Mark the WIM 'edit' HA task as successful + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok RO_wim_id={}".format(RO_wim_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -500,21 +580,42 @@ class WimLcm(LcmBase): if exc and db_wim: db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'edit' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) + # Register the WIM 'edit' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('wim', 'edit', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) - async def delete(self, wim_id, order_id): + async def delete(self, wim_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. 
+ # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = wim_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('wim', 'delete', op_id): + return + + wim_id = wim_content["_id"] logging_text = "Task wim_delete={} ".format(wim_id) self.logger.debug(logging_text + "Enter") + db_wim = None db_wim_update = {} exc = None step = "Getting wim from db" try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('wim', 'delete', op_id) + db_wim = self.db.get_one("wim_accounts", {"_id": wim_id}) if db_wim.get("_admin") and db_wim["_admin"].get("deployed") and db_wim["_admin"]["deployed"].get("RO"): RO_wim_id = db_wim["_admin"]["deployed"]["RO"] @@ -544,7 +645,7 @@ class WimLcm(LcmBase): self.logger.debug(logging_text + "Exit Ok") return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -555,9 +656,17 @@ class WimLcm(LcmBase): if exc and db_wim: db_wim_update["_admin.operationalState"] = "ERROR" db_wim_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'delete' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.unlock_HA('wim', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) try: if db_wim and db_wim_update: self.update_db_2("wim_accounts", wim_id, db_wim_update) + # If the WIM 'delete' HA task was succesful, the DB entry has been deleted, + # which means that there is nowhere to register this task, so do nothing here. 
except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("wim_account", wim_id, order_id) @@ -565,7 +674,7 @@ class WimLcm(LcmBase): class SdnLcm(LcmBase): - def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -575,15 +684,23 @@ class SdnLcm(LcmBase): self.logger = logging.getLogger('lcm.sdn') self.loop = loop self.lcm_tasks = lcm_tasks - self.ro_config = ro_config + self.ro_config = config["ro_config"] super().__init__(db, msg, fs, self.logger) async def create(self, sdn_content, order_id): + + # HA tasks and backward compatibility: + # If 'sdn_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. 
+ # Register 'create' task here for related future HA operations + op_id = sdn_content.pop('op_id', None) + self.lcm_tasks.lock_HA('sdn', 'create', op_id) + sdn_id = sdn_content["_id"] - sdn_content.pop("op_id", None) logging_text = "Task sdn_create={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") + db_sdn = None db_sdn_update = {} RO_sdn_id = None @@ -612,9 +729,12 @@ class SdnLcm(LcmBase): db_sdn_update["_admin.deployed.RO"] = RO_sdn_id db_sdn_update["_admin.operationalState"] = "ENABLED" self.logger.debug(logging_text + "Exit Ok RO_sdn_id={}".format(RO_sdn_id)) + # Mark the SDN 'create' HA task as successful + operation_state = 'COMPLETED' + operation_details = 'Done' return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -624,23 +744,42 @@ class SdnLcm(LcmBase): if exc and db_sdn: db_sdn_update["_admin.operationalState"] = "ERROR" db_sdn_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the SDN 'create' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_sdn and db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) + # Register the SDN 'create' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('sdn', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id) async def edit(self, sdn_content, order_id): + + # HA tasks and backward compatibility: + # If 'sdn_content' does not include 'op_id', we a running a legacy NBI version. 
+ # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = sdn_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('sdn', 'edit', op_id): + return + sdn_id = sdn_content["_id"] - sdn_content.pop("op_id", None) logging_text = "Task sdn_edit={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") + db_sdn = None db_sdn_update = {} exc = None step = "Getting sdn from db" try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('sdn', 'edit', op_id) + db_sdn = self.db.get_one("sdns", {"_id": sdn_id}) RO_sdn_id = None if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get("RO"): @@ -658,11 +797,14 @@ class SdnLcm(LcmBase): if sdn_RO: await RO.edit("sdn", RO_sdn_id, descriptor=sdn_RO) db_sdn_update["_admin.operationalState"] = "ENABLED" + # Mark the SDN 'edit' HA task as successful + operation_state = 'COMPLETED' + operation_details = 'Done' self.logger.debug(logging_text + "Exit Ok RO_sdn_id={}".format(RO_sdn_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -672,21 +814,42 @@ class SdnLcm(LcmBase): if exc and db_sdn: db_sdn["_admin.operationalState"] = "ERROR" db_sdn["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the SDN 'edit' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) try: if db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) + # Register the SDN 'edit' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('sdn', 'edit', op_id, + operationState=operation_state, + detailed_status=operation_details) except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) 
self.lcm_tasks.remove("sdn", sdn_id, order_id) - async def delete(self, sdn_id, order_id): + async def delete(self, sdn_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, and the HA check always returns True + op_id = sdn_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('sdn', 'delete', op_id): + return + + sdn_id = sdn_content["_id"] logging_text = "Task sdn_delete={} ".format(sdn_id) self.logger.debug(logging_text + "Enter") + db_sdn = None db_sdn_update = {} exc = None step = "Getting sdn from db" try: + # wait for any previous tasks in process + await self.lcm_tasks.waitfor_related_HA('sdn', 'delete', op_id) + db_sdn = self.db.get_one("sdns", {"_id": sdn_id}) if db_sdn.get("_admin") and db_sdn["_admin"].get("deployed") and db_sdn["_admin"]["deployed"].get("RO"): RO_sdn_id = db_sdn["_admin"]["deployed"]["RO"] @@ -707,7 +870,7 @@ class SdnLcm(LcmBase): self.logger.debug("sdn_delete task sdn_id={} Exit Ok".format(sdn_id)) return - except (ROclient.ROClientException, DbException) as e: + except (ROclient.ROClientException, DbException, asyncio.CancelledError) as e: self.logger.error(logging_text + "Exit Exception {}".format(e)) exc = e except Exception as e: @@ -717,9 +880,393 @@ class SdnLcm(LcmBase): if exc and db_sdn: db_sdn["_admin.operationalState"] = "ERROR" db_sdn["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the SDN 'delete' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + self.lcm_tasks.unlock_HA('sdn', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) try: if db_sdn and db_sdn_update: self.update_db_2("sdns", sdn_id, db_sdn_update) + # If the SDN 'delete' HA task was succesful, the DB entry has been deleted, + # which means that there is nowhere to register this task, so do nothing here. 
except DbException as e: self.logger.error(logging_text + "Cannot update database: {}".format(e)) self.lcm_tasks.remove("sdn", sdn_id, order_id) + + +class K8sClusterLcm(LcmBase): + timeout_create = 300 + + def __init__(self, db, msg, fs, lcm_tasks, config, loop): + """ + Init, Connect to database, filesystem storage, and messaging + :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', + :return: None + """ + + self.logger = logging.getLogger('lcm.k8scluster') + self.loop = loop + self.lcm_tasks = lcm_tasks + self.vca_config = config["VCA"] + self.fs = fs + self.db = db + + self.helm2_k8scluster = K8sHelmConnector( + kubectl_command=self.vca_config.get("kubectlpath"), + helm_command=self.vca_config.get("helmpath"), + fs=self.fs, + log=self.logger, + db=self.db, + on_update_db=None + ) + + self.helm3_k8scluster = K8sHelm3Connector( + kubectl_command=self.vca_config.get("kubectlpath"), + helm_command=self.vca_config.get("helm3path"), + fs=self.fs, + log=self.logger, + db=self.db, + on_update_db=None + ) + + self.juju_k8scluster = K8sJujuConnector( + kubectl_command=self.vca_config.get("kubectlpath"), + juju_command=self.vca_config.get("jujupath"), + fs=self.fs, + log=self.logger, + db=self.db, + loop=self.loop, + on_update_db=None, + vca_config=self.vca_config, + ) + self.k8s_map = { + "helm-chart": self.helm2_k8scluster, + "helm-chart-v3": self.helm3_k8scluster, + "juju-bundle": self.juju_k8scluster, + } + + super().__init__(db, msg, fs, self.logger) + + async def create(self, k8scluster_content, order_id): + + op_id = k8scluster_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('k8scluster', 'create', op_id): + return + + k8scluster_id = k8scluster_content["_id"] + logging_text = "Task k8scluster_create={} ".format(k8scluster_id) + self.logger.debug(logging_text + "Enter") + + db_k8scluster = None + db_k8scluster_update = {} + exc = None + try: + step = "Getting k8scluster-id='{}' from 
db".format(k8scluster_id) + self.logger.debug(logging_text + step) + db_k8scluster = self.db.get_one("k8sclusters", {"_id": k8scluster_id}) + self.db.encrypt_decrypt_fields(db_k8scluster.get("credentials"), 'decrypt', ['password', 'secret'], + schema_version=db_k8scluster["schema_version"], salt=db_k8scluster["_id"]) + k8s_credentials = yaml.safe_dump(db_k8scluster.get("credentials")) + pending_tasks = [] + task2name = {} + init_target = deep_get(db_k8scluster, ("_admin", "init")) + step = "Launching k8scluster init tasks" + for task_name in ("helm-chart", "juju-bundle", "helm-chart-v3"): + if init_target and task_name not in init_target: + continue + task = asyncio.ensure_future(self.k8s_map[task_name].init_env(k8s_credentials, + reuse_cluster_uuid=k8scluster_id)) + pending_tasks.append(task) + task2name[task] = task_name + + error_text_list = [] + tasks_name_ok = [] + reached_timeout = False + now = time() + + while pending_tasks: + _timeout = max(1, self.timeout_create - (time() - now)) # ensure not negative with max + step = "Waiting for k8scluster init tasks" + done, pending_tasks = await asyncio.wait(pending_tasks, timeout=_timeout, + return_when=asyncio.FIRST_COMPLETED) + if not done: + # timeout. 
Set timeout is reached and process pending as if they hase been finished + done = pending_tasks + pending_tasks = None + reached_timeout = True + for task in done: + task_name = task2name[task] + if reached_timeout: + exc = "Timeout" + elif task.cancelled(): + exc = "Cancelled" + else: + exc = task.exception() + + if exc: + error_text_list.append("Failing init {}: {}".format(task_name, exc)) + db_k8scluster_update["_admin.{}.error_msg".format(task_name)] = str(exc) + db_k8scluster_update["_admin.{}.id".format(task_name)] = None + db_k8scluster_update["_admin.{}.operationalState".format(task_name)] = "ERROR" + self.logger.error(logging_text + "{} init fail: {}".format(task_name, exc), + exc_info=not isinstance(exc, (N2VCException, str))) + else: + k8s_id, uninstall_sw = task.result() + tasks_name_ok.append(task_name) + self.logger.debug(logging_text + "{} init success. id={} created={}".format( + task_name, k8s_id, uninstall_sw)) + db_k8scluster_update["_admin.{}.error_msg".format(task_name)] = None + db_k8scluster_update["_admin.{}.id".format(task_name)] = k8s_id + db_k8scluster_update["_admin.{}.created".format(task_name)] = uninstall_sw + db_k8scluster_update["_admin.{}.operationalState".format(task_name)] = "ENABLED" + # update database + step = "Updating database for " + task_name + self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update) + if tasks_name_ok: + operation_details = "ready for " + ", ".join(tasks_name_ok) + operation_state = "COMPLETED" + db_k8scluster_update["_admin.operationalState"] = "ENABLED" if not error_text_list else "DEGRADED" + operation_details += "; " + ";".join(error_text_list) + else: + db_k8scluster_update["_admin.operationalState"] = "ERROR" + operation_state = "FAILED" + operation_details = ";".join(error_text_list) + db_k8scluster_update["_admin.detailed-status"] = operation_details + self.logger.debug(logging_text + "Done. 
Result: " + operation_state) + exc = None + + except Exception as e: + if isinstance(e, (LcmException, DbException, K8sException, N2VCException, asyncio.CancelledError)): + self.logger.error(logging_text + "Exit Exception {}".format(e)) + else: + self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + exc = e + finally: + if exc and db_k8scluster: + db_k8scluster_update["_admin.operationalState"] = "ERROR" + db_k8scluster_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + try: + if db_k8scluster and db_k8scluster_update: + self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update) + + # Register the operation and unlock + self.lcm_tasks.unlock_HA('k8scluster', 'create', op_id, + operationState=operation_state, + detailed_status=operation_details) + except DbException as e: + self.logger.error(logging_text + "Cannot update database: {}".format(e)) + self.lcm_tasks.remove("k8scluster", k8scluster_id, order_id) + + async def delete(self, k8scluster_content, order_id): + + # HA tasks and backward compatibility: + # If 'vim_content' does not include 'op_id', we a running a legacy NBI version. + # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing. 
+ # Register 'delete' task here for related future HA operations + op_id = k8scluster_content.pop('op_id', None) + if not self.lcm_tasks.lock_HA('k8scluster', 'delete', op_id): + return + + k8scluster_id = k8scluster_content["_id"] + logging_text = "Task k8scluster_delete={} ".format(k8scluster_id) + self.logger.debug(logging_text + "Enter") + + db_k8scluster = None + db_k8scluster_update = {} + exc = None + try: + step = "Getting k8scluster='{}' from db".format(k8scluster_id) + self.logger.debug(logging_text + step) + db_k8scluster = self.db.get_one("k8sclusters", {"_id": k8scluster_id}) + k8s_hc_id = deep_get(db_k8scluster, ("_admin", "helm-chart", "id")) + k8s_h3c_id = deep_get(db_k8scluster, ("_admin", "helm-chart-v3", "id")) + k8s_jb_id = deep_get(db_k8scluster, ("_admin", "juju-bundle", "id")) + + cluster_removed = True + if k8s_jb_id: # delete in reverse order of creation + step = "Removing juju-bundle '{}'".format(k8s_jb_id) + uninstall_sw = deep_get(db_k8scluster, ("_admin", "juju-bundle", "created")) or False + cluster_removed = await self.juju_k8scluster.reset(cluster_uuid=k8s_jb_id, uninstall_sw=uninstall_sw) + db_k8scluster_update["_admin.juju-bundle.id"] = None + db_k8scluster_update["_admin.juju-bundle.operationalState"] = "DISABLED" + + if k8s_hc_id: + step = "Removing helm-chart '{}'".format(k8s_hc_id) + uninstall_sw = deep_get(db_k8scluster, ("_admin", "helm-chart", "created")) or False + cluster_removed = await self.helm2_k8scluster.reset(cluster_uuid=k8s_hc_id, uninstall_sw=uninstall_sw) + db_k8scluster_update["_admin.helm-chart.id"] = None + db_k8scluster_update["_admin.helm-chart.operationalState"] = "DISABLED" + + if k8s_h3c_id: + step = "Removing helm-chart-v3 '{}'".format(k8s_hc_id) + uninstall_sw = deep_get(db_k8scluster, ("_admin", "helm-chart-v3", "created")) or False + cluster_removed = await self.helm3_k8scluster.reset(cluster_uuid=k8s_hc_id, uninstall_sw=uninstall_sw) + db_k8scluster_update["_admin.helm-chart-v3.id"] = None + 
db_k8scluster_update["_admin.helm-chart-v3.operationalState"] = "DISABLED" + + # Try to remove from cluster_inserted to clean old versions + if k8s_hc_id and cluster_removed: + step = "Removing k8scluster='{}' from k8srepos".format(k8scluster_id) + self.logger.debug(logging_text + step) + db_k8srepo_list = self.db.get_list("k8srepos", {"_admin.cluster-inserted": k8s_hc_id}) + for k8srepo in db_k8srepo_list: + try: + cluster_list = k8srepo["_admin"]["cluster-inserted"] + cluster_list.remove(k8s_hc_id) + self.update_db_2("k8srepos", k8srepo["_id"], {"_admin.cluster-inserted": cluster_list}) + except Exception as e: + self.logger.error("{}: {}".format(step, e)) + self.db.del_one("k8sclusters", {"_id": k8scluster_id}) + db_k8scluster_update = None + self.logger.debug(logging_text + "Done") + + except Exception as e: + if isinstance(e, (LcmException, DbException, K8sException, N2VCException, asyncio.CancelledError)): + self.logger.error(logging_text + "Exit Exception {}".format(e)) + else: + self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True) + exc = e + finally: + if exc and db_k8scluster: + db_k8scluster_update["_admin.operationalState"] = "ERROR" + db_k8scluster_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc) + # Mark the WIM 'create' HA task as erroneous + operation_state = 'FAILED' + operation_details = "ERROR {}: {}".format(step, exc) + else: + operation_state = 'COMPLETED' + operation_details = "deleted" + + try: + if db_k8scluster_update: + self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update) + # Register the K8scluster 'delete' HA task either + # succesful or erroneous, or do nothing (if legacy NBI) + self.lcm_tasks.unlock_HA('k8scluster', 'delete', op_id, + operationState=operation_state, + detailed_status=operation_details) + except DbException as e: + self.logger.error(logging_text + "Cannot update database: {}".format(e)) + self.lcm_tasks.remove("k8scluster", k8scluster_id, order_id) + + 
class K8sRepoLcm(LcmBase):
    """Tasks that handle the 'create' and 'delete' operations of the records stored in 'k8srepos'"""

    def __init__(self, db, msg, fs, lcm_tasks, config, loop):
        """
        Init, Connect to database, filesystem storage, and messaging
        :param db: database object, used to read, update and delete 'k8srepos' records
        :param msg: messaging object, passed to the parent class
        :param fs: filesystem storage object
        :param lcm_tasks: task registry used for HA locking and for removing the task when finished
        :param config: dictionary with configuration. It must contain the 'VCA' key
        :param loop: asyncio event loop
        :return: None
        """

        self.logger = logging.getLogger('lcm.k8srepo')
        self.loop = loop
        self.lcm_tasks = lcm_tasks
        self.vca_config = config["VCA"]
        self.fs = fs
        self.db = db

        super().__init__(db, msg, fs, self.logger)

    async def create(self, k8srepo_content, order_id):
        """
        Mark a k8srepo record as ENABLED and register the result of the 'create' HA operation
        :param k8srepo_content: dictionary with the '_id' of the k8srepo; an optional 'op_id' is popped from it
        :param order_id: identifier used to remove this task from lcm_tasks when finished
        :return: None. Exceptions are caught, logged and reflected in the HA operation state
        """

        # HA tasks and backward compatibility:
        # If 'k8srepo_content' does not include 'op_id', we are running a legacy NBI version.
        # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing.
        # Register 'create' task here for related future HA operations

        op_id = k8srepo_content.pop('op_id', None)
        if not self.lcm_tasks.lock_HA('k8srepo', 'create', op_id):
            return

        k8srepo_id = k8srepo_content.get("_id")
        logging_text = "Task k8srepo_create={} ".format(k8srepo_id)
        self.logger.debug(logging_text + "Enter")

        db_k8srepo = None
        db_k8srepo_update = {}
        exc = None
        operation_state = 'COMPLETED'
        operation_details = ''
        try:
            step = "Getting k8srepo-id='{}' from db".format(k8srepo_id)
            self.logger.debug(logging_text + step)
            db_k8srepo = self.db.get_one("k8srepos", {"_id": k8srepo_id})
            db_k8srepo_update["_admin.operationalState"] = "ENABLED"
        except Exception as e:
            self.logger.error(logging_text + "Exit Exception {}".format(e),
                              exc_info=not isinstance(e, (LcmException, DbException, K8sException, N2VCException,
                                                          asyncio.CancelledError)))
            exc = e
        finally:
            if exc:
                # Mark the k8srepo 'create' HA task as erroneous. This must be done even when the record
                # could not be read; otherwise the operation would be registered as 'COMPLETED'
                operation_state = 'FAILED'
                operation_details = "ERROR {}: {}".format(step, exc)
                if db_k8srepo:
                    db_k8srepo_update["_admin.operationalState"] = "ERROR"
                    db_k8srepo_update["_admin.detailed-status"] = operation_details
            try:
                if db_k8srepo_update:
                    self.update_db_2("k8srepos", k8srepo_id, db_k8srepo_update)
                # Register the K8srepo 'create' HA task either
                # successful or erroneous, or do nothing (if legacy NBI)
                self.lcm_tasks.unlock_HA('k8srepo', 'create', op_id,
                                         operationState=operation_state,
                                         detailed_status=operation_details)
            except DbException as e:
                self.logger.error(logging_text + "Cannot update database: {}".format(e))
            self.lcm_tasks.remove("k8srepo", k8srepo_id, order_id)

    async def delete(self, k8srepo_content, order_id):
        """
        Delete a k8srepo record from the database and register the result of the 'delete' HA operation
        :param k8srepo_content: dictionary with the '_id' of the k8srepo; an optional 'op_id' is popped from it
        :param order_id: identifier used to remove this task from lcm_tasks when finished
        :return: None. Exceptions are caught, logged and reflected in the HA operation state
        """

        # HA tasks and backward compatibility:
        # If 'k8srepo_content' does not include 'op_id', we are running a legacy NBI version.
        # In such a case, HA is not supported by NBI, 'op_id' is None, and lock_HA() will do nothing.
        # Register 'delete' task here for related future HA operations
        op_id = k8srepo_content.pop('op_id', None)
        if not self.lcm_tasks.lock_HA('k8srepo', 'delete', op_id):
            return

        k8srepo_id = k8srepo_content.get("_id")
        logging_text = "Task k8srepo_delete={} ".format(k8srepo_id)
        self.logger.debug(logging_text + "Enter")

        db_k8srepo = None
        db_k8srepo_update = {}

        exc = None
        operation_state = 'COMPLETED'
        operation_details = ''
        try:
            step = "Getting k8srepo-id='{}' from db".format(k8srepo_id)
            self.logger.debug(logging_text + step)
            db_k8srepo = self.db.get_one("k8srepos", {"_id": k8srepo_id})

        except Exception as e:
            self.logger.error(logging_text + "Exit Exception {}".format(e),
                              exc_info=not isinstance(e, (LcmException, DbException, K8sException, N2VCException,
                                                          asyncio.CancelledError)))
            exc = e
        finally:
            if exc:
                # Mark the k8srepo 'delete' HA task as erroneous. This must be done even when the record
                # could not be read; otherwise the operation would be registered as 'COMPLETED'
                operation_state = 'FAILED'
                operation_details = "ERROR {}: {}".format(step, exc)
                if db_k8srepo:
                    db_k8srepo_update["_admin.operationalState"] = "ERROR"
                    db_k8srepo_update["_admin.detailed-status"] = operation_details
            try:
                if db_k8srepo_update:
                    self.update_db_2("k8srepos", k8srepo_id, db_k8srepo_update)
                # Register the K8srepo 'delete' HA task either
                # successful or erroneous, or do nothing (if legacy NBI)
                self.lcm_tasks.unlock_HA('k8srepo', 'delete', op_id,
                                         operationState=operation_state,
                                         detailed_status=operation_details)
                # the record is removed regardless of the outcome above
                self.db.del_one("k8srepos", {"_id": k8srepo_id})
            except DbException as e:
                self.logger.error(logging_text + "Cannot update database: {}".format(e))
            self.lcm_tasks.remove("k8srepo", k8srepo_id, order_id)