Bug 1152, error if system cluster not properly initialized
[osm/LCM.git] / osm_lcm / vim_sdn.py
index 4d8d83b..6a3deb5 100644 (file)
 # under the License.
 ##
 
-import asyncio
 import yaml
 import logging
 import logging.handlers
 from osm_lcm import ROclient
-from osm_lcm.lcm_utils import LcmException, LcmBase
+from osm_lcm.lcm_utils import LcmException, LcmBase, deep_get
 from n2vc.k8s_helm_conn import K8sHelmConnector
+from n2vc.k8s_juju_conn import K8sJujuConnector
+from n2vc.exceptions import K8sException, N2VCException
 from osm_common.dbbase import DbException
 from copy import deepcopy
 
@@ -34,7 +35,7 @@ class VimLcm(LcmBase):
     vim_config_encrypted = {"1.1": ("admin_password", "nsx_password", "vcenter_password"),
                             "default": ("admin_password", "nsx_password", "vcenter_password", "vrops_password")}
 
-    def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop):
+    def __init__(self, db, msg, fs, lcm_tasks, config, loop):
         """
         Init, Connect to database, filesystem storage, and messaging
         :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
@@ -44,7 +45,7 @@ class VimLcm(LcmBase):
         self.logger = logging.getLogger('lcm.vim')
         self.loop = loop
         self.lcm_tasks = lcm_tasks
-        self.ro_config = ro_config
+        self.ro_config = config["ro_config"]
 
         super().__init__(db, msg, fs, self.logger)
 
@@ -59,7 +60,6 @@ class VimLcm(LcmBase):
             return
 
         vim_id = vim_content["_id"]
-        vim_content.pop("op_id", None)
         logging_text = "Task vim_create={} ".format(vim_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -181,7 +181,6 @@ class VimLcm(LcmBase):
             return
 
         vim_id = vim_content["_id"]
-        vim_content.pop("op_id", None)
         logging_text = "Task vim_edit={} ".format(vim_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -384,7 +383,7 @@ class WimLcm(LcmBase):
     # values that are encrypted at wim config because they are passwords
     wim_config_encrypted = ()
 
-    def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop):
+    def __init__(self, db, msg, fs, lcm_tasks, config, loop):
         """
         Init, Connect to database, filesystem storage, and messaging
         :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
@@ -394,7 +393,7 @@ class WimLcm(LcmBase):
         self.logger = logging.getLogger('lcm.vim')
         self.loop = loop
         self.lcm_tasks = lcm_tasks
-        self.ro_config = ro_config
+        self.ro_config = config["ro_config"]
 
         super().__init__(db, msg, fs, self.logger)
 
@@ -408,7 +407,6 @@ class WimLcm(LcmBase):
         self.lcm_tasks.lock_HA('wim', 'create', op_id)
 
         wim_id = wim_content["_id"]
-        wim_content.pop("op_id", None)
         logging_text = "Task wim_create={} ".format(wim_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -508,7 +506,6 @@ class WimLcm(LcmBase):
             return
 
         wim_id = wim_content["_id"]
-        wim_content.pop("op_id", None)
         logging_text = "Task wim_edit={} ".format(wim_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -687,7 +684,7 @@ class WimLcm(LcmBase):
 
 class SdnLcm(LcmBase):
 
-    def __init__(self, db, msg, fs, lcm_tasks, ro_config, loop):
+    def __init__(self, db, msg, fs, lcm_tasks, config, loop):
         """
         Init, Connect to database, filesystem storage, and messaging
         :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
@@ -697,7 +694,7 @@ class SdnLcm(LcmBase):
         self.logger = logging.getLogger('lcm.sdn')
         self.loop = loop
         self.lcm_tasks = lcm_tasks
-        self.ro_config = ro_config
+        self.ro_config = config["ro_config"]
 
         super().__init__(db, msg, fs, self.logger)
 
@@ -711,7 +708,6 @@ class SdnLcm(LcmBase):
         self.lcm_tasks.lock_HA('sdn', 'create', op_id)
 
         sdn_id = sdn_content["_id"]
-        sdn_content.pop("op_id", None)
         logging_text = "Task sdn_create={} ".format(sdn_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -785,7 +781,6 @@ class SdnLcm(LcmBase):
             return
 
         sdn_id = sdn_content["_id"]
-        sdn_content.pop("op_id", None)
         logging_text = "Task sdn_edit={} ".format(sdn_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -919,7 +914,7 @@ class SdnLcm(LcmBase):
 
 class K8sClusterLcm(LcmBase):
 
-    def __init__(self, db, msg, fs, lcm_tasks, vca_config, loop):
+    def __init__(self, db, msg, fs, lcm_tasks, config, loop):
         """
         Init, Connect to database, filesystem storage, and messaging
         :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
@@ -929,11 +924,11 @@ class K8sClusterLcm(LcmBase):
         self.logger = logging.getLogger('lcm.k8scluster')
         self.loop = loop
         self.lcm_tasks = lcm_tasks
-        self.vca_config = vca_config
+        self.vca_config = config["VCA"]
         self.fs = fs
         self.db = db
 
-        self.k8scluster = K8sHelmConnector(
+        self.helm_k8scluster = K8sHelmConnector(
             kubectl_command=self.vca_config.get("kubectlpath"),
             helm_command=self.vca_config.get("helmpath"),
             fs=self.fs,
@@ -942,6 +937,15 @@ class K8sClusterLcm(LcmBase):
             on_update_db=None
         )
 
+        self.juju_k8scluster = K8sJujuConnector(
+            kubectl_command=self.vca_config.get("kubectlpath"),
+            juju_command=self.vca_config.get("jujupath"),
+            fs=self.fs,
+            log=self.logger,
+            db=self.db,
+            on_update_db=None
+        )
+
         super().__init__(db, msg, fs, self.logger)
 
     async def create(self, k8scluster_content, order_id):
@@ -955,7 +959,6 @@ class K8sClusterLcm(LcmBase):
             return
 
         k8scluster_id = k8scluster_content["_id"]
-        k8scluster_content.pop("op_id", None)
         logging_text = "Task k8scluster_create={} ".format(k8scluster_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -971,41 +974,48 @@ class K8sClusterLcm(LcmBase):
             db_k8scluster = self.db.get_one("k8sclusters", {"_id": k8scluster_id})
             self.db.encrypt_decrypt_fields(db_k8scluster.get("credentials"), 'decrypt', ['password', 'secret'],
                                            schema_version=db_k8scluster["schema_version"], salt=db_k8scluster["_id"])
-            print(db_k8scluster.get("credentials"))
-            print("\n\n\n    FIN CREDENTIALS")
-            print(yaml.safe_dump(db_k8scluster.get("credentials")))
-            print("\n\n\n    FIN OUTPUT")
-            cluster_uuid, uninstall_sw = await self.k8scluster.init_env(yaml.safe_dump(db_k8scluster.
-                                                                                       get("credentials")))
-            db_k8scluster_update["cluster-uuid"] = cluster_uuid
-            if uninstall_sw:
-                db_k8scluster_update["uninstall-sw"] = uninstall_sw
-            step = "Getting the list of repos"
-            self.logger.debug(logging_text + step)
-            task_list = []
-            db_k8srepo_list = self.db.get_list("k8srepos", {})
-            for repo in db_k8srepo_list:
-                step = "Adding repo {} to cluster: {}".format(repo["name"], cluster_uuid)
-                self.logger.debug(logging_text + step)
-                task = asyncio.ensure_future(self.k8scluster.repo_add(cluster_uuid=cluster_uuid,
-                                                                      name=repo["name"], url=repo["url"],
-                                                                      repo_type="chart"))
-                task_list.append(task)
-                if not repo["_admin"].get("cluster-inserted"):
-                    repo["_admin"]["cluster-inserted"] = []
-                repo["_admin"]["cluster-inserted"].append(cluster_uuid)
-                self.update_db_2("k8srepos", repo["_id"], repo)
-
-            done = None
-            pending = None
-            if len(task_list) > 0:
-                self.logger.debug('Waiting for terminate pending tasks...')
-                done, pending = await asyncio.wait(task_list, timeout=3600)
-                if not pending:
-                    self.logger.debug('All tasks finished...')
-                else:
-                    self.logger.info('There are pending tasks: {}'.format(pending))
-            db_k8scluster_update["_admin.operationalState"] = "ENABLED"
+            k8s_credentials = yaml.safe_dump(db_k8scluster.get("credentials"))
+            error_text_list = []
+            init_target = deep_get(db_k8scluster, ("_admin", "init"))
+            # helm-chart
+            if not init_target or "helm-chart" in init_target:
+                k8s_hc_id = None
+                try:
+                    k8s_hc_id, uninstall_sw = await self.helm_k8scluster.init_env(k8s_credentials,
+                                                                                  reuse_cluster_uuid=k8scluster_id)
+                    db_k8scluster_update["_admin.helm-chart.id"] = k8s_hc_id
+                    db_k8scluster_update["_admin.helm-chart.created"] = uninstall_sw
+                except Exception as e:
+                    error_text_list.append("Failing init helm-chart: {}".format(e))
+                    db_k8scluster_update["_admin.helm-chart.error_msg"] = str(e)
+                    if isinstance(e, K8sException):
+                        self.logger.error(logging_text + "Failing init helm-chart: {}".format(e))
+                    else:
+                        self.logger.error(logging_text + "Failing init helm-chart: {}".format(e), exc_info=True)
+
+            if not init_target or "juju-bundle" in init_target:
+                # Juju/k8s cluster
+                k8s_jb_id = None
+                try:
+                    k8s_jb_id, uninstall_sw = await self.juju_k8scluster.init_env(k8s_credentials)
+                    db_k8scluster_update["_admin.juju-bundle.id"] = k8s_jb_id
+                    db_k8scluster_update["_admin.juju-bundle.created"] = uninstall_sw
+                except Exception as e:
+                    error_text_list.append("Failing init juju-bundle: {}".format(e))
+                    db_k8scluster_update["_admin.juju-bundle.error_msg"] = str(e)
+                    if isinstance(e, N2VCException):
+                        self.logger.error(logging_text + "Failing init juju-bundle: {}".format(e))
+                    else:
+                        self.logger.error(logging_text + "Failing init juju-bundle: {}".format(e), exc_info=True)
+
+            # mark as an error if both helm-chart and juju-bundle have failed
+            if k8s_hc_id or k8s_jb_id:
+                self.logger.debug(logging_text + "successfully created")
+                db_k8scluster_update["_admin.operationalState"] = "ENABLED"
+            else:
+                self.logger.debug(logging_text + "created with errors")
+                db_k8scluster_update["_admin.operationalState"] = "ERROR"
+                db_k8scluster_update["_admin.detailed-status"] = ";".join(error_text_list)
 
         except Exception as e:
             self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True)
@@ -1014,12 +1024,14 @@ class K8sClusterLcm(LcmBase):
             if exc and db_k8scluster:
                 db_k8scluster_update["_admin.operationalState"] = "ERROR"
                 db_k8scluster_update["_admin.detailed-status"] = "ERROR {}: {}".format(step, exc)
+
                 # Mark the k8scluster 'create' HA task as erroneous
                 operationState_HA = 'FAILED'
                 detailed_status_HA = "ERROR {}: {}".format(step, exc)
             try:
                 if db_k8scluster_update:
                     self.update_db_2("k8sclusters", k8scluster_id, db_k8scluster_update)
+
                 # Register the K8scluster 'create' HA task either
                 # succesful or erroneous, or do nothing (if legacy NBI)
                 self.lcm_tasks.register_HA('k8scluster', 'create', op_id,
@@ -1027,7 +1039,7 @@ class K8sClusterLcm(LcmBase):
                                            detailed_status=detailed_status_HA)
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
-            self.lcm_tasks.remove("k8sclusters", k8scluster_id, order_id)
+            self.lcm_tasks.remove("k8scluster", k8scluster_id, order_id)
 
     async def delete(self, k8scluster_content, order_id):
 
@@ -1040,7 +1052,6 @@ class K8sClusterLcm(LcmBase):
             return
 
         k8scluster_id = k8scluster_content["_id"]
-        k8scluster_content.pop("op_id", None)
         logging_text = "Task k8scluster_delete={} ".format(k8scluster_id)
         self.logger.debug(logging_text + "Enter")
 
@@ -1053,32 +1064,44 @@ class K8sClusterLcm(LcmBase):
             step = "Getting k8scluster='{}' from db".format(k8scluster_id)
             self.logger.debug(logging_text + step)
             db_k8scluster = self.db.get_one("k8sclusters", {"_id": k8scluster_id})
-            uninstall_sw = db_k8scluster.get("uninstall-sw")
-            if uninstall_sw is False or uninstall_sw is None:
-                uninstall_sw = False
-            cluster_removed = await self.k8scluster.reset(cluster_uuid=db_k8scluster.get("cluster-uuid"),
-                                                          uninstall_sw=uninstall_sw)
-
-            if cluster_removed:
-                step = "Removing k8scluster='{}' from db".format(k8scluster_id)
+            k8s_hc_id = deep_get(db_k8scluster, ("_admin", "helm-chart", "id"))
+            k8s_jb_id = deep_get(db_k8scluster, ("_admin", "juju-bundle", "id"))
+
+            uninstall_sw = deep_get(db_k8scluster, ("_admin", "helm-chart", "created"))
+            cluster_removed = True
+            if k8s_jb_id:  # delete in reverse order of creation
+                step = "Removing juju-bundle '{}'".format(k8s_jb_id)
+                uninstall_sw = uninstall_sw or False
+                cluster_removed = await self.juju_k8scluster.reset(cluster_uuid=k8s_jb_id, uninstall_sw=uninstall_sw)
+                db_k8scluster_update["_admin.juju-bundle.id"] = None
+
+            if k8s_hc_id:
+                step = "Removing helm-chart '{}'".format(k8s_hc_id)
+                uninstall_sw = uninstall_sw or False
+                cluster_removed = await self.helm_k8scluster.reset(cluster_uuid=k8s_hc_id, uninstall_sw=uninstall_sw)
+                db_k8scluster_update["_admin.helm-chart.id"] = None
+
+            # Try to remove from cluster_inserted to clean old versions
+            if k8s_hc_id and cluster_removed:
+                step = "Removing k8scluster='{}' from k8srepos".format(k8scluster_id)
                 self.logger.debug(logging_text + step)
-                db_k8srepo_list = self.db.get_list("k8srepos", {})
+                db_k8srepo_list = self.db.get_list("k8srepos", {"_admin.cluster-inserted": k8s_hc_id})
                 for k8srepo in db_k8srepo_list:
-                    index = 0
-                    for cluster in k8srepo["_admin"]["cluster-inserted"]:
-                        if db_k8scluster.get("cluster-uuid") == cluster:
-                            del(k8srepo["_admin"]["cluster-inserted"][index])
-                            break
-                        index += 1
-                self.update_db_2("k8srepos", k8srepo["_id"], k8srepo)
-                self.db.del_one("k8sclusters", {"_id": k8scluster_id})
-            else:
-                raise LcmException("An error happened during the reset of the k8s cluster '{}'".format(k8scluster_id))
-            # if not cluster_removed:
-            #     raise Exception("K8scluster was not properly removed")
+                    try:
+                        cluster_list = k8srepo["_admin"]["cluster-inserted"]
+                        cluster_list.remove(k8s_hc_id)
+                        self.update_db_2("k8srepos", k8srepo["_id"], {"_admin.cluster-inserted": cluster_list})
+                    except Exception as e:
+                        self.logger.error("{}: {}".format(step, e))
+            self.db.del_one("k8sclusters", {"_id": k8scluster_id})
+            db_k8scluster_update = None
+            self.logger.debug(logging_text + "Done")
 
         except Exception as e:
-            self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True)
+            if isinstance(e, (LcmException, DbException, K8sException, N2VCException)):
+                self.logger.error(logging_text + "Exit Exception {}".format(e))
+            else:
+                self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True)
             exc = e
         finally:
             if exc and db_k8scluster:
@@ -1097,12 +1120,12 @@ class K8sClusterLcm(LcmBase):
                                            detailed_status=detailed_status_HA)
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
-            self.lcm_tasks.remove("k8sclusters", k8scluster_id, order_id)
+            self.lcm_tasks.remove("k8scluster", k8scluster_id, order_id)
 
 
 class K8sRepoLcm(LcmBase):
 
-    def __init__(self, db, msg, fs, lcm_tasks, vca_config, loop):
+    def __init__(self, db, msg, fs, lcm_tasks, config, loop):
         """
         Init, Connect to database, filesystem storage, and messaging
         :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
@@ -1112,7 +1135,7 @@ class K8sRepoLcm(LcmBase):
         self.logger = logging.getLogger('lcm.k8srepo')
         self.loop = loop
         self.lcm_tasks = lcm_tasks
-        self.vca_config = vca_config
+        self.vca_config = config["VCA"]
         self.fs = fs
         self.db = db
 
@@ -1151,29 +1174,6 @@ class K8sRepoLcm(LcmBase):
             step = "Getting k8srepo-id='{}' from db".format(k8srepo_id)
             self.logger.debug(logging_text + step)
             db_k8srepo = self.db.get_one("k8srepos", {"_id": k8srepo_id})
-            step = "Getting k8scluster_list from db"
-            self.logger.debug(logging_text + step)
-            db_k8scluster_list = self.db.get_list("k8sclusters", {})
-            db_k8srepo_update["_admin.cluster-inserted"] = []
-            task_list = []
-            for k8scluster in db_k8scluster_list:
-                step = "Adding repo to cluster: {}".format(k8scluster["cluster-uuid"])
-                self.logger.debug(logging_text + step)
-                task = asyncio.ensure_future(self.k8srepo.repo_add(cluster_uuid=k8scluster["cluster-uuid"],
-                                                                   name=db_k8srepo["name"], url=db_k8srepo["url"],
-                                                                   repo_type="chart"))
-                task_list.append(task)
-                db_k8srepo_update["_admin.cluster-inserted"].append(k8scluster["cluster-uuid"])
-
-            done = None
-            pending = None
-            if len(task_list) > 0:
-                self.logger.debug('Waiting for terminate pending tasks...')
-                done, pending = await asyncio.wait(task_list, timeout=3600)
-                if not pending:
-                    self.logger.debug('All tasks finished...')
-                else:
-                    self.logger.info('There are pending tasks: {}'.format(pending))
             db_k8srepo_update["_admin.operationalState"] = "ENABLED"
         except Exception as e:
             self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True)
@@ -1214,31 +1214,13 @@ class K8sRepoLcm(LcmBase):
         db_k8srepo = None
         db_k8srepo_update = {}
 
+        exc = None
         operationState_HA = ''
         detailed_status_HA = ''
         try:
             step = "Getting k8srepo-id='{}' from db".format(k8srepo_id)
             self.logger.debug(logging_text + step)
             db_k8srepo = self.db.get_one("k8srepos", {"_id": k8srepo_id})
-            step = "Getting k8scluster_list from db"
-            self.logger.debug(logging_text + step)
-            db_k8scluster_list = self.db.get_list("k8sclusters", {})
-
-            task_list = []
-            for k8scluster in db_k8scluster_list:
-                task = asyncio.ensure_future(self.k8srepo.repo_remove(cluster_uuid=k8scluster["cluster-uuid"],
-                                                                      name=db_k8srepo["name"]))
-                task_list.append(task)
-            done = None
-            pending = None
-            if len(task_list) > 0:
-                self.logger.debug('Waiting for terminate pending tasks...')
-                done, pending = await asyncio.wait(task_list, timeout=3600)
-                if not pending:
-                    self.logger.debug('All tasks finished...')
-                else:
-                    self.logger.info('There are pending tasks: {}'.format(pending))
-            self.db.del_one("k8srepos", {"_id": k8srepo_id})
 
         except Exception as e:
             self.logger.critical(logging_text + "Exit Exception {}".format(e), exc_info=True)
@@ -1258,6 +1240,7 @@ class K8sRepoLcm(LcmBase):
                 self.lcm_tasks.register_HA('k8srepo', 'delete', op_id,
                                            operationState=operationState_HA,
                                            detailed_status=detailed_status_HA)
+                self.db.del_one("k8srepos", {"_id": k8srepo_id})
             except DbException as e:
                 self.logger.error(logging_text + "Cannot update database: {}".format(e))
             self.lcm_tasks.remove("k8srepo", k8srepo_id, order_id)