Fix bug 2060: Add logic to delete models gracefully
[osm/N2VC.git] / n2vc / libjuju.py
index 028cea8..4ee0abf 100644 (file)
@@ -122,7 +122,10 @@ class Libjuju:
             )
             if controller:
                 await self.disconnect_controller(controller)
-            raise JujuControllerFailedConnecting(e)
+
+            raise JujuControllerFailedConnecting(
+                f"Error connecting to Juju controller: {e}"
+            )
 
     async def disconnect(self):
         """Disconnect"""
@@ -805,6 +808,153 @@ class Libjuju:
 
         return application
 
+    async def upgrade_charm(
+        self,
+        application_name: str,
+        path: str,
+        model_name: str,
+        total_timeout: float = None,
+        **kwargs,
+    ):
+        """Upgrade Charm
+
+        :param: application_name:   Application name
+        :param: model_name:         Model name
+        :param: path:               Local path to the charm
+        :param: total_timeout:      Timeout for the entity to be active
+
+        :return: (str, str): (output and status)
+        """
+
+        self.log.debug(
+            "Upgrading charm {} in model {} from path {}".format(
+                application_name, model_name, path
+            )
+        )
+
+        await self.resolve_application(
+            model_name=model_name, application_name=application_name
+        )
+
+        # Get controller
+        controller = await self.get_controller()
+
+        # Get model
+        model = await self.get_model(controller, model_name)
+
+        try:
+            # Get application
+            application = self._get_application(
+                model,
+                application_name=application_name,
+            )
+            if application is None:
+                raise JujuApplicationNotFound(
+                    "Cannot find application {} to upgrade".format(application_name)
+                )
+
+            await application.refresh(path=path)
+
+            self.log.debug(
+                "Wait until charm upgrade is completed for application {} (model={})".format(
+                    application_name, model_name
+                )
+            )
+
+            await JujuModelWatcher.ensure_units_idle(
+                model=model, application=application
+            )
+
+            if application.status == "error":
+                error_message = "Unknown"
+                for unit in application.units:
+                    if (
+                        unit.workload_status == "error"
+                        and unit.workload_status_message != ""
+                    ):
+                        error_message = unit.workload_status_message
+
+                message = "Application {} failed update in {}: {}".format(
+                    application_name, model_name, error_message
+                )
+                self.log.error(message)
+                raise JujuError(message=message)
+
+            self.log.debug(
+                "Application {} is ready in model {}".format(
+                    application_name, model_name
+                )
+            )
+
+        finally:
+            await self.disconnect_model(model)
+            await self.disconnect_controller(controller)
+
+        return application
+
+    async def resolve_application(self, model_name: str, application_name: str):
+
+        controller = await self.get_controller()
+        model = await self.get_model(controller, model_name)
+
+        try:
+            application = self._get_application(
+                model,
+                application_name=application_name,
+            )
+            if application is None:
+                raise JujuApplicationNotFound(
+                    "Cannot find application {} to resolve".format(application_name)
+                )
+
+            while application.status == "error":
+                for unit in application.units:
+                    if unit.workload_status == "error":
+                        self.log.debug(
+                            "Model {}, Application {}, Unit {} in error state, resolving".format(
+                                model_name, application_name, unit.entity_id
+                            )
+                        )
+                        try:
+                            await unit.resolved(retry=False)
+                        except Exception:
+                            pass
+
+                await asyncio.sleep(1)
+
+        finally:
+            await self.disconnect_model(model)
+            await self.disconnect_controller(controller)
+
+    async def resolve(self, model_name: str):
+
+        controller = await self.get_controller()
+        model = await self.get_model(controller, model_name)
+        all_units_active = False
+        try:
+            while not all_units_active:
+                all_units_active = True
+                for application_name, application in model.applications.items():
+                    if application.status == "error":
+                        for unit in application.units:
+                            if unit.workload_status == "error":
+                                self.log.debug(
+                                    "Model {}, Application {}, Unit {} in error state, resolving".format(
+                                        model_name, application_name, unit.entity_id
+                                    )
+                                )
+                                try:
+                                    await unit.resolved(retry=False)
+                                    all_units_active = False
+                                except Exception:
+                                    pass
+
+                if not all_units_active:
+                    await asyncio.sleep(5)
+        finally:
+            await self.disconnect_model(model)
+            await self.disconnect_controller(controller)
+
     async def scale_application(
         self,
         model_name: str,
@@ -1208,28 +1358,38 @@ class Libjuju:
         model = None
         try:
             if not await self.model_exists(model_name, controller=controller):
+                self.log.warn(f"Model {model_name} doesn't exist")
                 return
 
-            self.log.debug("Destroying model {}".format(model_name))
-
+            self.log.debug(f"Getting model {model_name} to be destroyed")
             model = await self.get_model(controller, model_name)
+            self.log.debug(f"Destroying manual machines in model {model_name}")
             # Destroy machines that are manually provisioned
             # and still are in pending state
             await self._destroy_pending_machines(model, only_manual=True)
             await self.disconnect_model(model)
 
-            await self._destroy_model(
-                model_name,
-                controller,
+            await asyncio.wait_for(
+                self._destroy_model(model_name, controller),
                 timeout=total_timeout,
             )
+        except Exception as e:
+            if not await self.model_exists(model_name, controller=controller):
+                self.log.warn(
+                    f"Failed deleting model {model_name}: model doesn't exist"
+                )
+                return
+            self.log.warn(f"Failed deleting model {model_name}: {e}")
+            raise e
         finally:
             if model:
                 await self.disconnect_model(model)
             await self.disconnect_controller(controller)
 
     async def _destroy_model(
-        self, model_name: str, controller: Controller, timeout: float = 1800
+        self,
+        model_name: str,
+        controller: Controller,
     ):
         """
         Destroy model from controller
@@ -1238,22 +1398,35 @@ class Libjuju:
         :param: controller: Controller object
         :param: timeout: Timeout in seconds
         """
+        self.log.debug(f"Destroying model {model_name}")
+
+        async def _destroy_model_gracefully(model_name: str, controller: Controller):
+            self.log.info(f"Gracefully deleting model {model_name}")
+            resolved = False
+            while model_name in await controller.list_models():
+                if not resolved:
+                    await self.resolve(model_name)
+                    resolved = True
+                await controller.destroy_model(model_name, destroy_storage=True)
+
+                await asyncio.sleep(5)
+            self.log.info(f"Model {model_name} deleted gracefully")
 
-        async def _destroy_model_loop(model_name: str, controller: Controller):
-            while await self.model_exists(model_name, controller=controller):
+        async def _destroy_model_forcefully(model_name: str, controller: Controller):
+            self.log.info(f"Forcefully deleting model {model_name}")
+            while model_name in await controller.list_models():
                 await controller.destroy_model(
-                    model_name, destroy_storage=True, force=True, max_wait=0
+                    model_name, destroy_storage=True, force=True, max_wait=60
                 )
                 await asyncio.sleep(5)
+            self.log.info(f"Model {model_name} deleted forcefully")
 
         try:
             await asyncio.wait_for(
-                _destroy_model_loop(model_name, controller), timeout=timeout
+                _destroy_model_gracefully(model_name, controller), timeout=120
             )
         except asyncio.TimeoutError:
-            raise Exception(
-                "Timeout waiting for model {} to be destroyed".format(model_name)
-            )
+            await _destroy_model_forcefully(model_name, controller)
         except juju.errors.JujuError as e:
             if any("has been removed" in error for error in e.errors):
                 return