Feature 9751: Centralized VCA for KNFs
[osm/N2VC.git] / n2vc / libjuju.py
index d2c725f..aa7afa1 100644 (file)
@@ -22,6 +22,7 @@ from juju.errors import JujuAPIError
 from juju.model import Model
 from juju.machine import Machine
 from juju.application import Application
+from juju.unit import Unit
 from juju.client._definitions import (
     FullStatus,
     QueryApplicationOffersResults,
@@ -80,9 +81,12 @@ class Libjuju:
         self.log = log or logging.getLogger("Libjuju")
         self.db = db
         db_endpoints = self._get_api_endpoints_db()
-        self.endpoints = db_endpoints or [endpoint]
-        if db_endpoints is None:
+        self.endpoints = None
+        if (db_endpoints and endpoint not in db_endpoints) or not db_endpoints:
+            self.endpoints = [endpoint]
             self._update_api_endpoints_db(self.endpoints)
+        else:
+            self.endpoints = db_endpoints
         self.api_proxy = api_proxy
         self.username = username
         self.password = password
@@ -103,7 +107,10 @@ class Libjuju:
         self.models = set()
         self.log.debug("Libjuju initialized!")
 
-        self.health_check_task = self.loop.create_task(self.health_check())
+        self.health_check_task = self._create_health_check_task()
+
+    def _create_health_check_task(self):
+        return self.loop.create_task(self.health_check())
 
     async def get_controller(self, timeout: float = 5.0) -> Controller:
         """
@@ -158,14 +165,17 @@ class Libjuju:
 
         :param: controller: Controller that will be disconnected
         """
-        await controller.disconnect()
+        if controller:
+            await controller.disconnect()
 
-    async def add_model(self, model_name: str, cloud_name: str):
+    async def add_model(self, model_name: str, cloud_name: str, credential_name=None):
         """
         Create model
 
         :param: model_name: Model name
         :param: cloud_name: Cloud name
+        :param: credential_name: Credential name to use for adding the model
+                                 If not specified, same name as the cloud will be used.
         """
 
         # Get controller
@@ -193,7 +203,7 @@ class Libjuju:
                     model_name,
                     config=self.model_config,
                     cloud_name=cloud_name,
-                    credential_name=cloud_name,
+                    credential_name=credential_name or cloud_name,
                 )
                 self.models.add(model_name)
         finally:
@@ -482,6 +492,28 @@ class Libjuju:
 
         return machine_id
 
+    async def deploy(
+        self, uri: str, model_name: str, wait: bool = True, timeout: float = 3600
+    ):
+        """
+        Deploy bundle or charm: Similar to the juju CLI command `juju deploy`
+
+        :param: uri:            Path or Charm Store uri in which the charm or bundle can be found
+        :param: model_name:     Name of the model in which to deploy
+        :param: wait:           If True, wait on JujuModelWatcher.wait_for_model after deploying
+        :param: timeout:        Time in seconds given to that wait (unused if wait is False)
+        """
+        controller = await self.get_controller()
+        model = await self.get_model(controller, model_name)
+        try:
+            await model.deploy(uri)
+            if wait:
+                await JujuModelWatcher.wait_for_model(model, timeout=timeout)
+                self.log.debug("All units active in model {}".format(model_name))
+        finally:  # disconnect even when the deploy or the wait raised
+            await self.disconnect_model(model)
+            await self.disconnect_controller(controller)
+
     async def deploy_charm(
         self,
         application_name: str,
@@ -627,11 +659,21 @@ class Libjuju:
             if application is None:
                 raise JujuApplicationNotFound("Cannot execute action")
 
-            # Get unit
+            # Get leader unit
+            # Race condition:
+            #   Occasionally, self._get_leader_unit() will return None
+            #   because the leader-elected hook has not been triggered yet.
+            #   Therefore, we retry a few times. If it still happens,
+            #   re-open bug 1236
+            attempts = 3
+            time_between_retries = 10
             unit = None
-            for u in application.units:
-                if await u.is_leader_from_status():
-                    unit = u
+            for _ in range(attempts):
+                unit = await self._get_leader_unit(application)
+                if unit is None:
+                    await asyncio.sleep(time_between_retries)
+                else:
+                    break
             if unit is None:
                 raise JujuLeaderUnitNotFound(
                     "Cannot execute action: leader unit not found"
@@ -804,17 +846,9 @@ class Libjuju:
             self.log.debug("Destroying model {}".format(model_name))
             uuid = model.info.uuid
 
-            # Destroy machines
-            machines = await model.get_machines()
-            for machine_id in machines:
-                try:
-                    await self.destroy_machine(
-                        model, machine_id=machine_id, total_timeout=total_timeout,
-                    )
-                except asyncio.CancelledError:
-                    raise
-                except Exception:
-                    pass
+            # Destroy machines that are manually provisioned
+            # and are still in pending state
+            await self._destroy_pending_machines(model, only_manual=True)
 
             # Disconnect model
             await self.disconnect_model(model)
@@ -823,32 +857,24 @@ class Libjuju:
             if model_name in self.models:
                 self.models.remove(model_name)
 
-            await controller.destroy_model(uuid)
+            await controller.destroy_model(uuid, force=True, max_wait=0)
 
             # Wait until model is destroyed
             self.log.debug("Waiting for model {} to be destroyed...".format(model_name))
-            last_exception = ""
 
             if total_timeout is None:
                 total_timeout = 3600
             end = time.time() + total_timeout
             while time.time() < end:
-                try:
-                    models = await controller.list_models()
-                    if model_name not in models:
-                        self.log.debug(
-                            "The model {} ({}) was destroyed".format(model_name, uuid)
-                        )
-                        return
-                except asyncio.CancelledError:
-                    raise
-                except Exception as e:
-                    last_exception = e
+                models = await controller.list_models()
+                if model_name not in models:
+                    self.log.debug(
+                        "The model {} ({}) was destroyed".format(model_name, uuid)
+                    )
+                    return
                 await asyncio.sleep(5)
             raise Exception(
-                "Timeout waiting for model {} to be destroyed {}".format(
-                    model_name, last_exception
-                )
+                "Timeout waiting for model {} to be destroyed".format(model_name)
             )
         finally:
             await self.disconnect_controller(controller)
@@ -871,32 +897,49 @@ class Libjuju:
         else:
             self.log.warning("Application not found: {}".format(application_name))
 
-    async def destroy_machine(
-        self, model: Model, machine_id: str, total_timeout: float = 3600
-    ):
+    async def _destroy_pending_machines(self, model: Model, only_manual: bool = False):
         """
-        Destroy machine
+        Destroy pending machines in a given model
 
-        :param: model:          Model object
-        :param: machine_id:     Machine id
-        :param: total_timeout:  Timeout in seconds
+        :param: model:          Model object whose machines are inspected
+        :param: only_manual:    If True, only manually provisioned pending machines
+                                are destroyed; otherwise all pending machines are
         """
-        machines = await model.get_machines()
-        if machine_id in machines:
-            machine = machines[machine_id]
-            await machine.destroy(force=True)
-            # max timeout
-            end = time.time() + total_timeout
-
-            # wait for machine removal
-            machines = await model.get_machines()
-            while machine_id in machines and time.time() < end:
-                self.log.debug("Waiting for machine {} is destroyed".format(machine_id))
-                await asyncio.sleep(0.5)
-                machines = await model.get_machines()
-            self.log.debug("Machine destroyed: {}".format(machine_id))
-        else:
-            self.log.debug("Machine not found: {}".format(machine_id))
+        status = await model.get_status()
+        for machine_id in status.machines:
+            machine_status = status.machines[machine_id]
+            if machine_status.agent_status.status == "pending":
+                if only_manual and not machine_status.instance_id.startswith("manual:"):
+                    continue  # skip only this machine; later ones may still be manual
+                machine = model.machines[machine_id]
+                await machine.destroy(force=True)
+
+    # async def destroy_machine(
+    #     self, model: Model, machine_id: str, total_timeout: float = 3600
+    # ):
+    #     """
+    #     Destroy machine
+
+    #     :param: model:          Model object
+    #     :param: machine_id:     Machine id
+    #     :param: total_timeout:  Timeout in seconds
+    #     """
+    #     machines = await model.get_machines()
+    #     if machine_id in machines:
+    #         machine = machines[machine_id]
+    #         await machine.destroy(force=True)
+    #         # max timeout
+    #         end = time.time() + total_timeout
+
+    #         # wait for machine removal
+    #         machines = await model.get_machines()
+    #         while machine_id in machines and time.time() < end:
+    #             self.log.debug("Waiting for machine {} is destroyed".format(machine_id))
+    #             await asyncio.sleep(0.5)
+    #             machines = await model.get_machines()
+    #         self.log.debug("Machine destroyed: {}".format(machine_id))
+    #     else:
+    #         self.log.debug("Machine not found: {}".format(machine_id))
 
     async def configure_application(
         self, model_name: str, application_name: str, config: dict = None
@@ -981,6 +1024,7 @@ class Libjuju:
 
         :param: interval: Time in seconds between checks
         """
+        controller = None
         while True:
             try:
                 controller = await self.get_controller()
@@ -1023,16 +1067,21 @@ class Libjuju:
             await self.disconnect_controller(controller)
 
     async def add_k8s(
-        self, name: str, configuration: Configuration, storage_class: str
+        self,
+        name: str,
+        configuration: Configuration,
+        storage_class: str,
+        credential_name: str = None,
     ):
         """
         Add a Kubernetes cloud to the controller
 
         Similar to the `juju add-k8s` command in the CLI
 
-        :param: name:           Name for the K8s cloud
-        :param: configuration:  Kubernetes configuration object
-        :param: storage_class:  Storage Class to use in the cloud
+        :param: name:               Name for the K8s cloud
+        :param: configuration:      Kubernetes configuration object
+        :param: storage_class:      Storage Class to use in the cloud
+        :param: credential_name:    Name of the credential to add for the cloud
         """
 
         if not storage_class:
@@ -1060,7 +1109,9 @@ class Libjuju:
             },
         )
 
-        return await self.add_cloud(name, cloud, credential)
+        return await self.add_cloud(
+            name, cloud, credential, credential_name=credential_name
+        )
 
     def get_k8s_cloud_credential(
         self, configuration: Configuration,
@@ -1114,23 +1165,31 @@ class Libjuju:
             auth_type = "certificate"
         else:
             raise JujuInvalidK8sConfiguration("authentication method not supported")
-        return client.CloudCredential(auth_type=auth_type, attrs=attrs,)
+        return client.CloudCredential(auth_type=auth_type, attrs=attrs)
 
     async def add_cloud(
-        self, name: str, cloud: Cloud, credential: CloudCredential = None
+        self,
+        name: str,
+        cloud: Cloud,
+        credential: CloudCredential = None,
+        credential_name: str = None,
     ) -> Cloud:
         """
         Add cloud to the controller
 
-        :param: name:   Name of the cloud to be added
-        :param: cloud:  Cloud object
-        :param: credential:   CloudCredentials object for the cloud
+        :param: name:               Name of the cloud to be added
+        :param: cloud:              Cloud object
+        :param: credential:         CloudCredentials object for the cloud
+        :param: credential_name:    Credential name.
+                                    If not defined, the name of the cloud will be used.
         """
         controller = await self.get_controller()
         try:
             _ = await controller.add_cloud(name, cloud)
             if credential:
-                await controller.add_credential(name, credential=credential, cloud=name)
+                await controller.add_credential(
+                    credential_name or name, credential=credential, cloud=name
+                )
             # Need to return the object returned by the controller.add_cloud() function
             # I'm returning the original value now until this bug is fixed:
             #   https://github.com/juju/python-libjuju/issues/443
@@ -1149,3 +1208,11 @@ class Libjuju:
             await controller.remove_cloud(name)
         finally:
             await self.disconnect_controller(controller)
+
+    async def _get_leader_unit(self, application: Application) -> Unit:
+        """Return the first unit reporting leadership, or None if none does."""
+        for candidate in application.units:
+            is_leader = await candidate.is_leader_from_status()
+            if is_leader:
+                return candidate
+        return None