X-Git-Url: https://osm.etsi.org/gitweb/?p=osm%2FN2VC.git;a=blobdiff_plain;f=n2vc%2Flibjuju.py;h=47024149e1bcf024edfc461eeea644b51e2b2d17;hp=12730fde98bb795d51de9011fde8c066f1062e57;hb=a4f57d6260e6520aa6a89e86f9d1b2ca5e0a3a08;hpb=12b29244e5d333341166ea92760b8eb245c16b27 diff --git a/n2vc/libjuju.py b/n2vc/libjuju.py index 12730fd..4702414 100644 --- a/n2vc/libjuju.py +++ b/n2vc/libjuju.py @@ -22,6 +22,7 @@ from juju.errors import JujuAPIError from juju.model import Model from juju.machine import Machine from juju.application import Application +from juju.unit import Unit from juju.client._definitions import ( FullStatus, QueryApplicationOffersResults, @@ -39,9 +40,11 @@ from n2vc.exceptions import ( JujuModelAlreadyExists, JujuControllerFailedConnecting, JujuApplicationExists, + JujuInvalidK8sConfiguration, ) from n2vc.utils import DB_DATA from osm_common.dbbase import DbException +from kubernetes.client.configuration import Configuration class Libjuju: @@ -78,9 +81,12 @@ class Libjuju: self.log = log or logging.getLogger("Libjuju") self.db = db db_endpoints = self._get_api_endpoints_db() - self.endpoints = db_endpoints or [endpoint] - if db_endpoints is None: + self.endpoints = None + if (db_endpoints and endpoint not in db_endpoints) or not db_endpoints: + self.endpoints = [endpoint] self._update_api_endpoints_db(self.endpoints) + else: + self.endpoints = db_endpoints self.api_proxy = api_proxy self.username = username self.password = password @@ -101,7 +107,10 @@ class Libjuju: self.models = set() self.log.debug("Libjuju initialized!") - self.health_check_task = self.loop.create_task(self.health_check()) + self.health_check_task = self._create_health_check_task() + + def _create_health_check_task(self): + return self.loop.create_task(self.health_check()) async def get_controller(self, timeout: float = 5.0) -> Controller: """ @@ -158,12 +167,14 @@ class Libjuju: """ await controller.disconnect() - async def add_model(self, model_name: str, cloud_name: str): + 
async def get_metrics(self, model_name: str, application_name: str) -> dict:
    """
    Get the metrics collected by the VCA

    :param: model_name:         The name or unique id of the network service
    :param: application_name:   The name of the application

    :return: Result of application.get_metrics(), or an empty dict when
             self._get_application() returns None
    :raises: Exception if model_name or application_name is empty
    """
    if not model_name or not application_name:
        raise Exception("model_name and application_name must be non-empty strings")
    metrics = {}
    controller = await self.get_controller()
    try:
        # get_model is inside the outer try so that the controller is
        # released even when the model cannot be obtained
        model = await self.get_model(controller, model_name)
        try:
            application = self._get_application(model, application_name)
            if application is not None:
                metrics = await application.get_metrics()
        finally:
            # disconnect_* are coroutines: without "await" they never run
            await self.disconnect_model(model)
    finally:
        await self.disconnect_controller(controller)
    return metrics
controller.list_models() + if model_name not in models: + self.log.debug( + "The model {} ({}) was destroyed".format(model_name, uuid) + ) + return await asyncio.sleep(5) raise Exception( - "Timeout waiting for model {} to be destroyed {}".format( - model_name, last_exception - ) + "Timeout waiting for model {} to be destroyed".format(model_name) ) finally: await self.disconnect_controller(controller) @@ -849,32 +874,49 @@ class Libjuju: else: self.log.warning("Application not found: {}".format(application_name)) - async def destroy_machine( - self, model: Model, machine_id: str, total_timeout: float = 3600 - ): + async def _destroy_pending_machines(self, model: Model, only_manual: bool = False): """ - Destroy machine + Destroy pending machines in a given model - :param: model: Model object - :param: machine_id: Machine id - :param: total_timeout: Timeout in seconds + :param: only_manual: Bool that indicates only manually provisioned + machines should be destroyed (if True), or that + all pending machines should be destroyed """ - machines = await model.get_machines() - if machine_id in machines: - machine = machines[machine_id] - await machine.destroy(force=True) - # max timeout - end = time.time() + total_timeout - - # wait for machine removal - machines = await model.get_machines() - while machine_id in machines and time.time() < end: - self.log.debug("Waiting for machine {} is destroyed".format(machine_id)) - await asyncio.sleep(0.5) - machines = await model.get_machines() - self.log.debug("Machine destroyed: {}".format(machine_id)) - else: - self.log.debug("Machine not found: {}".format(machine_id)) + status = await model.get_status() + for machine_id in status.machines: + machine_status = status.machines[machine_id] + if machine_status.agent_status.status == "pending": + if only_manual and not machine_status.instance_id.startswith("manual:"): + break + machine = model.machines[machine_id] + await machine.destroy(force=True) + + # async def destroy_machine( + 
async def add_k8s(
    self,
    name: str,
    configuration: Configuration,
    storage_class: str,
    credential_name: str = None,
):
    """
    Add a Kubernetes cloud to the controller

    Similar to the `juju add-k8s` command in the CLI

    :param: name:            Name for the K8s cloud
    :param: configuration:   Kubernetes configuration object
    :param: storage_class:   Storage Class to use in the cloud
    :param: credential_name: Name for the credential; forwarded to add_cloud()

    :return: Result of self.add_cloud()
    :raises: Exception if name, storage_class or configuration is empty
    """

    if not storage_class:
        raise Exception("storage_class must be a non-empty string")
    if not name:
        raise Exception("name must be a non-empty string")
    if not configuration:
        raise Exception("configuration must be provided")

    endpoint = configuration.host
    credential = self.get_k8s_cloud_credential(configuration)
    ca_certificates = (
        [credential.attrs["ClientCertificateData"]]
        if "ClientCertificateData" in credential.attrs
        else []
    )
    cloud = client.Cloud(
        type_="kubernetes",
        auth_types=[credential.auth_type],
        endpoint=endpoint,
        ca_certificates=ca_certificates,
        config={
            "operator-storage": storage_class,
            "workload-storage": storage_class,
        },
    )

    return await self.add_cloud(
        name, cloud, credential, credential_name=credential_name
    )


def get_k8s_cloud_credential(
    self, configuration: Configuration,
) -> client.CloudCredential:
    """
    Build the cloud credential from a Kubernetes configuration object

    The auth type is chosen from the fields set in the configuration:
    "oauth2" (key file + token), "userpasswithcert" / "userpass"
    (username, with or without a certificate file) or "certificate"
    (certificate file + token).

    :param: configuration: Kubernetes configuration object

    :return: client.CloudCredential with the selected auth_type and attrs
    :raises: JujuInvalidK8sConfiguration if the api_key format is unknown,
             if both token and user/pass are set, if the token is missing
             for oauth2, or if no supported authentication method matches
    """
    attrs = {}
    ca_cert = configuration.ssl_ca_cert or configuration.cert_file
    key = configuration.key_file
    # Tolerate a configuration whose api_key is None instead of a dict
    api_key = configuration.api_key or {}
    token = None
    username = configuration.username
    password = configuration.password

    if "authorization" in api_key:
        authorization = api_key["authorization"]
        if "Bearer " in authorization:
            bearer_list = authorization.split(" ")
            if len(bearer_list) == 2:
                [_, token] = bearer_list
            else:
                raise JujuInvalidK8sConfiguration("unknown format of api_key")
        else:
            token = authorization
    if ca_cert:
        # Context managers guarantee the file handles are closed
        with open(ca_cert, "r") as ca_cert_file:
            attrs["ClientCertificateData"] = ca_cert_file.read()
    if key:
        with open(key, "r") as key_file:
            attrs["ClientKeyData"] = key_file.read()
    if token:
        if username or password:
            raise JujuInvalidK8sConfiguration("Cannot set both token and user/pass")
        attrs["Token"] = token

    auth_type = None
    if key:
        auth_type = "oauth2"
        if not token:
            raise JujuInvalidK8sConfiguration(
                "missing token for auth type {}".format(auth_type)
            )
    elif username:
        if not password:
            self.log.debug(
                "credential for user {} has empty password".format(username)
            )
        attrs["username"] = username
        attrs["password"] = password
        if ca_cert:
            auth_type = "userpasswithcert"
        else:
            auth_type = "userpass"
    elif ca_cert and token:
        auth_type = "certificate"
    else:
        raise JujuInvalidK8sConfiguration("authentication method not supported")
    return client.CloudCredential(auth_type=auth_type, attrs=attrs,)
name. + If not defined, cloud of the name will be used. """ controller = await self.get_controller() try: _ = await controller.add_cloud(name, cloud) if credential: - await controller.add_credential(name, credential=credential, cloud=name) + await controller.add_credential( + credential_name or name, credential=credential, cloud=name + ) # Need to return the object returned by the controller.add_cloud() function # I'm returning the original value now until this bug is fixed: # https://github.com/juju/python-libjuju/issues/443 @@ -1087,3 +1184,11 @@ class Libjuju: await controller.remove_cloud(name) finally: await self.disconnect_controller(controller) + + async def _get_leader_unit(self, application: Application) -> Unit: + unit = None + for u in application.units: + if await u.is_leader_from_status(): + unit = u + break + return unit