src/osm_ngsa/osm_mon/vim_connectors/azure.py

   1 #######################################################################################
   2 # Copyright ETSI Contributors and Others.
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #    http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  13 # implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #######################################################################################
  17 import datetime
  18 import logging
  19 from typing import Dict, List
  20
  21 from azure.identity import ClientSecretCredential
  22 from azure.mgmt.compute import ComputeManagementClient
  23 from azure.mgmt.monitor import MonitorManagementClient
  24 from azure.profiles import ProfileDefinition
  25 from osm_mon.vim_connectors.base_vim import VIMConnector
  26
  27
  28 log = logging.getLogger(__name__)
  29
  30
  31 METRIC_MAPPINGS = {
  32     "cpu_utilization": {
  33         "metricname": "Percentage CPU",
  34         "aggregation": "Average",
  35     },
  36     "average_memory_utilization": {
  37         "metricname": "Available Memory Bytes",
  38         "aggregation": "Average",
  39     },
  40     "disk_read_ops": {
  41         "metricname": "Disk Read Operations/Sec",
  42         "aggregation": "Average",
  43     },
  44     "disk_write_ops": {
  45         "metricname": "Disk Write Operations/Sec",
  46         "aggregation": "Average",
  47     },
  48     "disk_read_bytes": {
  49         "metricname": "Disk Read Bytes",
  50         "aggregation": "Total",
  51     },
  52     "disk_write_bytes": {
  53         "metricname": "Disk Write Bytes",
  54         "aggregation": "Total",
  55     },
  56     # "packets_in_dropped": {},
  57     # "packets_out_dropped": {},
  58     # "packets_received": {},
  59     # "packets_sent": {},
  60 }
  61
  62
  63 class AzureCollector(VIMConnector):
  64     # Translate azure provisioning state to OSM provision state.
  65     # The first three ones are the transitional status once a user initiated
  66     # action has been requested. Once the operation is complete, it will
  67     # transition into the states Succeeded or Failed
  68     # https://docs.microsoft.com/en-us/azure/virtual-machines/windows/states-lifecycle
  69     provision_state2osm = {
  70         "Creating": "BUILD",
  71         "Updating": "BUILD",
  72         "Deleting": "INACTIVE",
  73         "Succeeded": "ACTIVE",
  74         "Failed": "ERROR",
  75     }
  76
  77     # Translate azure power state to OSM provision state
  78     power_state2osm = {
  79         "starting": "INACTIVE",
  80         "running": "ACTIVE",
  81         "stopping": "INACTIVE",
  82         "stopped": "INACTIVE",
  83         "unknown": "OTHER",
  84         "deallocated": "BUILD",
  85         "deallocating": "BUILD",
  86     }
  87
  88     AZURE_COMPUTE_MGMT_CLIENT_API_VERSION = "2021-03-01"
  89     AZURE_COMPUTE_MGMT_PROFILE_TAG = "azure.mgmt.compute.ComputeManagementClient"
  90     AZURE_COMPUTE_MGMT_PROFILE = ProfileDefinition(
  91         {
  92             AZURE_COMPUTE_MGMT_PROFILE_TAG: {
  93                 None: AZURE_COMPUTE_MGMT_CLIENT_API_VERSION,
  94                 "availability_sets": "2020-12-01",
  95                 "dedicated_host_groups": "2020-12-01",
  96                 "dedicated_hosts": "2020-12-01",
  97                 "disk_accesses": "2020-12-01",
  98                 "disk_encryption_sets": "2020-12-01",
  99                 "disk_restore_point": "2020-12-01",
 100                 "disks": "2020-12-01",
 101                 "galleries": "2020-09-30",
 102                 "gallery_application_versions": "2020-09-30",
 103                 "gallery_applications": "2020-09-30",
 104                 "gallery_image_versions": "2020-09-30",
 105                 "gallery_images": "2020-09-30",
 106                 "gallery_sharing_profile": "2020-09-30",
 107                 "images": "2020-12-01",
 108                 "log_analytics": "2020-12-01",
 109                 "operations": "2020-12-01",
 110                 "proximity_placement_groups": "2020-12-01",
 111                 "resource_skus": "2019-04-01",
 112                 "shared_galleries": "2020-09-30",
 113                 "shared_gallery_image_versions": "2020-09-30",
 114                 "shared_gallery_images": "2020-09-30",
 115                 "snapshots": "2020-12-01",
 116                 "ssh_public_keys": "2020-12-01",
 117                 "usage": "2020-12-01",
 118                 "virtual_machine_extension_images": "2020-12-01",
 119                 "virtual_machine_extensions": "2020-12-01",
 120                 "virtual_machine_images": "2020-12-01",
 121                 "virtual_machine_images_edge_zone": "2020-12-01",
 122                 "virtual_machine_run_commands": "2020-12-01",
 123                 "virtual_machine_scale_set_extensions": "2020-12-01",
 124                 "virtual_machine_scale_set_rolling_upgrades": "2020-12-01",
 125                 "virtual_machine_scale_set_vm_extensions": "2020-12-01",
 126                 "virtual_machine_scale_set_vm_run_commands": "2020-12-01",
 127                 "virtual_machine_scale_set_vms": "2020-12-01",
 128                 "virtual_machine_scale_sets": "2020-12-01",
 129                 "virtual_machine_sizes": "2020-12-01",
 130                 "virtual_machines": "2020-12-01",
 131             }
 132         },
 133         AZURE_COMPUTE_MGMT_PROFILE_TAG + " osm",
 134     )
 135
 136     def __init__(self, vim_account: Dict):
 137         self.vim_account = vim_account
 138         self.reload_client = True
 139         self.vm_sizes = {}
 140
 141         # Store config to create azure subscription later
 142         self._config = {
 143             "user": vim_account["vim_user"],
 144             "passwd": vim_account["vim_password"],
 145             "tenant": vim_account["vim_tenant_name"],
 146         }
 147
 148         # SUBSCRIPTION
 149         config = vim_account["config"]
 150         if "subscription_id" in config:
 151             self._config["subscription_id"] = config.get("subscription_id")
 152             log.info("Subscription: %s", self._config["subscription_id"])
 153         else:
 154             log.error("Subscription not specified")
 155             return
 156
 157         # RESOURCE_GROUP
 158         if "resource_group" in config:
 159             self.resource_group = config.get("resource_group")
 160         else:
 161             log.error("Azure resource_group is not specified at config")
 162             return
 163
 164         # REGION_NAME
 165         if "region_name" in config:
 166             self.region = config.get("region_name")
 167         else:
 168             log.error("Azure region_name is not specified at config")
 169             return
 170
 171     def _reload_connection(self):
 172         if self.reload_client:
 173             log.debug("reloading azure client")
 174             try:
 175                 self.credentials = ClientSecretCredential(
 176                     client_id=self._config["user"],
 177                     client_secret=self._config["passwd"],
 178                     tenant_id=self._config["tenant"],
 179                 )
 180                 self.conn_compute = ComputeManagementClient(
 181                     self.credentials,
 182                     self._config["subscription_id"],
 183                     profile=self.AZURE_COMPUTE_MGMT_PROFILE,
 184                 )
 185                 # create client
 186                 self.conn_monitor = MonitorManagementClient(
 187                     self.credentials,
 188                     self._config["subscription_id"],
 189                 )
 190                 # Set to client created
 191                 self.reload_client = False
 192             except Exception as e:
 193                 log.error(e)
 194
 195     def _get_region_vm_sizes(self):
 196         if len(self.vm_sizes) == 0:
 197             log.debug("getting VM sizes available in region")
 198             try:
 199                 for size in self.conn_compute.virtual_machine_sizes.list(self.region):
 200                     self.vm_sizes[size.name] = size
 201             except Exception as e:
 202                 log.error(e)
 203
 204     def collect_servers_status(self) -> List[Dict]:
 205         servers = []
 206         log.debug("collect_servers_status")
 207         self._reload_connection()
 208         try:
 209             for vm in self.conn_compute.virtual_machines.list(self.resource_group):
 210                 id = vm.id
 211                 array = id.split("/")
 212                 name = array[-1]
 213                 status = self.provision_state2osm.get(vm.provisioning_state, "OTHER")
 214                 if vm.provisioning_state == "Succeeded":
 215                     # check if machine is running or stopped
 216                     instance_view = self.conn_compute.virtual_machines.instance_view(
 217                         self.resource_group, name
 218                     )
 219                     for status in instance_view.statuses:
 220                         splitted_status = status.code.split("/")
 221                         if (
 222                             len(splitted_status) == 2
 223                             and splitted_status[0] == "PowerState"
 224                         ):
 225                             status = self.power_state2osm.get(
 226                                 splitted_status[1], "OTHER"
 227                             )
 228                 vm = {
 229                     "id": id,
 230                     "name": name,
 231                     "status": (1 if (status == "ACTIVE") else 0),
 232                 }
 233                 servers.append(vm)
 234         except Exception as e:
 235             log.error(e)
 236
 237         return servers
 238
 239     def is_vim_ok(self) -> bool:
 240         status = False
 241         self.reload_client = True
 242         try:
 243             self._reload_connection()
 244             status = True
 245         except Exception as e:
 246             log.error(e)
 247         return status
 248
 249     def collect_metrics(self, metric_list: List[Dict]) -> List[Dict]:
 250         log.debug("collect_metrics")
 251         self._reload_connection()
 252
 253         metric_results = []
 254         # VMs RAM cache for calculating "average_memory_utilization" metric
 255         cache = {}
 256         for metric in metric_list:
 257             server = metric["vm_id"]
 258             metric_name = metric["metric"]
 259             metric_mapping = METRIC_MAPPINGS.get(metric_name)
 260             if not metric_mapping:
 261                 continue
 262             if metric_name == "average_memory_utilization" and len(cache) == 0:
 263                 # storing VMs RAM sizes in cache
 264                 self._get_region_vm_sizes()
 265                 try:
 266                     for vm in self.conn_compute.virtual_machines.list(
 267                         self.resource_group
 268                     ):
 269                         id = vm.id
 270                         size_name = vm.hardware_profile.vm_size
 271                         vm_size = self.vm_sizes.get(size_name)
 272                         if vm_size:
 273                             ram = vm_size.memory_in_mb
 274                             cache[id] = ram
 275                 except Exception as e:
 276                     log.error(e)
 277             azure_metric_name = metric_mapping["metricname"]
 278             azure_aggregation = metric_mapping["aggregation"]
 279             end = datetime.datetime.now()
 280             init = end - datetime.timedelta(minutes=5)
 281             try:
 282                 metrics_data = self.conn_monitor.metrics.list(
 283                     server,
 284                     timespan="{}/{}".format(init, end),
 285                     interval="PT1M",
 286                     metricnames=azure_metric_name,
 287                     aggregation=azure_aggregation,
 288                 )
 289             except Exception as e:
 290                 log.error(e)
 291                 continue
 292             total = 0
 293             n_metrics = 0
 294             for item in metrics_data.value:
 295                 log.info("{} ({})".format(item.name.localized_value, item.unit))
 296                 for timeserie in item.timeseries:
 297                     for data in timeserie.data:
 298                         if azure_aggregation == "Average":
 299                             val = data.average
 300                         elif azure_aggregation == "Total":
 301                             val = data.total
 302                         else:
 303                             val = None
 304                         log.info("{}: {}".format(data.time_stamp, val))
 305                         if val is not None:
 306                             total += val
 307                             n_metrics += 1
 308             if n_metrics > 0:
 309                 value = total / n_metrics
 310                 if metric_name == "average_memory_utilization":
 311                     ram = cache.get(server)
 312                     if ram:
 313                         log.info(f"VM RAM = {ram}")
 314                         value = ram - (value / 1048576)
 315                     else:
 316                         log.error(f"Not found RAM value for server {server}")
 317                         value = None
 318                 if value is not None:
 319                     log.info(f"value = {value}")
 320                     metric["value"] = value
 321                     metric_results.append(metric)
 322             else:
 323                 log.info("No metric available")
 324
 325         return metric_results