Feature 11016: Service KPI Metric Based Scaling of VNF using exporter endpoint in...
[osm/NG-SA.git] / src / osm_ngsa / osm_mon / vim_connectors / azure.py
1 #######################################################################################
2 # Copyright ETSI Contributors and Others.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #######################################################################################
17 import datetime
18 import logging
19 from typing import Dict, List
20
21 from azure.identity import ClientSecretCredential
22 from azure.mgmt.compute import ComputeManagementClient
23 from azure.mgmt.monitor import MonitorManagementClient
24 from azure.profiles import ProfileDefinition
25 from osm_mon.vim_connectors.base_vim import VIMConnector
26
27
log = logging.getLogger(__name__)


# One row per OSM metric that can be collected from Azure Monitor:
#   (OSM metric name, Azure Monitor metric name, Azure aggregation type)
_AZURE_METRIC_TABLE = (
    ("cpu_utilization", "Percentage CPU", "Average"),
    ("average_memory_utilization", "Available Memory Bytes", "Average"),
    ("disk_read_ops", "Disk Read Operations/Sec", "Average"),
    ("disk_write_ops", "Disk Write Operations/Sec", "Average"),
    ("disk_read_bytes", "Disk Read Bytes", "Total"),
    ("disk_write_bytes", "Disk Write Bytes", "Total"),
    # OSM metrics without an Azure mapping yet:
    #   packets_in_dropped, packets_out_dropped, packets_received, packets_sent
)

# OSM metric name -> {"metricname": <Azure metric>, "aggregation": <Azure aggregation>}
METRIC_MAPPINGS = {
    osm_metric: {"metricname": azure_metric, "aggregation": aggregation}
    for osm_metric, azure_metric, aggregation in _AZURE_METRIC_TABLE
}
61
62
class AzureCollector(VIMConnector):
    """Collect VM status and Azure Monitor metrics for one Azure VIM account.

    The Azure SDK clients are created lazily by ``_reload_connection`` the
    first time a collection method is called, and re-created after
    ``is_vim_ok`` has been invoked.

    All collection methods are best-effort: Azure/SDK errors are logged and
    result in an empty (or partial) result instead of an exception.
    """

    # Translate azure provisioning state to OSM provision state.
    # The first three ones are the transitional status once a user initiated
    # action has been requested. Once the operation is complete, it will
    # transition into the states Succeeded or Failed
    # https://docs.microsoft.com/en-us/azure/virtual-machines/windows/states-lifecycle
    provision_state2osm = {
        "Creating": "BUILD",
        "Updating": "BUILD",
        "Deleting": "INACTIVE",
        "Succeeded": "ACTIVE",
        "Failed": "ERROR",
    }

    # Translate azure power state (the part after "PowerState/" in the
    # instance view status code) to OSM state
    power_state2osm = {
        "starting": "INACTIVE",
        "running": "ACTIVE",
        "stopping": "INACTIVE",
        "stopped": "INACTIVE",
        "unknown": "OTHER",
        "deallocated": "BUILD",
        "deallocating": "BUILD",
    }

    # API versions pinned for every operation group of the compute client
    AZURE_COMPUTE_MGMT_CLIENT_API_VERSION = "2021-03-01"
    AZURE_COMPUTE_MGMT_PROFILE_TAG = "azure.mgmt.compute.ComputeManagementClient"
    AZURE_COMPUTE_MGMT_PROFILE = ProfileDefinition(
        {
            AZURE_COMPUTE_MGMT_PROFILE_TAG: {
                None: AZURE_COMPUTE_MGMT_CLIENT_API_VERSION,
                "availability_sets": "2020-12-01",
                "dedicated_host_groups": "2020-12-01",
                "dedicated_hosts": "2020-12-01",
                "disk_accesses": "2020-12-01",
                "disk_encryption_sets": "2020-12-01",
                "disk_restore_point": "2020-12-01",
                "disks": "2020-12-01",
                "galleries": "2020-09-30",
                "gallery_application_versions": "2020-09-30",
                "gallery_applications": "2020-09-30",
                "gallery_image_versions": "2020-09-30",
                "gallery_images": "2020-09-30",
                "gallery_sharing_profile": "2020-09-30",
                "images": "2020-12-01",
                "log_analytics": "2020-12-01",
                "operations": "2020-12-01",
                "proximity_placement_groups": "2020-12-01",
                "resource_skus": "2019-04-01",
                "shared_galleries": "2020-09-30",
                "shared_gallery_image_versions": "2020-09-30",
                "shared_gallery_images": "2020-09-30",
                "snapshots": "2020-12-01",
                "ssh_public_keys": "2020-12-01",
                "usage": "2020-12-01",
                "virtual_machine_extension_images": "2020-12-01",
                "virtual_machine_extensions": "2020-12-01",
                "virtual_machine_images": "2020-12-01",
                "virtual_machine_images_edge_zone": "2020-12-01",
                "virtual_machine_run_commands": "2020-12-01",
                "virtual_machine_scale_set_extensions": "2020-12-01",
                "virtual_machine_scale_set_rolling_upgrades": "2020-12-01",
                "virtual_machine_scale_set_vm_extensions": "2020-12-01",
                "virtual_machine_scale_set_vm_run_commands": "2020-12-01",
                "virtual_machine_scale_set_vms": "2020-12-01",
                "virtual_machine_scale_sets": "2020-12-01",
                "virtual_machine_sizes": "2020-12-01",
                "virtual_machines": "2020-12-01",
            }
        },
        AZURE_COMPUTE_MGMT_PROFILE_TAG + " osm",
    )

    # Azure aggregation type -> attribute of a metric data point holding it
    _AGGREGATION_FIELDS = {
        "Average": "average",
        "Total": "total",
    }

    # Length of the time window queried from Azure Monitor for every metric
    _METRICS_WINDOW_MINUTES = 5

    # "Available Memory Bytes" is reported in bytes, VM sizes in MB
    _BYTES_PER_MB = 1048576

    def __init__(self, vim_account: Dict):
        """Store the VIM account settings; no Azure call is made here.

        Args:
            vim_account: VIM account record. Must contain ``vim_user``,
                ``vim_password``, ``vim_tenant_name`` and ``config``. The
                ``config`` mapping is expected to provide
                ``subscription_id``, ``resource_group`` and ``region_name``;
                a missing entry is logged as an error and the remaining
                settings are left unset.

        Raises:
            KeyError: if one of the mandatory top-level keys is missing.
        """
        self.vim_account = vim_account
        self.reload_client = True
        self.vm_sizes = {}

        # Defined up front so that a partially configured collector never
        # fails with AttributeError when these are read later on.
        self.resource_group = None
        self.region = None
        self.credentials = None
        self.conn_compute = None
        self.conn_monitor = None

        # Store config to create azure subscription later
        self._config = {
            "user": vim_account["vim_user"],
            "passwd": vim_account["vim_password"],
            "tenant": vim_account["vim_tenant_name"],
        }

        # SUBSCRIPTION
        config = vim_account["config"]
        if "subscription_id" in config:
            self._config["subscription_id"] = config["subscription_id"]
            log.info("Subscription: %s", self._config["subscription_id"])
        else:
            log.error("Subscription not specified")
            return

        # RESOURCE_GROUP
        if "resource_group" in config:
            self.resource_group = config["resource_group"]
        else:
            log.error("Azure resource_group is not specified at config")
            return

        # REGION_NAME
        if "region_name" in config:
            self.region = config["region_name"]
        else:
            log.error("Azure region_name is not specified at config")
            return

    def _reload_connection(self) -> bool:
        """(Re)create the Azure clients when ``self.reload_client`` is set.

        Errors are logged, not raised; in that case ``self.reload_client``
        stays True so that the next call tries again.

        Returns:
            True if the clients are up to date after the call, False if
            creating them failed.
        """
        if not self.reload_client:
            return True

        log.debug("reloading azure client")
        try:
            self.credentials = ClientSecretCredential(
                client_id=self._config["user"],
                client_secret=self._config["passwd"],
                tenant_id=self._config["tenant"],
            )
            self.conn_compute = ComputeManagementClient(
                self.credentials,
                self._config["subscription_id"],
                profile=self.AZURE_COMPUTE_MGMT_PROFILE,
            )
            # create client
            self.conn_monitor = MonitorManagementClient(
                self.credentials,
                self._config["subscription_id"],
            )
            # Set to client created
            self.reload_client = False
        except Exception as e:
            log.error(e)
        return not self.reload_client

    def _get_region_vm_sizes(self):
        """Fill ``self.vm_sizes`` (size name -> size object) once per instance.

        Nothing is done when the cache already has entries. Errors are
        logged and leave the cache as it is.
        """
        if self.vm_sizes:
            return

        log.debug("getting VM sizes available in region")
        try:
            for size in self.conn_compute.virtual_machine_sizes.list(self.region):
                self.vm_sizes[size.name] = size
        except Exception as e:
            log.error(e)

    def _get_power_status(self, vm_name: str, default: str) -> str:
        """Return the OSM state derived from the power state of a VM.

        Args:
            vm_name: name of the VM inside ``self.resource_group``.
            default: value returned when the instance view carries no
                ``PowerState/<state>`` status code.

        Returns:
            The ``power_state2osm`` translation of the power state
            ("OTHER" for an unmapped one), or ``default``.
        """
        instance_view = self.conn_compute.virtual_machines.instance_view(
            self.resource_group, vm_name
        )
        osm_status = default
        # A dedicated loop variable is used on purpose: the result must not
        # depend on the position of the PowerState entry in the list.
        for vm_status in instance_view.statuses:
            code_parts = vm_status.code.split("/")
            if len(code_parts) == 2 and code_parts[0] == "PowerState":
                osm_status = self.power_state2osm.get(code_parts[1], "OTHER")
        return osm_status

    def collect_servers_status(self) -> List[Dict]:
        """List the VMs of the resource group with an up/down flag.

        Returns:
            One dict per VM with keys ``id`` (Azure resource id), ``name``
            (last segment of the id) and ``status`` (1 when the VM is
            ACTIVE, 0 otherwise). On error the VMs collected so far are
            returned.
        """
        servers = []
        log.debug("collect_servers_status")
        self._reload_connection()
        if self.conn_compute is None:
            log.error("Azure compute client is not available")
            return servers

        try:
            for vm in self.conn_compute.virtual_machines.list(self.resource_group):
                vm_id = vm.id
                name = vm_id.split("/")[-1]
                status = self.provision_state2osm.get(vm.provisioning_state, "OTHER")
                if vm.provisioning_state == "Succeeded":
                    # check if machine is running or stopped
                    status = self._get_power_status(name, default=status)
                servers.append(
                    {
                        "id": vm_id,
                        "name": name,
                        "status": (1 if (status == "ACTIVE") else 0),
                    }
                )
        except Exception as e:
            log.error(e)

        return servers

    def is_vim_ok(self) -> bool:
        """Force the Azure clients to be re-created and report the outcome.

        NOTE(review): this only checks that the SDK clients can be built
        from the stored configuration; whether that step contacts Azure is
        up to the SDK and is not verified here.

        Returns:
            True if the clients were created, False otherwise.
        """
        self.reload_client = True
        try:
            return self._reload_connection()
        except Exception as e:
            log.error(e)
            return False

    def _get_vms_ram(self) -> Dict:
        """Return the RAM in MB of every VM of the resource group, by VM id.

        VMs whose size is not found in ``self.vm_sizes`` are skipped. Errors
        are logged and the entries gathered so far are returned.
        """
        ram_by_vm = {}
        self._get_region_vm_sizes()
        try:
            for vm in self.conn_compute.virtual_machines.list(self.resource_group):
                vm_size = self.vm_sizes.get(vm.hardware_profile.vm_size)
                if vm_size:
                    ram_by_vm[vm.id] = vm_size.memory_in_mb
        except Exception as e:
            log.error(e)
        return ram_by_vm

    def _metrics_timespan(self) -> str:
        """Return the ``start/end`` query window ending now.

        Both timestamps are expressed in UTC using the ISO 8601 form
        documented for the Azure Monitor metrics API, so the window does not
        depend on the time zone of the host.
        """
        end = datetime.datetime.now(datetime.timezone.utc)
        init = end - datetime.timedelta(minutes=self._METRICS_WINDOW_MINUTES)
        iso_format = "%Y-%m-%dT%H:%M:%SZ"
        return "{}/{}".format(init.strftime(iso_format), end.strftime(iso_format))

    def _average_metric_value(self, metrics_data, aggregation: str):
        """Average all non-empty data points of an Azure Monitor response.

        Args:
            metrics_data: object returned by ``metrics.list``.
            aggregation: Azure aggregation type that was requested; selects
                which field of every data point is read.

        Returns:
            The mean of the available values, or None when there is none.
        """
        field = self._AGGREGATION_FIELDS.get(aggregation)
        total = 0
        n_metrics = 0
        for item in metrics_data.value:
            log.info("{} ({})".format(item.name.localized_value, item.unit))
            for timeserie in item.timeseries:
                for data in timeserie.data:
                    val = getattr(data, field) if field else None
                    log.info("{}: {}".format(data.time_stamp, val))
                    if val is not None:
                        total += val
                        n_metrics += 1
        if n_metrics == 0:
            return None
        return total / n_metrics

    def collect_metrics(self, metric_list: List[Dict]) -> List[Dict]:
        """Collect the requested metrics from Azure Monitor.

        For every entry the values of the last minutes (one-minute
        granularity) are averaged. ``average_memory_utilization`` is
        returned as used memory in MB: RAM of the VM size minus the average
        "Available Memory Bytes".

        Args:
            metric_list: dicts with at least ``vm_id`` (Azure resource id of
                the VM) and ``metric`` (OSM metric name). Entries whose
                metric is not in ``METRIC_MAPPINGS`` are ignored.

        Returns:
            The entries of ``metric_list`` for which a value was obtained;
            each of them is updated in place with a ``value`` key.
        """
        log.debug("collect_metrics")
        self._reload_connection()

        metric_results = []
        if self.conn_monitor is None:
            log.error("Azure monitor client is not available")
            return metric_results

        # VMs RAM cache for calculating "average_memory_utilization" metric.
        # None means "not built yet", so it is built at most once per call
        # even when the result turns out to be empty.
        ram_cache = None
        for metric in metric_list:
            server = metric["vm_id"]
            metric_name = metric["metric"]
            metric_mapping = METRIC_MAPPINGS.get(metric_name)
            if not metric_mapping:
                continue
            is_memory_metric = metric_name == "average_memory_utilization"
            if is_memory_metric and ram_cache is None:
                # storing VMs RAM sizes in cache
                ram_cache = self._get_vms_ram()

            azure_aggregation = metric_mapping["aggregation"]
            try:
                metrics_data = self.conn_monitor.metrics.list(
                    server,
                    timespan=self._metrics_timespan(),
                    interval="PT1M",
                    metricnames=metric_mapping["metricname"],
                    aggregation=azure_aggregation,
                )
            except Exception as e:
                log.error(e)
                continue

            value = self._average_metric_value(metrics_data, azure_aggregation)
            if value is None:
                log.info("No metric available")
                continue

            if is_memory_metric:
                ram = ram_cache.get(server)
                if ram:
                    log.info(f"VM RAM = {ram}")
                    value = ram - (value / self._BYTES_PER_MB)
                else:
                    log.error(f"Not found RAM value for server {server}")
                    continue

            log.info(f"value = {value}")
            metric["value"] = value
            metric_results.append(metric)

        return metric_results