a8c36e00f4e343753aaad7e389d4ea71f696176e
[osm/POL.git] / osm_policy_module / autoscaling / service.py
1 # -*- coding: utf-8 -*-
2 # pylint: disable=no-member
3
4 # Copyright 2018 Whitestack, LLC
5 # *************************************************************
6
7 # This file is part of OSM Monitoring module
8 # All Rights Reserved to Whitestack, LLC
9
10 # Licensed under the Apache License, Version 2.0 (the "License"); you may
11 # not use this file except in compliance with the License. You may obtain
12 # a copy of the License at
13
14 # http://www.apache.org/licenses/LICENSE-2.0
15
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
19 # License for the specific language governing permissions and limitations
20 # under the License.
21
22 # For those usages not covered by the Apache License, Version 2.0 please
23 # contact: bdiaz@whitestack.com or glavado@whitestack.com
24 ##
25 import asyncio
26 import datetime
27 import json
28 import logging
29
30 from osm_policy_module.common.common_db_client import CommonDbClient
31 from osm_policy_module.common.lcm_client import LcmClient
32 from osm_policy_module.common.mon_client import MonClient
33 from osm_policy_module.core import database
34 from osm_policy_module.core.config import Config
35 from osm_policy_module.core.database import (
36 ScalingGroup,
37 ScalingAlarm,
38 ScalingPolicy,
39 ScalingCriteria,
40 ScalingAlarmRepository,
41 ScalingGroupRepository,
42 ScalingPolicyRepository,
43 ScalingCriteriaRepository,
44 )
45 from osm_policy_module.core.exceptions import VdurNotFound
46
47 log = logging.getLogger(__name__)
48
49
50 class AutoscalingService:
def __init__(self, config: Config, loop=None):
    """
    Builds the service together with the clients it talks to.

    :param config: configuration object handed to every client created here
    :param loop: event loop passed to the MON and LCM clients; when it is
        falsy the loop returned by ``asyncio.get_event_loop()`` is used
    """
    self.conf = config
    self.loop = loop or asyncio.get_event_loop()
    self.db_client = CommonDbClient(config)
    self.mon_client = MonClient(config, loop=self.loop)
    self.lcm_client = LcmClient(config, loop=self.loop)
59
async def configure_scaling_groups(self, nsr_id: str):
    """
    Configures scaling groups for a network service. Creates records in DB. Creates alarms in MON.

    All DB writes happen inside one transaction. If anything fails, the
    transaction is rolled back, every alarm this call already created in MON
    is deleted (best effort) and the original error is re-raised.

    :param nsr_id: Network service record id
    :return: None
    :raises Exception: the error that interrupted the configuration
    """
    log.info(
        "Configuring scaling groups for network service with nsr_id: %s", nsr_id
    )
    # (vnf_member_index, vdu_name, alarm_uuid) of each alarm created in MON by
    # this call, recorded so they can be removed again if configuration fails.
    alarms_created = []
    database.db.connect()
    try:
        with database.db.atomic() as tx:
            try:
                for vnfr in self.db_client.get_vnfrs(nsr_id):
                    await self._configure_vnfr_scaling(nsr_id, vnfr, alarms_created)
            except Exception:
                log.exception("Error configuring scaling groups:")
                tx.rollback()
                if alarms_created:
                    log.info("Cleaning alarm resources in MON")
                    for vnf_member_index, vdu_name, alarm_uuid in alarms_created:
                        # Best effort: a failing cleanup must neither hide the
                        # original error nor stop the remaining deletions.
                        try:
                            await self.mon_client.delete_alarm(
                                nsr_id, vnf_member_index, vdu_name, alarm_uuid
                            )
                        except Exception:
                            log.exception(
                                "Error deleting alarm in MON %s", alarm_uuid
                            )
                raise
    finally:
        database.db.close()


async def _configure_vnfr_scaling(self, nsr_id: str, vnfr: dict, alarms_created: list):
    """
    Creates the scaling records and alarms required by a single vnfr.

    :param nsr_id: Network service record id
    :param vnfr: VNF record; "vnfd-id", "member-vnf-index-ref" and "vdur" are read
    :param alarms_created: list extended with every alarm created in MON
    """
    log.debug("Processing vnfr: %s", vnfr)
    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
    # TODO: Change for multiple DF support
    # "or" also covers a "df" entry that is present but empty.
    df = (vnfd.get("df") or [{}])[0]
    if "scaling-aspect" not in df:
        log.debug("No scaling aspect present in vnfd")
        return
    # TODO: Change for multiple instantiation levels support
    all_vnfd_monitoring_params = self._get_all_vnfd_monitoring_params(vnfd)
    vnf_member_index = vnfr["member-vnf-index-ref"]

    for scaling_aspect in df["scaling-aspect"]:
        scaling_group_record = self._get_or_create_scaling_group(
            nsr_id, vnf_member_index, scaling_aspect
        )
        vdurs = self._get_monitored_vdurs(scaling_aspect, vnfr["vdur"])
        for scaling_policy in scaling_aspect.get("scaling-policy", ()):
            if scaling_policy["scaling-type"] != "automatic":
                continue
            scaling_policy_record = self._get_or_create_scaling_policy(
                nsr_id, scaling_policy, scaling_group_record
            )
            for scaling_criteria in scaling_policy["scaling-criteria"]:
                scaling_criteria_record = self._get_or_create_scaling_criteria(
                    nsr_id, scaling_criteria, scaling_policy_record
                )
                vnf_monitoring_param = all_vnfd_monitoring_params[
                    scaling_criteria.get("vnf-monitoring-param-ref")
                ]
                for vdur in vdurs:
                    log.debug("Creating alarm for vdur %s ", vdur)
                    if self._scaling_alarm_exists(
                        nsr_id, scaling_policy, scaling_criteria, vdur
                    ):
                        log.debug(
                            "vdu %s already has an alarm configured", vdur["name"]
                        )
                        continue
                    metric_name = self._get_metric_name(vnf_monitoring_param)
                    nb_scale_op = self._get_nb_scale_op(
                        nsr_id, scaling_aspect["name"]
                    )
                    min_instance_count = self._get_min_instance_count(
                        df, vdur["vdu-id-ref"]
                    )
                    # The scale-in alarm is only created once the recorded
                    # number of scale operations reaches the profile minimum;
                    # the scale-out alarm is always created.
                    actions = ["scale_out"]
                    if nb_scale_op >= min_instance_count:
                        actions.insert(0, "scale_in")
                    for action in actions:
                        await self._create_scaling_alarm(
                            nsr_id=nsr_id,
                            vnf_member_index=vnf_member_index,
                            vdu_name=vdur["name"],
                            metric_name=metric_name,
                            scaling_criteria=scaling_criteria,
                            scaling_criteria_record=scaling_criteria_record,
                            action=action,
                            alarms_created=alarms_created,
                        )


def _scaling_alarm_exists(
    self, nsr_id: str, scaling_policy: dict, scaling_criteria: dict, vdur: dict
) -> bool:
    """Tells whether an alarm record already exists for this vdur, criteria, policy and ns."""
    try:
        ScalingAlarmRepository.get(
            ScalingAlarm.vdu_name == vdur["name"],
            ScalingCriteria.name == scaling_criteria["name"],
            ScalingPolicy.name == scaling_policy["name"],
            ScalingGroup.nsr_id == nsr_id,
            join_classes=[ScalingCriteria, ScalingPolicy, ScalingGroup],
        )
    except ScalingAlarm.DoesNotExist:
        return False
    return True


def _get_nb_scale_op(self, nsr_id: str, scaling_aspect_name: str):
    """Returns the "nb-scale-op" value the nsr stores for the named scaling group, 0 if none."""
    db_nsr = self.db_client.get_nsr(nsr_id)
    for admin_scale_info in db_nsr["_admin"].get("scaling-group") or ():
        if admin_scale_info["name"] == scaling_aspect_name:
            return admin_scale_info.get("nb-scale-op", 0)
    return 0


def _get_min_instance_count(self, df: dict, vdu_id) -> int:
    """Returns "min-number-of-instances" of the vdu-profile matching vdu_id, 1 if not declared."""
    for vdu_profile in df.get("vdu-profile", ()):
        if vdu_profile.get("id") != vdu_id:
            continue
        count = vdu_profile.get("min-number-of-instances")
        if count is None:
            # Earlier code looked the value up under a key with a trailing
            # space; keep honouring it in case any stored data relies on it.
            count = vdu_profile.get("min-number-of-instances ", 1)
        return int(count)
    return 1


async def _create_scaling_alarm(
    self,
    nsr_id: str,
    vnf_member_index,
    vdu_name: str,
    metric_name: str,
    scaling_criteria: dict,
    scaling_criteria_record,
    action: str,
    alarms_created: list,
):
    """Creates one alarm in MON plus its DB record and registers it for cleanup."""
    threshold_key, operation_key = {
        "scale_in": ("scale-in-threshold", "scale-in-relational-operation"),
        "scale_out": ("scale-out-threshold", "scale-out-relational-operation"),
    }[action]
    alarm_uuid = await self.mon_client.create_alarm(
        metric_name=metric_name,
        ns_id=nsr_id,
        vdu_name=vdu_name,
        vnf_member_index=vnf_member_index,
        threshold=scaling_criteria[threshold_key],
        operation=scaling_criteria[operation_key],
        action=action,
    )
    # Register before touching the DB so the MON alarm is still cleaned up
    # if storing the record fails.
    alarms_created.append((vnf_member_index, vdu_name, alarm_uuid))
    ScalingAlarmRepository.create(
        alarm_uuid=alarm_uuid,
        action=action,
        vnf_member_index=vnf_member_index,
        vdu_name=vdu_name,
        scaling_criteria=scaling_criteria_record,
    )
247
async def delete_scaling_groups(self, nsr_id: str):
    """
    Removes every scaling group stored for a network service, together with
    its policies, criteria and alarms.

    Each alarm is also deleted in MON; a ValueError raised by that call is
    logged and the local record is removed anyway. Any other error rolls the
    transaction back and is re-raised.

    :param nsr_id: Network service record id
    """

    async def remove_alarm(scaling_alarm):
        # MON first, then the local record; a ValueError from MON is only logged.
        try:
            await self.mon_client.delete_alarm(
                scaling_alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
                scaling_alarm.vnf_member_index,
                scaling_alarm.vdu_name,
                scaling_alarm.alarm_uuid,
            )
        except ValueError:
            log.exception(
                "Error deleting alarm in MON %s", scaling_alarm.alarm_uuid
            )
        scaling_alarm.delete_instance()

    log.debug("Deleting scaling groups for network service %s", nsr_id)
    database.db.connect()
    try:
        with database.db.atomic() as tx:
            try:
                groups = ScalingGroupRepository.list(ScalingGroup.nsr_id == nsr_id)
                for group in groups:
                    for policy in group.scaling_policies:
                        for criteria in policy.scaling_criterias:
                            for scaling_alarm in criteria.scaling_alarms:
                                await remove_alarm(scaling_alarm)
                            criteria.delete_instance()
                        policy.delete_instance()
                    group.delete_instance()
            except Exception:
                log.exception("Error deleting scaling groups and alarms:")
                tx.rollback()
                raise
    finally:
        database.db.close()
283
async def delete_orphaned_alarms(self, nsr_id):
    """
    Deletes the scaling alarms of a network service whose vdur can no longer
    be found.

    An alarm is treated as orphaned when ``db_client.get_vdur`` raises
    VdurNotFound for it. Orphaned alarms are deleted in MON (a ValueError
    there is logged and ignored) and then removed from the DB. Any other
    error rolls the transaction back and is re-raised.

    :param nsr_id: Network service record id
    """
    log.info("Deleting orphaned scaling alarms for network service %s", nsr_id)
    database.db.connect()
    try:
        with database.db.atomic() as tx:
            try:
                groups = ScalingGroupRepository.list(ScalingGroup.nsr_id == nsr_id)
                for group in groups:
                    for policy in group.scaling_policies:
                        for criteria in policy.scaling_criterias:
                            for scaling_alarm in criteria.scaling_alarms:
                                orphaned = False
                                try:
                                    self.db_client.get_vdur(
                                        nsr_id,
                                        scaling_alarm.vnf_member_index,
                                        scaling_alarm.vdu_name,
                                    )
                                except VdurNotFound:
                                    orphaned = True
                                if not orphaned:
                                    continue

                                log.debug(
                                    "Deleting orphaned scaling alarm %s",
                                    scaling_alarm.alarm_uuid,
                                )
                                try:
                                    await self.mon_client.delete_alarm(
                                        scaling_alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
                                        scaling_alarm.vnf_member_index,
                                        scaling_alarm.vdu_name,
                                        scaling_alarm.alarm_uuid,
                                    )
                                except ValueError:
                                    log.exception(
                                        "Error deleting alarm in MON %s",
                                        scaling_alarm.alarm_uuid,
                                    )
                                scaling_alarm.delete_instance()
            except Exception:
                log.exception("Error deleting orphaned alarms:")
                tx.rollback()
                raise
    finally:
        database.db.close()
327
async def handle_alarm(self, alarm_uuid: str, status: str):
    """
    Processes an alarm notification.

    The new status is stored first and the scaling policy is evaluated only
    after that update has completed.

    :param alarm_uuid: identifier of the alarm the notification refers to
    :param status: status to store for that alarm
    """
    # Step 1: persist the reported status.
    await self.update_alarm_status(alarm_uuid, status)
    # Step 2: decide whether a scaling action has to be requested.
    await self.evaluate_policy(alarm_uuid)
331
async def update_alarm_status(self, alarm_uuid: str, status: str):
    """
    Stores the latest status reported for a scaling alarm.

    When no scaling alarm with that uuid exists, a debug message is logged
    and nothing is changed.

    :param alarm_uuid: identifier of the alarm to update
    :param status: value written to the alarm's ``last_status``
    """
    database.db.connect()
    try:
        with database.db.atomic():
            record = ScalingAlarmRepository.get(
                ScalingAlarm.alarm_uuid == alarm_uuid
            )
            record.last_status = status
            record.save()
    except ScalingAlarm.DoesNotExist:
        log.debug(
            "There is no autoscaling action configured for alarm %s.", alarm_uuid
        )
    finally:
        database.db.close()
347
async def evaluate_policy(self, alarm_uuid):
    """
    Evaluates the scaling policy an alarm belongs to and requests a scaling
    action from LCM when the policy conditions are met.

    Nothing happens when the policy is disabled. Otherwise the last statuses
    of the alarms sharing the same criteria, action, vnf member index and vdu
    name are combined using the policy operation for the alarm action
    ("AND": every status is "alarm"; "OR": at least one is). If that holds
    and more than ``cooldown_time`` seconds have passed since ``last_scale``,
    the scale request is sent and ``last_scale`` is updated.

    :param alarm_uuid: identifier of the alarm that triggered the evaluation
    :raises Exception: if the alarm action is neither "scale_in" nor "scale_out"
    """
    database.db.connect()
    try:
        with database.db.atomic():
            trigger = ScalingAlarmRepository.get(
                ScalingAlarm.alarm_uuid == alarm_uuid
            )
            vnf_member_index = trigger.vnf_member_index
            action = trigger.action
            scaling_policy = trigger.scaling_criteria.scaling_policy
            if not scaling_policy.enabled:
                return

            operation_field = {
                "scale_in": "scale_in_operation",
                "scale_out": "scale_out_operation",
            }.get(action)
            if operation_field is None:
                raise Exception("Unknown alarm action {}".format(trigger.action))
            operation = getattr(scaling_policy, operation_field)

            related_alarms = ScalingAlarmRepository.list(
                ScalingAlarm.scaling_criteria == trigger.scaling_criteria,
                ScalingAlarm.action == trigger.action,
                ScalingAlarm.vnf_member_index == vnf_member_index,
                ScalingAlarm.vdu_name == trigger.vdu_name,
            )
            statuses = [related.last_status for related in related_alarms]

            if operation == "AND":
                triggered = set(statuses) == {"alarm"}
            elif operation == "OR":
                triggered = "alarm" in statuses
            else:
                triggered = False
            if not triggered:
                return

            elapsed = datetime.datetime.now() - scaling_policy.last_scale
            if elapsed.total_seconds() <= scaling_policy.cooldown_time:
                # Still cooling down from the previous scaling action.
                return

            log.info(
                "Sending %s action message for ns: %s",
                trigger.action,
                scaling_policy.scaling_group.nsr_id,
            )
            await self.lcm_client.scale(
                scaling_policy.scaling_group.nsr_id,
                scaling_policy.scaling_group.name,
                vnf_member_index,
                action,
            )
            scaling_policy.last_scale = datetime.datetime.now()
            scaling_policy.save()

    except ScalingAlarm.DoesNotExist:
        log.debug(
            "There is no autoscaling action configured for alarm %s.", alarm_uuid
        )
    finally:
        database.db.close()
400
def _get_all_vnfd_monitoring_params(self, vnfd):
    """
    Collects every monitoring parameter declared in a vnfd, indexed by id.

    Parameters are gathered, in this order, from the "monitoring-parameters"
    of each "int-virtual-link-desc" entry and from the "monitoring-parameter"
    lists of each "vdu" and each "df" entry. When an id appears more than
    once, the entry read last wins.

    :param vnfd: VNF descriptor dict
    :return: dict mapping the parameter "id" to the parameter dict
    """
    sources = (
        ("int-virtual-link-desc", "monitoring-parameters"),
        ("vdu", "monitoring-parameter"),
        ("df", "monitoring-parameter"),
    )
    params_by_id = {}
    for section_key, params_key in sources:
        for section in vnfd.get(section_key, ()):
            for param in section.get(params_key, ()):
                params_by_id[param.get("id")] = param
    return params_by_id
416
def _get_or_create_scaling_group(
    self, nsr_id: str, vnf_member_index: str, scaling_aspect: dict
):
    """
    Returns the scaling group record for a scaling aspect, creating it when
    there is none yet.

    Existing records are looked up by nsr id, vnf member index and the
    aspect "name". A new record additionally stores the whole aspect
    serialized as JSON in ``content``.

    :param nsr_id: Network service record id
    :param vnf_member_index: member index of the vnf the aspect belongs to
    :param scaling_aspect: scaling aspect dict taken from the vnfd
    :return: the existing or newly created scaling group record
    """
    aspect_name = scaling_aspect["name"]
    try:
        existing = ScalingGroupRepository.get(
            ScalingGroup.nsr_id == nsr_id,
            ScalingGroup.vnf_member_index == vnf_member_index,
            ScalingGroup.name == aspect_name,
        )
    except ScalingGroup.DoesNotExist:
        existing = None

    if existing is not None:
        log.debug("Found existing scaling group record in DB...")
        return existing

    log.debug("Creating scaling group record in DB...")
    created = ScalingGroupRepository.create(
        nsr_id=nsr_id,
        vnf_member_index=vnf_member_index,
        name=aspect_name,
        content=json.dumps(scaling_aspect),
    )
    log.debug(
        "Created scaling group record in DB : nsr_id=%s, vnf_member_index=%s, name=%s",
        created.nsr_id,
        created.vnf_member_index,
        created.name,
    )
    return created
442
def _get_or_create_scaling_policy(
    self, nsr_id: str, scaling_policy: dict, scaling_group_record: ScalingGroup
):
    """
    Returns the scaling policy record for a policy of a scaling group,
    creating it when there is none yet.

    Existing records are looked up by policy "name" within the given group.
    A new record takes its name and "cooldown-time" from the descriptor; the
    optional "scale-in-operation-type", "scale-out-operation-type" and
    "enabled" entries are copied onto it when present.

    :param nsr_id: Network service record id
    :param scaling_policy: scaling policy dict taken from the vnfd
    :param scaling_group_record: record of the group owning the policy
    :return: the existing or newly created scaling policy record
    """
    try:
        existing = ScalingPolicyRepository.get(
            ScalingPolicy.name == scaling_policy["name"],
            ScalingGroup.id == scaling_group_record.id,
            join_classes=[ScalingGroup],
        )
    except ScalingPolicy.DoesNotExist:
        existing = None

    if existing is not None:
        log.debug("Found existing scaling policy record in DB...")
        return existing

    log.debug("Creating scaling policy record in DB...")
    created = ScalingPolicyRepository.create(
        nsr_id=nsr_id,
        name=scaling_policy["name"],
        cooldown_time=scaling_policy["cooldown-time"],
        scaling_group=scaling_group_record,
    )
    # Optional descriptor entries and the record attribute each one feeds.
    optional_fields = (
        ("scale-in-operation-type", "scale_in_operation"),
        ("scale-out-operation-type", "scale_out_operation"),
        ("enabled", "enabled"),
    )
    for descriptor_key, attribute in optional_fields:
        if descriptor_key in scaling_policy:
            setattr(created, attribute, scaling_policy[descriptor_key])
    created.save()
    log.debug(
        "Created scaling policy record in DB : name=%s, scaling_group.name=%s",
        created.name,
        created.scaling_group.name,
    )
    return created
478
def _get_or_create_scaling_criteria(
    self, nsr_id: str, scaling_criteria: dict, scaling_policy_record: ScalingPolicy
):
    """
    Returns the scaling criteria record for a criteria of a scaling policy,
    creating it when there is none yet.

    Existing records are looked up by criteria "name" within the given policy.

    :param nsr_id: Network service record id
    :param scaling_criteria: scaling criteria dict taken from the vnfd
    :param scaling_policy_record: record of the policy owning the criteria
    :return: the existing or newly created scaling criteria record
    """
    try:
        existing = ScalingCriteriaRepository.get(
            ScalingPolicy.id == scaling_policy_record.id,
            ScalingCriteria.name == scaling_criteria["name"],
            join_classes=[ScalingPolicy],
        )
    except ScalingCriteria.DoesNotExist:
        existing = None

    if existing is not None:
        log.debug("Found existing scaling criteria record in DB...")
        return existing

    log.debug("Creating scaling criteria record in DB...")
    created = ScalingCriteriaRepository.create(
        nsr_id=nsr_id,
        name=scaling_criteria["name"],
        scaling_policy=scaling_policy_record,
    )
    log.debug(
        "Created scaling criteria record in DB : name=%s, scaling_policy.name=%s",
        created.name,
        created.scaling_policy.name,
    )
    return created
502
def _get_monitored_vdurs(self, scaling_aspect: dict, vdurs):
    """
    Selects the vdurs whose vdu is referenced by the scaling aspect deltas.

    The referenced vdu ids are the "id" values of every "vdu-delta" entry
    found under "aspect-delta-details" -> "deltas". A warning is logged when
    no vdur matches.

    :param scaling_aspect: scaling aspect dict taken from the vnfd
    :param vdurs: iterable of vdur dicts, each providing "vdu-id-ref"
    :return: list of the matching vdurs, in their original order
    """
    deltas = scaling_aspect.get("aspect-delta-details", {}).get("deltas", ())
    scaled_vdu_ids = set()
    for delta in deltas:
        scaled_vdu_ids.update(
            vdu_delta.get("id") for vdu_delta in delta.get("vdu-delta", ())
        )

    matching = [vdur for vdur in vdurs if vdur["vdu-id-ref"] in scaled_vdu_ids]

    if not matching:
        log.warning(
            "Scaling criteria is referring to a vnf-monitoring-param that does not "
            "contain a reference to a vdu or vnf metric."
        )
    return matching
519
def _get_metric_name(self, vnf_monitoring_param: dict):
    """
    Returns the metric name of a VNF monitoring parameter.

    :param vnf_monitoring_param: monitoring parameter dict taken from the vnfd
    :return: the value stored under "performance-metric"
    :raises ValueError: if the parameter has no "performance-metric" entry
    """
    if "performance-metric" in vnf_monitoring_param:
        return vnf_monitoring_param["performance-metric"]
    # Read the id with .get(): a parameter that also lacks "id" must still
    # produce the ValueError below rather than an unrelated KeyError.
    raise ValueError(
        "No metric name found for vnf_monitoring_param %s"
        % (vnf_monitoring_param.get("id"),)
    )