Feature 10986: Autoheal switch and Autoscale switch
[osm/POL.git] / osm_policy_module / autoscaling / service.py
1 # -*- coding: utf-8 -*-
2 # pylint: disable=no-member
3
4 # Copyright 2018 Whitestack, LLC
5 # *************************************************************
6
7 # This file is part of OSM Monitoring module
8 # All Rights Reserved to Whitestack, LLC
9
10 # Licensed under the Apache License, Version 2.0 (the "License"); you may
11 # not use this file except in compliance with the License. You may obtain
12 # a copy of the License at
13
14 # http://www.apache.org/licenses/LICENSE-2.0
15
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
19 # License for the specific language governing permissions and limitations
20 # under the License.
21
22 # For those usages not covered by the Apache License, Version 2.0 please
23 # contact: bdiaz@whitestack.com or glavado@whitestack.com
24 ##
25 import datetime
26 import json
27 import logging
28 import operator
29 import functools
30
31 from osm_policy_module.common.common_db_client import CommonDbClient
32 from osm_policy_module.common.lcm_client import LcmClient
33 from osm_policy_module.common.mon_client import MonClient
34 from osm_policy_module.core import database
35 from osm_policy_module.core.config import Config
36 from osm_policy_module.core.database import (
37 ScalingGroup,
38 ScalingAlarm,
39 ScalingPolicy,
40 ScalingCriteria,
41 ScalingAlarmRepository,
42 ScalingGroupRepository,
43 ScalingPolicyRepository,
44 ScalingCriteriaRepository,
45 )
46 from osm_policy_module.core.exceptions import VdurNotFound
47
48 log = logging.getLogger(__name__)
49
50
51 class AutoscalingService:
def __init__(self, config: Config):
    """
    Keeps the configuration and builds the clients used by the service.

    :param config: configuration object; it is stored as ``self.conf`` and
        handed unchanged to each client constructor
    """
    self.conf = config
    # One client per collaborator: common DB, MON (alarms) and LCM (scaling).
    self.db_client = CommonDbClient(config)
    self.mon_client = MonClient(config)
    self.lcm_client = LcmClient(config)
57
async def configure_scaling_groups(self, nsr_id: str, vnf_member_index=None):
    """
    Configures scaling groups for a network service. Creates records in DB. Creates alarms in MON.

    All DB writes happen inside a single transaction. If anything fails, the
    transaction is rolled back, the MON alarms created so far are deleted
    (best effort) and the original exception is re-raised.

    :param nsr_id: Network service record id
    :param vnf_member_index: when given, only the VNF record with this member
        index is processed; when None, every VNF record of the NS is processed
    :return: None
    :raises Exception: whatever error interrupted the configuration
    """
    log.info(
        "Configuring scaling groups for network service with nsr_id: %s", nsr_id
    )
    # MON alarms live outside the DB transaction, so they are tracked here to
    # be able to undo them by hand on failure.
    alarms_created = []
    database.db.connect()
    try:
        with database.db.atomic() as tx:
            try:
                if vnf_member_index is None:
                    vnfrs = self.db_client.get_vnfrs(nsr_id)
                else:
                    vnfrs = [self.db_client.get_vnfr(nsr_id, vnf_member_index)]
                for vnfr in vnfrs:
                    await self._configure_vnfr_scaling(
                        nsr_id, vnfr, alarms_created
                    )
            except Exception:
                log.exception("Error configuring scaling groups:")
                tx.rollback()
                await self._delete_mon_alarms(alarms_created)
                # Bare raise keeps the original exception and traceback.
                raise
    finally:
        database.db.close()


async def _configure_vnfr_scaling(
    self, nsr_id: str, vnfr: dict, alarms_created: list
):
    """
    Creates the scaling records and MON alarms for one VNF record.

    :param nsr_id: Network service record id
    :param vnfr: VNF record (dict) as returned by the common DB client
    :param alarms_created: list that receives every alarm record created, so
        the caller can clean MON up if a later step fails
    """
    log.debug("Processing vnfr: %s", vnfr)
    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
    # TODO: Change for multiple DF support
    # A missing or empty "df" list is treated as a DF without scaling aspects.
    df = (vnfd.get("df") or [{}])[0]
    if "scaling-aspect" not in df:
        log.debug("No scaling aspect present in vnfd")
        return
    # TODO: Change for multiple instantiation levels support
    scaling_aspects = df["scaling-aspect"]
    all_vnfd_monitoring_params = self._get_all_vnfd_monitoring_params(vnfd)
    member_index = vnfr["member-vnf-index-ref"]
    for scaling_aspect in scaling_aspects:
        scaling_group_record = self._get_or_create_scaling_group(
            nsr_id, member_index, scaling_aspect
        )
        vdurs = self._get_monitored_vdurs(scaling_aspect, vnfr["vdur"])
        for scaling_policy in scaling_aspect.get("scaling-policy", ()):
            if scaling_policy["scaling-type"] != "automatic":
                continue
            scaling_policy_record = self._get_or_create_scaling_policy(
                nsr_id, scaling_policy, scaling_group_record
            )
            for scaling_criteria in scaling_policy["scaling-criteria"]:
                scaling_criteria_record = self._get_or_create_scaling_criteria(
                    nsr_id, scaling_criteria, scaling_policy_record
                )
                vnf_monitoring_param = all_vnfd_monitoring_params[
                    scaling_criteria.get("vnf-monitoring-param-ref")
                ]
                for vdur in vdurs:
                    vdu_id = vdur["vdu-id-ref"]
                    log.debug("Creating alarm for vdur %s ", vdur)
                    if self._scaling_alarm_exists(
                        nsr_id,
                        vdur["name"],
                        scaling_criteria["name"],
                        scaling_policy["name"],
                    ):
                        log.debug(
                            "vdu %s already has an alarm configured",
                            vdur["name"],
                        )
                        continue
                    metric_name = self._get_metric_name(vnf_monitoring_param)
                    nb_scale_op = self._get_nb_scale_op(
                        nsr_id, scaling_aspect["name"]
                    )
                    min_instance_count = self._get_min_instance_count(df, vdu_id)
                    # The scale-in alarm is only created once enough scale
                    # operations were recorded; scale-out is always created.
                    actions = ["scale_out"]
                    if nb_scale_op >= min_instance_count:
                        actions.insert(0, "scale_in")
                    for action in actions:
                        alarm = await self._create_scaling_alarm(
                            nsr_id,
                            member_index,
                            vdur["name"],
                            metric_name,
                            scaling_criteria,
                            scaling_criteria_record,
                            action,
                        )
                        alarms_created.append(alarm)


def _scaling_alarm_exists(
    self, nsr_id: str, vdu_name: str, criteria_name: str, policy_name: str
) -> bool:
    """Tells whether an alarm record already exists for this vdu, criteria, policy and NS."""
    try:
        ScalingAlarmRepository.get(
            ScalingAlarm.vdu_name == vdu_name,
            ScalingCriteria.name == criteria_name,
            ScalingPolicy.name == policy_name,
            ScalingGroup.nsr_id == nsr_id,
            join_classes=[
                ScalingCriteria,
                ScalingPolicy,
                ScalingGroup,
            ],
        )
    except ScalingAlarm.DoesNotExist:
        return False
    return True


def _get_nb_scale_op(self, nsr_id: str, scaling_aspect_name: str) -> int:
    """Reads "nb-scale-op" of the named scaling group from the NS record; 0 when absent."""
    db_nsr = self.db_client.get_nsr(nsr_id)
    for admin_scale_info in db_nsr["_admin"].get("scaling-group") or ():
        if admin_scale_info["name"] == scaling_aspect_name:
            return admin_scale_info.get("nb-scale-op", 0)
    return 0


def _get_min_instance_count(self, df: dict, vdu_id: str) -> int:
    """Reads "min-number-of-instances" from the vdu-profile matching vdu_id; 1 when absent."""
    for vdu_profile in df.get("vdu-profile", ()):
        if vdu_profile.get("id") == vdu_id:
            # The key must not carry trailing whitespace, otherwise the
            # descriptor value is never found and the default always wins.
            return int(vdu_profile.get("min-number-of-instances", 1))
    return 1


async def _create_scaling_alarm(
    self,
    nsr_id: str,
    member_index: str,
    vdu_name: str,
    metric_name: str,
    scaling_criteria: dict,
    scaling_criteria_record,
    action: str,
):
    """
    Creates one alarm in MON and its record in the DB.

    :param action: "scale_in" or "scale_out"; selects which threshold and
        relational operation of the scaling criteria are used
    :return: the alarm record created in the DB
    """
    if action == "scale_in":
        threshold_key = "scale-in-threshold"
        operation_key = "scale-in-relational-operation"
    else:
        threshold_key = "scale-out-threshold"
        operation_key = "scale-out-relational-operation"
    alarm_uuid = await self.mon_client.create_alarm(
        metric_name=metric_name,
        ns_id=nsr_id,
        vdu_name=vdu_name,
        vnf_member_index=member_index,
        threshold=scaling_criteria[threshold_key],
        operation=scaling_criteria[operation_key],
        action=action,
    )
    return ScalingAlarmRepository.create(
        alarm_uuid=alarm_uuid,
        action=action,
        vnf_member_index=member_index,
        vdu_name=vdu_name,
        scaling_criteria=scaling_criteria_record,
    )


async def _delete_mon_alarms(self, alarms: list):
    """Deletes the given alarms from MON, logging (not raising) individual failures."""
    if not alarms:
        return
    log.info("Cleaning alarm resources in MON")
    for alarm in alarms:
        try:
            await self.mon_client.delete_alarm(
                alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
                alarm.vnf_member_index,
                alarm.vdu_name,
                alarm.alarm_uuid,
            )
        except Exception:
            # Keep going: one failed deletion must not leak the remaining
            # alarms nor hide the error that triggered the cleanup.
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
251
async def delete_scaling_groups(self, nsr_id: str, vnf_member_index=None):
    """
    Deletes the scaling groups of a network service together with their
    policies, criteria and alarms (alarms are also deleted in MON).

    :param nsr_id: Network service record id
    :param vnf_member_index: when given, only the groups of that VNF member
        index are deleted
    :raises Exception: any error is logged, the transaction is rolled back and
        the error is re-raised
    """
    log.debug("Deleting scaling groups for network service %s", nsr_id)
    database.db.connect()
    try:
        with database.db.atomic() as tx:
            try:
                scale_conditions = ScalingGroup.nsr_id == nsr_id
                if vnf_member_index is not None:
                    # Narrow the selection to a single VNF of the NS.
                    scale_conditions = operator.and_(
                        scale_conditions,
                        ScalingGroup.vnf_member_index == vnf_member_index,
                    )
                for group in ScalingGroupRepository.list(scale_conditions):
                    for policy in group.scaling_policies:
                        for criteria in policy.scaling_criterias:
                            for scaling_alarm in criteria.scaling_alarms:
                                policy_of_alarm = (
                                    scaling_alarm.scaling_criteria.scaling_policy
                                )
                                try:
                                    await self.mon_client.delete_alarm(
                                        policy_of_alarm.scaling_group.nsr_id,
                                        scaling_alarm.vnf_member_index,
                                        scaling_alarm.vdu_name,
                                        scaling_alarm.alarm_uuid,
                                    )
                                except ValueError:
                                    # A MON failure does not stop the DB cleanup.
                                    log.exception(
                                        "Error deleting alarm in MON %s",
                                        scaling_alarm.alarm_uuid,
                                    )
                                scaling_alarm.delete_instance()
                            criteria.delete_instance()
                        policy.delete_instance()
                    group.delete_instance()

            except Exception as exc:
                log.exception("Error deleting scaling groups and alarms:")
                tx.rollback()
                raise exc
    finally:
        database.db.close()
293
async def delete_orphaned_alarms(self, nsr_id):
    """
    Deletes the scaling alarms of a network service whose vdur can no longer
    be found, both in MON and in the DB.

    :param nsr_id: Network service record id
    :raises Exception: any error is logged, the transaction is rolled back and
        the error is re-raised
    """
    log.info("Deleting orphaned scaling alarms for network service %s", nsr_id)
    database.db.connect()
    try:
        with database.db.atomic() as tx:
            try:
                # Lazily walks group -> policy -> criteria -> alarm.
                candidate_alarms = (
                    scaling_alarm
                    for group in ScalingGroupRepository.list(
                        ScalingGroup.nsr_id == nsr_id
                    )
                    for policy in group.scaling_policies
                    for criteria in policy.scaling_criterias
                    for scaling_alarm in criteria.scaling_alarms
                )
                for scaling_alarm in candidate_alarms:
                    try:
                        # Only the lookup outcome matters; the vdur is unused.
                        self.db_client.get_vdur(
                            nsr_id,
                            scaling_alarm.vnf_member_index,
                            scaling_alarm.vdu_name,
                        )
                    except VdurNotFound:
                        log.debug(
                            "Deleting orphaned scaling alarm %s",
                            scaling_alarm.alarm_uuid,
                        )
                        owner_policy = (
                            scaling_alarm.scaling_criteria.scaling_policy
                        )
                        try:
                            await self.mon_client.delete_alarm(
                                owner_policy.scaling_group.nsr_id,
                                scaling_alarm.vnf_member_index,
                                scaling_alarm.vdu_name,
                                scaling_alarm.alarm_uuid,
                            )
                        except ValueError:
                            # The DB record is removed even if MON refuses.
                            log.exception(
                                "Error deleting alarm in MON %s",
                                scaling_alarm.alarm_uuid,
                            )
                        scaling_alarm.delete_instance()

            except Exception as exc:
                log.exception("Error deleting orphaned alarms:")
                tx.rollback()
                raise exc
    finally:
        database.db.close()
337
async def handle_alarm(self, alarm_uuid: str, status: str):
    """
    Handles an alarm notification: first records the new status of the alarm,
    then evaluates the scaling policy the alarm belongs to.

    :param alarm_uuid: identifier of the alarm
    :param status: status reported for the alarm
    """
    # The status must be stored before the evaluation runs.
    await self.update_alarm_status(alarm_uuid, status)
    await self.evaluate_policy(alarm_uuid)
341
async def update_alarm_status(self, alarm_uuid: str, status: str):
    """
    Stores the given status as ``last_status`` of the scaling alarm.

    An unknown alarm is not an error: it is only logged at debug level.

    :param alarm_uuid: identifier of the alarm
    :param status: status to store
    """
    database.db.connect()
    try:
        with database.db.atomic():
            scaling_alarm = ScalingAlarmRepository.get(
                ScalingAlarm.alarm_uuid == alarm_uuid
            )
            scaling_alarm.last_status = status
            scaling_alarm.save()
    except ScalingAlarm.DoesNotExist:
        log.debug(
            "There is no autoscaling action configured for alarm %s.", alarm_uuid
        )
    finally:
        database.db.close()
357
async def evaluate_policy(self, alarm_uuid):
    """
    Evaluates the scaling policy of an alarm and requests a scale action from
    LCM when the policy conditions are met.

    Nothing is done when the "autoscale"/"enabled" setting is not "True" or
    when the policy is disabled. Otherwise the last statuses of the alarms
    sharing criteria, action, VNF member index and vdu name are combined with
    the policy operation ("AND": all in "alarm"; "OR": any in "alarm"). If the
    result is positive and the cooldown time has elapsed since the last scale,
    the scale request is sent and the time of the last scale is updated.

    An unknown alarm is not an error: it is only logged at debug level.

    :param alarm_uuid: identifier of the alarm that triggered the evaluation
    :raises Exception: when the alarm action is neither "scale_in" nor "scale_out"
    """
    database.db.connect()
    try:
        if self.conf.get("autoscale", "enabled") != "True":
            return
        with database.db.atomic():
            triggering_alarm = ScalingAlarmRepository.get(
                ScalingAlarm.alarm_uuid == alarm_uuid
            )
            vnf_member_index = triggering_alarm.vnf_member_index
            action = triggering_alarm.action
            scaling_policy = triggering_alarm.scaling_criteria.scaling_policy
            if not scaling_policy.enabled:
                return
            if action == "scale_in":
                operation = scaling_policy.scale_in_operation
            elif action == "scale_out":
                operation = scaling_policy.scale_out_operation
            else:
                raise Exception(
                    "Unknown alarm action {}".format(triggering_alarm.action)
                )
            related_alarms = ScalingAlarmRepository.list(
                ScalingAlarm.scaling_criteria == triggering_alarm.scaling_criteria,
                ScalingAlarm.action == triggering_alarm.action,
                ScalingAlarm.vnf_member_index == vnf_member_index,
                ScalingAlarm.vdu_name == triggering_alarm.vdu_name,
            )
            statuses = [related.last_status for related in related_alarms]
            if operation == "AND":
                should_scale = set(statuses) == {"alarm"}
            elif operation == "OR":
                should_scale = "alarm" in statuses
            else:
                should_scale = False
            if not should_scale:
                return
            elapsed = datetime.datetime.now() - scaling_policy.last_scale
            if elapsed.total_seconds() > scaling_policy.cooldown_time:
                scaling_group = scaling_policy.scaling_group
                log.info(
                    "Sending %s action message for ns: %s",
                    action,
                    scaling_group.nsr_id,
                )
                await self.lcm_client.scale(
                    scaling_policy.scaling_group.nsr_id,
                    scaling_policy.scaling_group.name,
                    vnf_member_index,
                    action,
                )
                # Restart the cooldown window from this scale request.
                scaling_policy.last_scale = datetime.datetime.now()
                scaling_policy.save()

    except ScalingAlarm.DoesNotExist:
        log.debug(
            "There is no autoscaling action configured for alarm %s.", alarm_uuid
        )
    finally:
        database.db.close()
411
412 def _get_all_vnfd_monitoring_params(self, vnfd):
413 all_monitoring_params = {}
414 for ivld in vnfd.get("int-virtual-link-desc", ()):
415 for mp in ivld.get("monitoring-parameters", ()):
416 all_monitoring_params[mp.get("id")] = mp
417
418 for vdu in vnfd.get("vdu", ()):
419 for mp in vdu.get("monitoring-parameter", ()):
420 all_monitoring_params[mp.get("id")] = mp
421
422 for df in vnfd.get("df", ()):
423 for mp in df.get("monitoring-parameter", ()):
424 all_monitoring_params[mp.get("id")] = mp
425
426 return all_monitoring_params
427
def _get_or_create_scaling_group(
    self, nsr_id: str, vnf_member_index: str, scaling_aspect: dict
):
    """
    Returns the scaling group record of a scaling aspect, creating it when it
    does not exist yet.

    :param nsr_id: Network service record id
    :param vnf_member_index: member index of the VNF owning the aspect
    :param scaling_aspect: scaling aspect (dict) from the VNFD; its "name" is
        the group name and its JSON dump is stored as the group content
    :return: the existing or newly created scaling group record
    """
    group_name = scaling_aspect["name"]
    try:
        group = ScalingGroupRepository.get(
            ScalingGroup.nsr_id == nsr_id,
            ScalingGroup.vnf_member_index == vnf_member_index,
            ScalingGroup.name == group_name,
        )
    except ScalingGroup.DoesNotExist:
        log.debug("Creating scaling group record in DB...")
        group = ScalingGroupRepository.create(
            nsr_id=nsr_id,
            vnf_member_index=vnf_member_index,
            name=scaling_aspect["name"],
            content=json.dumps(scaling_aspect),
        )
        log.debug(
            "Created scaling group record in DB : nsr_id=%s, vnf_member_index=%s, name=%s",
            group.nsr_id,
            group.vnf_member_index,
            group.name,
        )
    else:
        log.debug("Found existing scaling group record in DB...")
    return group
453
def _get_or_create_scaling_policy(
    self, nsr_id: str, scaling_policy: dict, scaling_group_record: ScalingGroup
):
    """
    Returns the scaling policy record of a scaling group, creating it when it
    does not exist yet.

    On creation, the optional descriptor fields "scale-in-operation-type",
    "scale-out-operation-type" and "enabled" are copied to the record when
    present, and the record is saved.

    :param nsr_id: Network service record id
    :param scaling_policy: scaling policy (dict) from the VNFD
    :param scaling_group_record: scaling group record owning the policy
    :return: the existing or newly created scaling policy record
    """
    # (key in the descriptor, attribute of the record)
    optional_fields = (
        ("scale-in-operation-type", "scale_in_operation"),
        ("scale-out-operation-type", "scale_out_operation"),
        ("enabled", "enabled"),
    )
    try:
        policy = ScalingPolicyRepository.get(
            ScalingPolicy.name == scaling_policy["name"],
            ScalingGroup.id == scaling_group_record.id,
            join_classes=[ScalingGroup],
        )
    except ScalingPolicy.DoesNotExist:
        log.debug("Creating scaling policy record in DB...")
        policy = ScalingPolicyRepository.create(
            nsr_id=nsr_id,
            name=scaling_policy["name"],
            cooldown_time=scaling_policy["cooldown-time"],
            scaling_group=scaling_group_record,
        )
        for descriptor_key, attribute in optional_fields:
            if descriptor_key in scaling_policy:
                setattr(policy, attribute, scaling_policy[descriptor_key])
        policy.save()
        log.debug(
            "Created scaling policy record in DB : name=%s, scaling_group.name=%s",
            policy.name,
            policy.scaling_group.name,
        )
    else:
        log.debug("Found existing scaling policy record in DB...")
    return policy
489
def _get_or_create_scaling_criteria(
    self, nsr_id: str, scaling_criteria: dict, scaling_policy_record: ScalingPolicy
):
    """
    Returns the scaling criteria record of a scaling policy, creating it when
    it does not exist yet.

    :param nsr_id: Network service record id
    :param scaling_criteria: scaling criteria (dict) from the VNFD
    :param scaling_policy_record: scaling policy record owning the criteria
    :return: the existing or newly created scaling criteria record
    """
    try:
        criteria = ScalingCriteriaRepository.get(
            ScalingPolicy.id == scaling_policy_record.id,
            ScalingCriteria.name == scaling_criteria["name"],
            join_classes=[ScalingPolicy],
        )
    except ScalingCriteria.DoesNotExist:
        log.debug("Creating scaling criteria record in DB...")
        criteria = ScalingCriteriaRepository.create(
            nsr_id=nsr_id,
            name=scaling_criteria["name"],
            scaling_policy=scaling_policy_record,
        )
        log.debug(
            "Created scaling criteria record in DB : name=%s, scaling_policy.name=%s",
            criteria.name,
            criteria.scaling_policy.name,
        )
    else:
        log.debug("Found existing scaling criteria record in DB...")
    return criteria
513
514 def _get_monitored_vdurs(self, scaling_aspect: dict, vdurs):
515 all_monitored_vdus = set()
516 for delta in scaling_aspect.get("aspect-delta-details", {}).get("deltas", ()):
517 for vdu_delta in delta.get("vdu-delta", ()):
518 all_monitored_vdus.add(vdu_delta.get("id"))
519
520 monitored_vdurs = list(
521 filter(lambda vdur: vdur["vdu-id-ref"] in all_monitored_vdus, vdurs)
522 )
523
524 if not monitored_vdurs:
525 log.warning(
526 "Scaling criteria is referring to a vnf-monitoring-param that does not "
527 "contain a reference to a vdu or vnf metric."
528 )
529 return monitored_vdurs
530
531 def _get_metric_name(self, vnf_monitoring_param: dict):
532 if "performance-metric" in vnf_monitoring_param:
533 return vnf_monitoring_param["performance-metric"]
534 raise ValueError(
535 "No metric name found for vnf_monitoring_param %s"
536 % vnf_monitoring_param["id"]
537 )