Adds deletion of alarms in case an exception is thrown during scaling configuration
[osm/POL.git] / osm_policy_module / core / agent.py
1 # -*- coding: utf-8 -*-
2
3 # Copyright 2018 Whitestack, LLC
4 # *************************************************************
5
6 # This file is part of OSM Monitoring module
7 # All Rights Reserved to Whitestack, LLC
8
9 # Licensed under the Apache License, Version 2.0 (the "License"); you may
10 # not use this file except in compliance with the License. You may obtain
11 # a copy of the License at
12
13 # http://www.apache.org/licenses/LICENSE-2.0
14
15 # Unless required by applicable law or agreed to in writing, software
16 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18 # License for the specific language governing permissions and limitations
19 # under the License.
20
21 # For those usages not covered by the Apache License, Version 2.0 please
22 # contact: bdiaz@whitestack.com or glavado@whitestack.com
23 ##
24 import datetime
25 import json
26 import logging
27 import threading
28 from json import JSONDecodeError
29
30 import yaml
31 from kafka import KafkaConsumer
32
33 from osm_policy_module.common.db_client import DbClient
34 from osm_policy_module.common.lcm_client import LcmClient
35 from osm_policy_module.common.mon_client import MonClient
36 from osm_policy_module.core import database
37 from osm_policy_module.core.config import Config
38 from osm_policy_module.core.database import ScalingGroup, ScalingAlarm, ScalingPolicy, ScalingCriteria
39
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Kafka message keys the agent acts on; a message with any other key is only
# reported at debug level and otherwise ignored.
ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'notify_alarm']
43
44
class PolicyModuleAgent:
    """Kafka-driven agent that configures scaling alarms and triggers scaling actions.

    The agent consumes the ``ns`` and ``alarm_response`` topics. On
    ``instantiated``/``scaled`` messages it (re)builds the scaling group, policy,
    criteria and alarm records for the network service; on ``notify_alarm``
    messages it asks LCM to scale, subject to the policy's cooldown time.
    """

    def __init__(self):
        """Create the DB, MON and LCM clients and build the Kafka bootstrap address."""
        cfg = Config.instance()
        self.db_client = DbClient()
        self.mon_client = MonClient()
        self.lcm_client = LcmClient()
        self.kafka_server = '{}:{}'.format(cfg.OSMPOL_MESSAGE_HOST,
                                           cfg.OSMPOL_MESSAGE_PORT)

    def run(self):
        """Consume Kafka messages forever, handling each one in its own thread."""
        consumer = KafkaConsumer(bootstrap_servers=self.kafka_server,
                                 key_deserializer=bytes.decode,
                                 value_deserializer=bytes.decode,
                                 group_id='pol-consumer')
        consumer.subscribe(["ns", "alarm_response"])

        for message in consumer:
            # One thread per message keeps the consumer loop from blocking on handlers.
            t = threading.Thread(target=self._process_msg, args=(message.topic, message.key, message.value,))
            t.start()

    def _process_msg(self, topic, key, msg):
        """Decode a message and dispatch it according to its key.

        The payload is parsed as JSON first and as YAML if that fails. Any
        exception raised while handling the message is logged and swallowed so
        that a bad message cannot kill the worker thread silently.
        """
        log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg)
        try:
            if key not in ALLOWED_KAFKA_KEYS:
                log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key)
                return

            try:
                content = json.loads(msg)
            except JSONDecodeError:
                content = yaml.safe_load(msg)

            if key in ('instantiated', 'scaled'):
                self._handle_instantiated_or_scaled(content)
            elif key == 'notify_alarm':
                self._handle_alarm_notification(content)
        except Exception:
            log.exception("Error consuming message: ")

    def _handle_alarm_notification(self, content):
        """Trigger the scaling action bound to a notified alarm, honouring the cooldown.

        :param content: decoded notification; its ``notify_details`` entry must
            carry ``alarm_uuid``, ``metric_name``, ``operation``,
            ``threshold_value``, ``vdu_name``, ``vnf_member_index`` and ``ns_id``.
        """
        log.debug("_handle_alarm_notification: %s", content)
        details = content['notify_details']
        alarm_id = details['alarm_uuid']
        metric_name = details['metric_name']
        operation = details['operation']
        threshold = details['threshold_value']
        vdu_name = details['vdu_name']
        vnf_member_index = details['vnf_member_index']
        ns_id = details['ns_id']
        # Implicit concatenation keeps source indentation out of the logged message.
        log.info(
            "Received alarm notification for alarm %s, "
            "metric %s, "
            "operation %s, "
            "threshold %s, "
            "vdu_name %s, "
            "vnf_member_index %s, "
            "ns_id %s ",
            alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id)
        try:
            alarm = ScalingAlarm.select().where(ScalingAlarm.alarm_id == alarm_id).get()
        except ScalingAlarm.DoesNotExist:
            log.info("There is no action configured for alarm %s.", alarm_id)
            return

        # Bind the related records once so the update and save() hit the same instance.
        policy = alarm.scaling_criteria.scaling_policy
        group = policy.scaling_group

        now = datetime.datetime.now()
        delta = now - policy.last_scale
        log.debug("last_scale: %s", policy.last_scale)
        log.debug("now: %s", now)
        log.debug("delta: %s", delta)
        if delta.total_seconds() < policy.cooldown_time:
            log.info("Time between last scale and now is less than cooldown time. Skipping.")
            return

        log.info("Sending scaling action message for ns: %s", group.nsr_id)
        self.lcm_client.scale(group.nsr_id,
                              group.name,
                              alarm.vnf_member_index,
                              alarm.action)
        # Timestamp taken after the LCM request so the cooldown starts from the actual scale call.
        policy.last_scale = datetime.datetime.now()
        policy.save()

    def _handle_instantiated_or_scaled(self, content):
        """Configure scaling groups once the referenced NS LCM operation has finished.

        :param content: decoded message; must contain ``nslcmop_id``.
        """
        log.debug("_handle_instantiated_or_scaled: %s", content)
        nslcmop_id = content['nslcmop_id']
        nslcmop = self.db_client.get_nslcmop(nslcmop_id)
        if nslcmop['operationState'] in ('COMPLETED', 'PARTIALLY_COMPLETED'):
            nsr_id = nslcmop['nsInstanceId']
            log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
            self._configure_scaling_groups(nsr_id)
        else:
            log.info(
                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
                "Current state is %s. Skipping...",
                nslcmop['operationState'])

    def _create_scaling_alarm(self, alarms_created, action, threshold, operation, nsr_id, vnfr, vdur,
                              vdu_monitoring_param, vnf_monitoring_param, scaling_criteria_record):
        """Create one alarm in MON, remember it for cleanup, and persist its DB record."""
        alarm_uuid = self.mon_client.create_alarm(
            metric_name=vdu_monitoring_param['nfvi-metric'],
            ns_id=nsr_id,
            vdu_name=vdur['name'],
            vnf_member_index=vnfr['member-vnf-index-ref'],
            threshold=threshold,
            operation=operation,
            statistic=vnf_monitoring_param['aggregation-type']
        )
        # Recorded before the DB insert: if the insert fails, the MON alarm already
        # exists and must still be cleaned up.
        # NOTE(review): the tuple is unpacked into MonClient.delete_alarm(*alarm); the order
        # (ns_id, vnf_member_index, vdu_name, alarm_uuid) is assumed - confirm against MonClient.
        alarms_created.append((nsr_id, vnfr['member-vnf-index-ref'], vdur['name'], alarm_uuid))
        ScalingAlarm.create(
            alarm_id=alarm_uuid,
            action=action,
            vnf_member_index=int(vnfr['member-vnf-index-ref']),
            vdu_name=vdur['name'],
            scaling_criteria=scaling_criteria_record
        )

    def _delete_mon_alarms(self, alarms_created):
        """Best-effort removal of MON alarms created during a failed configuration run."""
        for alarm in alarms_created:
            try:
                self.mon_client.delete_alarm(*alarm)
            except Exception:
                # Keep going: one failed deletion must not hide the original error
                # nor prevent the remaining alarms from being removed.
                log.exception("Error deleting alarm in MON: %s", alarm)

    def _configure_scaling_groups(self, nsr_id: str):
        """Create the scaling records and MON alarms for every VNF of a network service.

        Existing group, policy, criteria and alarm records are reused. All DB
        writes happen inside a transaction; if anything fails, the MON alarms
        created during this call are deleted and the exception is re-raised so
        the transaction rolls back.

        :param nsr_id: id of the network service record to configure.
        :raises Exception: whatever error interrupted the configuration.
        """
        log.debug("_configure_scaling_groups: %s", nsr_id)
        # TODO: Add support for non-nfvi metrics
        alarms_created = []
        with database.db.atomic():
            try:
                with database.db.atomic():
                    vnfrs = self.db_client.get_vnfrs(nsr_id)
                    log.info("Found %s vnfrs", len(vnfrs))
                    for vnfr in vnfrs:
                        vnfd = self.db_client.get_vnfd(vnfr['vnfd-id'])
                        log.info("Looking for vnfd %s", vnfr['vnfd-id'])
                        if 'scaling-group-descriptor' not in vnfd:
                            # A VNF without scaling groups has nothing to configure and must
                            # not abort the configuration of the other VNFs in the service.
                            log.debug("vnfd %s has no scaling group descriptors. Skipping...", vnfr['vnfd-id'])
                            continue
                        scaling_groups = vnfd['scaling-group-descriptor']
                        vnf_monitoring_params = vnfd['monitoring-param']
                        for scaling_group in scaling_groups:
                            try:
                                scaling_group_record = ScalingGroup.select().where(
                                    ScalingGroup.nsr_id == nsr_id,
                                    ScalingGroup.vnf_member_index == int(vnfr['member-vnf-index-ref']),
                                    ScalingGroup.name == scaling_group['name']
                                ).get()
                                log.info("Found existing scaling group record in DB...")
                            except ScalingGroup.DoesNotExist:
                                log.info("Creating scaling group record in DB...")
                                scaling_group_record = ScalingGroup.create(
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr['member-vnf-index-ref'],
                                    name=scaling_group['name'],
                                    content=json.dumps(scaling_group)
                                )
                                log.info(
                                    "Created scaling group record in DB : nsr_id=%s, vnf_member_index=%s, name=%s",
                                    scaling_group_record.nsr_id,
                                    scaling_group_record.vnf_member_index,
                                    scaling_group_record.name)
                            for scaling_policy in scaling_group['scaling-policy']:
                                # Only automatic policies are driven by alarms.
                                if scaling_policy['scaling-type'] != 'automatic':
                                    continue
                                try:
                                    scaling_policy_record = ScalingPolicy.select().join(ScalingGroup).where(
                                        ScalingPolicy.name == scaling_policy['name'],
                                        ScalingGroup.id == scaling_group_record.id
                                    ).get()
                                    log.info("Found existing scaling policy record in DB...")
                                except ScalingPolicy.DoesNotExist:
                                    log.info("Creating scaling policy record in DB...")
                                    scaling_policy_record = ScalingPolicy.create(
                                        nsr_id=nsr_id,
                                        name=scaling_policy['name'],
                                        cooldown_time=scaling_policy['cooldown-time'],
                                        scaling_group=scaling_group_record
                                    )
                                    log.info("Created scaling policy record in DB : name=%s, scaling_group.name=%s",
                                             scaling_policy_record.name,
                                             scaling_policy_record.scaling_group.name)

                                for scaling_criteria in scaling_policy['scaling-criteria']:
                                    try:
                                        scaling_criteria_record = ScalingCriteria.select().join(ScalingPolicy).where(
                                            ScalingPolicy.id == scaling_policy_record.id,
                                            ScalingCriteria.name == scaling_criteria['name']
                                        ).get()
                                        log.info("Found existing scaling criteria record in DB...")
                                    except ScalingCriteria.DoesNotExist:
                                        log.info("Creating scaling criteria record in DB...")
                                        scaling_criteria_record = ScalingCriteria.create(
                                            nsr_id=nsr_id,
                                            name=scaling_criteria['name'],
                                            scaling_policy=scaling_policy_record
                                        )
                                        log.info(
                                            "Created scaling criteria record in DB : name=%s, scaling_policy.name=%s",
                                            scaling_criteria_record.name,
                                            scaling_criteria_record.scaling_policy.name)

                                    for vdu_ref in scaling_group['vdu']:
                                        # Resolve the VNF-level monitoring param named by the criteria.
                                        vnf_monitoring_param = next(
                                            param for param in vnf_monitoring_params
                                            if param['id'] == scaling_criteria['vnf-monitoring-param-ref'])
                                        # Only the VDU that the monitoring param refers to gets alarms.
                                        if vdu_ref['vdu-id-ref'] != vnf_monitoring_param['vdu-ref']:
                                            continue
                                        vdu = next(
                                            candidate for candidate in vnfd['vdu']
                                            if candidate['id'] == vdu_ref['vdu-id-ref'])
                                        vdu_monitoring_param = next(
                                            param for param in vdu['monitoring-param']
                                            if param['id'] == vnf_monitoring_param['vdu-monitoring-param-ref'])
                                        vdurs = [
                                            vdur for vdur in vnfr['vdur']
                                            if vdur['vdu-id-ref'] == vnf_monitoring_param['vdu-ref']]
                                        for vdur in vdurs:
                                            try:
                                                ScalingAlarm.select().join(ScalingCriteria).where(
                                                    ScalingAlarm.vdu_name == vdur['name'],
                                                    ScalingCriteria.name == scaling_criteria['name']
                                                ).get()
                                                log.debug("vdu %s already has an alarm configured", vdur['name'])
                                                continue
                                            except ScalingAlarm.DoesNotExist:
                                                pass
                                            self._create_scaling_alarm(
                                                alarms_created,
                                                'scale_in',
                                                scaling_criteria['scale-in-threshold'],
                                                scaling_criteria['scale-in-relational-operation'],
                                                nsr_id, vnfr, vdur,
                                                vdu_monitoring_param, vnf_monitoring_param,
                                                scaling_criteria_record)
                                            self._create_scaling_alarm(
                                                alarms_created,
                                                'scale_out',
                                                scaling_criteria['scale-out-threshold'],
                                                scaling_criteria['scale-out-relational-operation'],
                                                nsr_id, vnfr, vdur,
                                                vdu_monitoring_param, vnf_monitoring_param,
                                                scaling_criteria_record)

            except Exception:
                log.exception("Error configuring scaling groups:")
                if alarms_created:
                    log.info("Cleaning alarm resources in MON")
                    self._delete_mon_alarms(alarms_created)
                # Re-raise so the enclosing transaction rolls back the DB records.
                raise