Fixes discrepancies with IM related to monitoring params
[osm/POL.git] / osm_policy_module / core / agent.py
1 # -*- coding: utf-8 -*-
2
3 # Copyright 2018 Whitestack, LLC
4 # *************************************************************
5
6 # This file is part of OSM Monitoring module
7 # All Rights Reserved to Whitestack, LLC
8
9 # Licensed under the Apache License, Version 2.0 (the "License"); you may
10 # not use this file except in compliance with the License. You may obtain
11 # a copy of the License at
12
13 # http://www.apache.org/licenses/LICENSE-2.0
14
15 # Unless required by applicable law or agreed to in writing, software
16 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18 # License for the specific language governing permissions and limitations
19 # under the License.
20
21 # For those usages not covered by the Apache License, Version 2.0 please
22 # contact: bdiaz@whitestack.com or glavado@whitestack.com
23 ##
24 import asyncio
25 import datetime
26 import json
27 import logging
28 from json import JSONDecodeError
29
30 import yaml
31 from aiokafka import AIOKafkaConsumer
32
33 from osm_policy_module.common.common_db_client import CommonDbClient
34 from osm_policy_module.common.lcm_client import LcmClient
35 from osm_policy_module.common.mon_client import MonClient
36 from osm_policy_module.core import database
37 from osm_policy_module.core.config import Config
38 from osm_policy_module.core.database import ScalingGroup, ScalingAlarm, ScalingPolicy, ScalingCriteria
39
# Module-wide logger, named after this module.
log = logging.getLogger(__name__)

# Kafka message keys that PolicyModuleAgent._process_msg acts on; messages
# carrying any other key are only logged at debug level.
ALLOWED_KAFKA_KEYS = [
    'instantiated',
    'scaled',
    'notify_alarm',
]
43
44
45 class PolicyModuleAgent:
def __init__(self, loop=None):
    """Set up the event loop, the service clients and the Kafka address.

    :param loop: asyncio event loop to use; when falsy, the loop returned
        by ``asyncio.get_event_loop()`` is used instead.
    """
    settings = Config.instance()
    self.loop = loop if loop else asyncio.get_event_loop()
    self.db_client = CommonDbClient()
    # MON and LCM clients are bound to the same loop as the agent.
    self.mon_client = MonClient(loop=self.loop)
    self.lcm_client = LcmClient(loop=self.loop)
    # Kafka bootstrap address in "host:port" form.
    broker_host = settings.OSMPOL_MESSAGE_HOST
    broker_port = settings.OSMPOL_MESSAGE_PORT
    self.kafka_server = '{}:{}'.format(broker_host, broker_port)
56
def run(self):
    """Run the coroutine returned by ``start()`` on the agent's loop until it completes."""
    main_coro = self.start()
    self.loop.run_until_complete(main_coro)
59
async def start(self):
    """Consume the "ns" and "alarm_response" Kafka topics.

    Every received message is handed to ``_process_msg``. The consumer is
    stopped when iteration ends or an exception propagates out of the loop.
    """
    topics = ("ns", "alarm_response")
    kafka_consumer = AIOKafkaConsumer(
        *topics,
        loop=self.loop,
        bootstrap_servers=self.kafka_server,
        group_id="pol-consumer",
        # Keys and values arrive as bytes and are decoded to str.
        key_deserializer=bytes.decode,
        value_deserializer=bytes.decode,
    )
    await kafka_consumer.start()
    try:
        async for record in kafka_consumer:
            await self._process_msg(record.topic, record.key, record.value)
    finally:
        await kafka_consumer.stop()
76
async def _process_msg(self, topic, key, msg):
    """Decode one Kafka message and dispatch it according to its key.

    Messages whose key is not in ``ALLOWED_KAFKA_KEYS`` are logged at debug
    level and ignored. Any exception raised while decoding or handling the
    message is logged and not re-raised.

    :param topic: topic the message was read from (only logged).
    :param key: message key used to select the handler.
    :param msg: message body as text, parsed as JSON or, failing that, YAML.
    """
    log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg)
    try:
        if key not in ALLOWED_KAFKA_KEYS:
            log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key)
            return

        # JSON is tried first; anything that is not valid JSON goes to YAML.
        try:
            payload = json.loads(msg)
        except JSONDecodeError:
            payload = yaml.safe_load(msg)

        if key in ('instantiated', 'scaled'):
            await self._handle_instantiated_or_scaled(payload)
        elif key == 'notify_alarm':
            await self._handle_alarm_notification(payload)
    except Exception:
        log.exception("Error consuming message: ")
95
async def _handle_alarm_notification(self, content):
    """React to an alarm notification by requesting a scaling action.

    Looks up the ``ScalingAlarm`` whose ``alarm_id`` matches the
    notification. If none exists the notification is only logged. Otherwise,
    unless the time since the owning policy's ``last_scale`` is shorter than
    its ``cooldown_time``, a scale request is sent through the LCM client and
    ``last_scale`` is updated and saved.

    :param content: decoded notification; must hold a ``notify_details``
        mapping with the keys ``alarm_uuid``, ``metric_name``, ``operation``,
        ``threshold_value``, ``vdu_name``, ``vnf_member_index`` and ``ns_id``.
    :raises KeyError: if ``notify_details`` or one of those keys is missing.
    """
    log.debug("_handle_alarm_notification: %s", content)
    details = content['notify_details']
    alarm_id = details['alarm_uuid']
    metric_name = details['metric_name']
    operation = details['operation']
    threshold = details['threshold_value']
    vdu_name = details['vdu_name']
    vnf_member_index = details['vnf_member_index']
    ns_id = details['ns_id']
    # Adjacent literals are used instead of backslash continuations inside
    # one literal, which would embed the source indentation in the message.
    log.info(
        "Received alarm notification for alarm %s, "
        "metric %s, "
        "operation %s, "
        "threshold %s, "
        "vdu_name %s, "
        "vnf_member_index %s, "
        "ns_id %s",
        alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id)

    # Only the lookup can legitimately miss; keep the try body that narrow.
    try:
        alarm = ScalingAlarm.select().where(ScalingAlarm.alarm_id == alarm_id).get()
    except ScalingAlarm.DoesNotExist:
        log.info("There is no action configured for alarm %s.", alarm_id)
        return

    policy = alarm.scaling_criteria.scaling_policy
    now = datetime.datetime.now()
    delta = now - policy.last_scale
    log.debug("last_scale: %s", policy.last_scale)
    log.debug("now: %s", now)
    log.debug("delta: %s", delta)
    if delta.total_seconds() < policy.cooldown_time:
        log.info("Time between last scale and now is less than cooldown time. Skipping.")
        return

    scaling_group = policy.scaling_group
    # Log the network service id the message says it logs (was the alarm id).
    log.info("Sending scaling action message for ns: %s", scaling_group.nsr_id)
    await self.lcm_client.scale(scaling_group.nsr_id,
                                scaling_group.name,
                                alarm.vnf_member_index,
                                alarm.action)
    # Stamp the policy after the request was sent, so the cooldown is
    # measured from the completed scale call.
    policy.last_scale = datetime.datetime.now()
    policy.save()
132
async def _handle_instantiated_or_scaled(self, content):
    """Configure scaling groups once an NS operation has finished.

    Fetches the operation named by ``content['nslcmop_id']`` from the common
    DB. When its ``operationState`` is ``COMPLETED`` or
    ``PARTIALLY_COMPLETED``, scaling groups are configured for the
    operation's ``nsInstanceId``; otherwise the message is skipped.

    :param content: decoded message; must contain ``nslcmop_id``.
    """
    log.debug("_handle_instantiated_or_scaled: %s", content)
    operation = self.db_client.get_nslcmop(content['nslcmop_id'])
    state = operation['operationState']
    if state not in ('COMPLETED', 'PARTIALLY_COMPLETED'):
        log.info(
            "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
            "Current state is %s. Skipping...",
            state)
        return

    nsr_id = operation['nsInstanceId']
    log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
    await self._configure_scaling_groups(nsr_id)
146
async def _configure_scaling_groups(self, nsr_id: str):
    """Create scaling records and MON alarms for one network service.

    For every VNF record of ``nsr_id`` whose descriptor declares scaling
    groups, makes sure a ``ScalingGroup``, a ``ScalingPolicy`` (policies of
    type ``automatic`` only) and a ``ScalingCriteria`` record exist. For each
    matching VDU record that has no alarm yet, a scale-in and a scale-out
    alarm are created in MON and stored as ``ScalingAlarm`` records. VNF
    descriptors without ``scaling-group-descriptor`` are skipped.

    All database work runs inside ``database.db.atomic()``.

    :param nsr_id: id of the network service record to configure.
    :raises Exception: any failure is logged and re-raised.
    """
    log.debug("_configure_scaling_groups: %s", nsr_id)
    # TODO: Add support for non-nfvi metrics
    # NOTE(review): nothing ever appends to this list, so the MON cleanup in
    # the except branch below never runs. Populating it requires knowing the
    # positional arguments mon_client.delete_alarm expects - confirm and fix.
    alarms_created = []
    with database.db.atomic():
        try:
            with database.db.atomic():
                vnfrs = self.db_client.get_vnfrs(nsr_id)
                log.info("Found %s vnfrs", len(vnfrs))
                for vnfr in vnfrs:
                    vnfd = self.db_client.get_vnfd(vnfr['vnfd-id'])
                    log.info("Looking for vnfd %s", vnfr['vnfd-id'])
                    if 'scaling-group-descriptor' not in vnfd:
                        # A VNF without scaling groups must not abort the
                        # configuration of the remaining VNFs.
                        log.debug("vnfd %s has no scaling-group-descriptor. Skipping.", vnfr['vnfd-id'])
                        continue
                    scaling_groups = vnfd['scaling-group-descriptor']
                    vnf_monitoring_params = vnfd['monitoring-param']
                    for scaling_group in scaling_groups:
                        group_record = self._get_or_create_scaling_group(nsr_id, vnfr, scaling_group)
                        for scaling_policy in scaling_group['scaling-policy']:
                            if scaling_policy['scaling-type'] != 'automatic':
                                continue
                            policy_record = self._get_or_create_scaling_policy(
                                nsr_id, scaling_policy, group_record)
                            for scaling_criteria in scaling_policy['scaling-criteria']:
                                criteria_record = self._get_or_create_scaling_criteria(
                                    nsr_id, scaling_criteria, policy_record)
                                for vdu_ref in scaling_group['vdu']:
                                    targets = self._resolve_alarm_targets(
                                        vnfd, vnfr, vnf_monitoring_params, vdu_ref, scaling_criteria)
                                    if targets is None:
                                        continue
                                    vnf_monitoring_param, vdu_monitoring_param, vdurs = targets
                                    for vdur in vdurs:
                                        if self._scaling_alarm_exists(
                                                nsr_id, vdur['name'],
                                                scaling_criteria['name'], scaling_policy['name']):
                                            log.debug("vdu %s already has an alarm configured", vdur['name'])
                                            continue
                                        await self._create_scaling_alarms(
                                            nsr_id, vnfr, vdur, scaling_criteria, criteria_record,
                                            vnf_monitoring_param, vdu_monitoring_param)

        except Exception:
            log.exception("Error configuring scaling groups:")
            if alarms_created:
                log.info("Cleaning alarm resources in MON")
                for alarm in alarms_created:
                    await self.mon_client.delete_alarm(*alarm)
            raise


def _get_or_create_scaling_group(self, nsr_id, vnfr, scaling_group):
    """Return the ScalingGroup record for a descriptor entry, creating it if absent."""
    try:
        record = ScalingGroup.select().where(
            ScalingGroup.nsr_id == nsr_id,
            ScalingGroup.vnf_member_index == int(vnfr['member-vnf-index-ref']),
            ScalingGroup.name == scaling_group['name']
        ).get()
        log.info("Found existing scaling group record in DB...")
    except ScalingGroup.DoesNotExist:
        log.info("Creating scaling group record in DB...")
        record = ScalingGroup.create(
            nsr_id=nsr_id,
            vnf_member_index=vnfr['member-vnf-index-ref'],
            name=scaling_group['name'],
            content=json.dumps(scaling_group)
        )
        log.info(
            "Created scaling group record in DB : nsr_id=%s, vnf_member_index=%s, name=%s",
            record.nsr_id,
            record.vnf_member_index,
            record.name)
    return record


def _get_or_create_scaling_policy(self, nsr_id, scaling_policy, scaling_group_record):
    """Return the ScalingPolicy record of a scaling group, creating it if absent."""
    try:
        record = ScalingPolicy.select().join(ScalingGroup).where(
            ScalingPolicy.name == scaling_policy['name'],
            ScalingGroup.id == scaling_group_record.id
        ).get()
        log.info("Found existing scaling policy record in DB...")
    except ScalingPolicy.DoesNotExist:
        log.info("Creating scaling policy record in DB...")
        record = ScalingPolicy.create(
            nsr_id=nsr_id,
            name=scaling_policy['name'],
            cooldown_time=scaling_policy['cooldown-time'],
            scaling_group=scaling_group_record
        )
        log.info("Created scaling policy record in DB : name=%s, scaling_group.name=%s",
                 record.name,
                 record.scaling_group.name)
    return record


def _get_or_create_scaling_criteria(self, nsr_id, scaling_criteria, scaling_policy_record):
    """Return the ScalingCriteria record of a scaling policy, creating it if absent."""
    try:
        record = ScalingCriteria.select().join(ScalingPolicy).where(
            ScalingPolicy.id == scaling_policy_record.id,
            ScalingCriteria.name == scaling_criteria['name']
        ).get()
        log.info("Found existing scaling criteria record in DB...")
    except ScalingCriteria.DoesNotExist:
        log.info("Creating scaling criteria record in DB...")
        record = ScalingCriteria.create(
            nsr_id=nsr_id,
            name=scaling_criteria['name'],
            scaling_policy=scaling_policy_record
        )
        log.info(
            "Created scaling criteria record in DB : name=%s, scaling_policy.name=%s",
            record.name,
            record.scaling_policy.name)
    return record


def _resolve_alarm_targets(self, vnfd, vnfr, vnf_monitoring_params, vdu_ref, scaling_criteria):
    """Resolve the monitoring params and VDU records a criteria applies to.

    Returns ``None`` when ``vdu_ref`` is not the VDU the criteria's VNF
    monitoring param points at; otherwise a tuple
    ``(vnf_monitoring_param, vdu_monitoring_param, vdurs)``.
    Raises ``StopIteration`` when a referenced param or VDU is not found.
    """
    vnf_monitoring_param = next(
        param for param in vnf_monitoring_params
        if param['id'] == scaling_criteria['vnf-monitoring-param-ref'])
    vdu_param_ref = vnf_monitoring_param['vdu-monitoring-param']
    if vdu_ref['vdu-id-ref'] != vdu_param_ref['vdu-ref']:
        return None
    vdu = next(candidate for candidate in vnfd['vdu'] if candidate['id'] == vdu_ref['vdu-id-ref'])
    vdu_monitoring_param = next(
        param for param in vdu['monitoring-param']
        if param['id'] == vdu_param_ref['vdu-monitoring-param-ref'])
    vdurs = [vdur for vdur in vnfr['vdur'] if vdur['vdu-id-ref'] == vdu_param_ref['vdu-ref']]
    return vnf_monitoring_param, vdu_monitoring_param, vdurs


def _scaling_alarm_exists(self, nsr_id, vdu_name, criteria_name, policy_name):
    """Tell whether a ScalingAlarm already exists for this VDU, criteria, policy and NS."""
    try:
        (ScalingAlarm.select()
         .join(ScalingCriteria)
         .join(ScalingPolicy)
         .join(ScalingGroup)
         .where(
             ScalingAlarm.vdu_name == vdu_name,
             ScalingCriteria.name == criteria_name,
             ScalingPolicy.name == policy_name,
             ScalingGroup.nsr_id == nsr_id
         ).get())
    except ScalingAlarm.DoesNotExist:
        return False
    return True


async def _create_scaling_alarms(self, nsr_id, vnfr, vdur, scaling_criteria, scaling_criteria_record,
                                 vnf_monitoring_param, vdu_monitoring_param):
    """Create the scale-in and then the scale-out alarm for one VDU record.

    Each alarm is first created in MON and then stored as a ScalingAlarm
    record carrying the uuid MON returned.
    """
    alarm_specs = (
        ('scale_in', 'scale-in-threshold', 'scale-in-relational-operation'),
        ('scale_out', 'scale-out-threshold', 'scale-out-relational-operation'),
    )
    for action, threshold_key, operation_key in alarm_specs:
        alarm_uuid = await self.mon_client.create_alarm(
            metric_name=vdu_monitoring_param['nfvi-metric'],
            ns_id=nsr_id,
            vdu_name=vdur['name'],
            vnf_member_index=vnfr['member-vnf-index-ref'],
            threshold=scaling_criteria[threshold_key],
            operation=scaling_criteria[operation_key],
            statistic=vnf_monitoring_param['aggregation-type']
        )
        ScalingAlarm.create(
            alarm_id=alarm_uuid,
            action=action,
            vnf_member_index=int(vnfr['member-vnf-index-ref']),
            vdu_name=vdur['name'],
            scaling_criteria=scaling_criteria_record
        )