Feature 10918: Alarm Notification Enhancement
[osm/POL.git] / osm_policy_module / alarming / service.py
1 # -*- coding: utf-8 -*-
2 # pylint: disable=no-member
3
4 # Copyright 2018 Whitestack, LLC
5 # *************************************************************
6
7 # This file is part of OSM Policy module
8 # All Rights Reserved to Whitestack, LLC
9
10 # Licensed under the Apache License, Version 2.0 (the "License"); you may
11 # not use this file except in compliance with the License. You may obtain
12 # a copy of the License at
13
14 # http://www.apache.org/licenses/LICENSE-2.0
15
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
19 # License for the specific language governing permissions and limitations
20 # under the License.
21
22 # For those usages not covered by the Apache License, Version 2.0 please
23 # contact: bdiaz@whitestack.com or glavado@whitestack.com
24 ##
25 import asyncio
26 import json
27 import logging
28
29 import requests
30 from requests.exceptions import ConnectionError, RequestException
31
32 from osm_policy_module.common.common_db_client import CommonDbClient
33 from osm_policy_module.common.lcm_client import LcmClient
34 from osm_policy_module.common.mon_client import MonClient
35 from osm_policy_module.core import database
36 from osm_policy_module.core.config import Config
37 from osm_policy_module.core.database import (
38 VnfAlarm,
39 VnfAlarmRepository,
40 AlarmActionRepository,
41 )
42 from osm_policy_module.core.exceptions import VdurNotFound
43
44 log = logging.getLogger(__name__)
45
46
class AlarmingService:
    """Create, delete and handle VNF alarms for network services.

    Alarm records are kept through ``VnfAlarmRepository`` /
    ``AlarmActionRepository`` and the matching alarms are created and deleted
    through the MON client. Incoming alarm notifications are forwarded to the
    webhook urls stored as alarm actions.
    """

    # Action types looked up in an alarm descriptor, in processing order.
    _ACTION_TYPES = ("ok", "insufficient-data", "alarm")

    def __init__(self, config: "Config", loop=None):
        """Store the configuration and build the DB, MON and LCM clients.

        :param config: configuration handed to every client; ``handle_alarm``
            also reads its ``alert`` section.
        :param loop: event loop handed to the MON and LCM clients. When it is
            falsy, ``asyncio.get_event_loop()`` is used instead.
        """
        self.conf = config
        self.loop = loop or asyncio.get_event_loop()
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config, loop=self.loop)
        self.lcm_client = LcmClient(config, loop=self.loop)

    async def configure_vnf_alarms(self, nsr_id: str):
        """Create the alarms declared by the VDUs of a network service.

        For every vdur of every vnfr of ``nsr_id``, each alarm descriptor of
        the matching vdu that has no ``VnfAlarm`` record yet is created through
        the MON client and then stored together with its webhook actions.

        If anything fails, the alarms created through the MON client during
        this call are deleted again before the error is re-raised.

        :param nsr_id: id of the network service whose vnfrs are processed.
        :raises ValueError: when a vdur references a vdu id that is missing
            from the vnfd, or when no metric name can be resolved for an alarm.
        """
        log.info("Configuring vnf alarms for network service %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                for vnfr in self.db_client.get_vnfrs(nsr_id):
                    log.debug("Processing vnfr: %s", vnfr)
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    for vdur in vnfr["vdur"]:
                        vdu = next(
                            (
                                candidate
                                for candidate in vnfd["vdu"]
                                if candidate["id"] == vdur["vdu-id-ref"]
                            ),
                            None,
                        )
                        if vdu is None:
                            # A bare next() would leak StopIteration out of this
                            # coroutine, which surfaces as an opaque RuntimeError.
                            raise ValueError(
                                "No vdu with id %s found in vnfd %s"
                                % (vdur["vdu-id-ref"], vnfr["vnfd-id"])
                            )
                        for alarm_descriptor in vdu.get("alarm", []):
                            try:
                                VnfAlarmRepository.get(
                                    VnfAlarm.alarm_id == alarm_descriptor["alarm-id"],
                                    VnfAlarm.vnf_member_index
                                    == vnfr["member-vnf-index-ref"],
                                    VnfAlarm.vdu_name == vdur["name"],
                                    VnfAlarm.nsr_id == nsr_id,
                                )
                            except VnfAlarm.DoesNotExist:
                                pass
                            else:
                                log.debug(
                                    "vdu %s already has an alarm configured with same id %s",
                                    vdur["name"],
                                    alarm_descriptor["alarm-id"],
                                )
                                continue

                            # Falls back to {} when the referenced parameter is
                            # not declared; _get_metric_name then raises ValueError.
                            vnf_monitoring_param = next(
                                (
                                    param
                                    for param in vdu.get("monitoring-parameter", [])
                                    if param["id"]
                                    == alarm_descriptor["vnf-monitoring-param-ref"]
                                ),
                                {},
                            )
                            metric_name = self._get_metric_name(vnf_monitoring_param)

                            # Materialised up front so a malformed action entry
                            # fails before anything is created through MON.
                            action_urls = list(self._iter_action_urls(alarm_descriptor))
                            alarm_action = {}
                            if action_urls:
                                alarm_action["webhook"] = [url for _, url in action_urls]

                            alarm_uuid = await self.mon_client.create_alarm(
                                metric_name=metric_name,
                                ns_id=nsr_id,
                                vdu_name=vdur["name"],
                                vnf_member_index=vnfr["member-vnf-index-ref"],
                                threshold=alarm_descriptor["value"],
                                operation=alarm_descriptor["operation"],
                                action=str(alarm_action),
                            )
                            alarm = VnfAlarmRepository.create(
                                alarm_id=alarm_descriptor["alarm-id"],
                                alarm_uuid=alarm_uuid,
                                nsr_id=nsr_id,
                                vnf_member_index=vnfr["member-vnf-index-ref"],
                                vdu_name=vdur["name"],
                                last_action="insufficient-data",
                                id_suffix=0,
                                ok_ack=False,
                                alarm_ack=False,
                            )
                            # Register for cleanup right away: if storing the
                            # actions below fails, the alarm must still be
                            # removed through the MON client.
                            alarms_created.append(alarm)
                            for action_type, url in action_urls:
                                AlarmActionRepository.create(
                                    type=action_type,
                                    url=url,
                                    alarm=alarm,
                                )

        except Exception as e:
            log.exception("Error configuring VNF alarms:")
            if alarms_created:
                log.debug("Cleaning alarm resources in MON")
                for alarm in alarms_created:
                    await self._delete_alarm_in_mon(alarm)
            # Re-raised by name because this handler awaits above.
            raise e
        finally:
            database.db.close()

    async def delete_orphaned_alarms(self, nsr_id):
        """Delete the alarms of ``nsr_id`` whose vdur can no longer be found.

        An alarm counts as orphaned when ``db_client.get_vdur`` raises
        ``VdurNotFound`` for it; it is then deleted through the MON client and
        its record is removed.

        :param nsr_id: id of the network service whose alarms are checked.
        """
        # TODO: Review as it seems this code is never called
        log.info("Deleting orphaned vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in VnfAlarmRepository.list(VnfAlarm.nsr_id == nsr_id):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdu_name
                        )
                    except VdurNotFound:
                        log.debug("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_alarm_in_mon(alarm)
                        alarm.delete_instance()
        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_vnf_alarms(self, nsr_id):
        """Delete every alarm recorded for ``nsr_id``.

        Each alarm is deleted through the MON client and its record is removed
        afterwards, even when the MON deletion reported a ``ValueError``.

        :param nsr_id: id of the network service whose alarms are deleted.
        """
        log.info("Deleting vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in VnfAlarmRepository.list(VnfAlarm.nsr_id == nsr_id):
                    log.debug("Deleting vnf alarm %s", alarm.alarm_uuid)
                    await self._delete_alarm_in_mon(alarm)
                    alarm.delete_instance()
        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str, payload: dict):
        """Forward an alarm notification to the webhooks registered for ``status``.

        Every action of the alarm whose type equals ``status`` is notified.
        With ``alert.enhanced_alarms`` enabled, repeated "ok" / "alarm"
        notifications that were already acknowledged (HTTP 200) are suppressed
        and the payload is tagged with the alarm number; otherwise the payload
        is simply posted to the url.

        :param alarm_uuid: uuid used to look up the ``VnfAlarm`` record. When no
            record exists, this is logged and nothing else happens.
        :param status: alarm status; the code distinguishes "ok", "alarm" and
            "insufficient-data".
        :param payload: notification body. In enhanced mode
            ``payload["notify_details"]["alarm_number"]`` is set in place.
        """
        alert_timeout = int(self.conf.get("alert", "timeout"))
        # Parsed rather than bool()-ed: bool("False") is True.
        enhanced_alarms = self._is_enabled(self.conf.get("alert", "enhanced_alarms"))
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = VnfAlarmRepository.get(VnfAlarm.alarm_uuid == alarm_uuid)
                log.debug(
                    "Handling vnf alarm %s with status %s", alarm.alarm_id, status
                )
                for action in alarm.actions:
                    if action.type != status:
                        continue
                    if enhanced_alarms:
                        self._notify_enhanced(
                            alarm, action, status, payload, alert_timeout
                        )
                    else:
                        self._notify_plain(alarm, action, status, payload, alert_timeout)

        except VnfAlarm.DoesNotExist:
            log.debug(
                "There is no alarming action configured for alarm %s.", alarm_uuid
            )
        finally:
            database.db.close()

    def _notify_enhanced(self, alarm, action, status, payload, timeout):
        """Post ``payload`` to ``action.url`` with acknowledgement tracking."""
        # Compare the current status with what was already acknowledged:
        # an acknowledged "ok" (or "alarm") is not sent again.
        # NOTE(review): the ack flag is set after the first successful POST, so
        # further actions of the same type in the same call are skipped by this
        # check - confirm this is intended.
        already_acked = (status == "ok" and alarm.ok_ack is not False) or (
            status == "alarm" and alarm.alarm_ack is not False
        )
        if already_acked:
            return

        log.info(
            "Executing request to url %s for vnf alarm %s with status %s",
            action.url,
            alarm.alarm_id,
            status,
        )
        try:
            # A new ok -> alarm transition starts a new alarm occurrence.
            if status == "alarm" and alarm.last_action == "ok":
                alarm.id_suffix += 1
                alarm.ok_ack = False
            if status == "ok" and alarm.last_action == "alarm":
                alarm.alarm_ack = False
            alarm.last_action = status
            alarm.save()
        except Exception as e:
            # Best effort: a failed state update must not block the notification.
            log.exception(e)

        payload.setdefault("notify_details", {})["alarm_number"] = alarm.id_suffix
        headers = {"content-type": "application/json"}
        try:
            # NOTE(review): verify=False disables TLS certificate verification.
            # NOTE(review): requests.post blocks the event loop while it runs.
            resp = requests.post(
                url=action.url,
                data=json.dumps(payload),
                headers=headers,
                verify=False,
                timeout=timeout,
            )
            log.info("Response %s", resp)
            if resp.status_code == 200:
                if status == "ok":
                    alarm.ok_ack = True
                    alarm.save()
                if status == "alarm":
                    alarm.alarm_ack = True
                    alarm.save()
            if status == "insufficient-data":
                alarm.alarm_ack = False
                alarm.ok_ack = False
                alarm.save()
        except ConnectionError:
            # Must precede RequestException, its base class, to be reachable.
            log.exception("Error connecting to url %s", action.url)
        except RequestException as e:
            log.info("Error: RequestException while connecting to url %s", action.url)
            log.debug("RequestException %s", e)

    def _notify_plain(self, alarm, action, status, payload, timeout):
        """Post ``payload`` to ``action.url`` without acknowledgement tracking."""
        log.info(
            "Executing request to url %s for vnf alarm %s with status %s",
            action.url,
            alarm.alarm_id,
            status,
        )
        try:
            # NOTE(review): json=json.dumps(...) sends a JSON-encoded string, not
            # an object; kept as is because receivers may rely on that format.
            requests.post(url=action.url, json=json.dumps(payload), timeout=timeout)
        except ConnectionError:
            # Must precede RequestException, its base class, to be reachable.
            log.exception("Error connecting to url %s", action.url)
        except RequestException as e:
            log.info("Error: RequestException while connecting to url %s", action.url)
            log.debug("RequestException %s", e)

    async def _delete_alarm_in_mon(self, alarm):
        """Delete ``alarm`` through the MON client, logging a ``ValueError`` instead of raising."""
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdu_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    @classmethod
    def _iter_action_urls(cls, alarm_descriptor):
        """Yield ``(action_type, url)`` for every action url of an alarm descriptor."""
        actions = alarm_descriptor.get("actions") or {}
        for action_type in cls._ACTION_TYPES:
            for action in actions.get(action_type) or ():
                yield action_type, action["url"]

    @staticmethod
    def _is_enabled(value) -> bool:
        """Interpret a configuration flag that may be a bool or a string.

        Strings such as "false", "0", "no", "off", "none" and "" (any case,
        surrounding blanks ignored) are False; any other string is True.
        Non-string values use their ordinary truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ("", "0", "false", "no", "off", "none")
        return bool(value)

    def _get_metric_name(self, vnf_monitoring_param: dict):
        """Return the ``performance-metric`` of a vnf monitoring parameter.

        :param vnf_monitoring_param: monitoring parameter dict; may be empty
            when the referenced parameter was not found.
        :raises ValueError: when the dict has no ``performance-metric`` key.
        """
        if "performance-metric" in vnf_monitoring_param:
            return vnf_monitoring_param["performance-metric"]
        # .get(): the dict may lack "id" too, which must not turn the intended
        # ValueError into a KeyError.
        raise ValueError(
            "No metric name found for vnf_monitoring_param %s"
            % vnf_monitoring_param.get("id")
        )