Resolved bug 1719 - POL hangs waiting for webhook response
[osm/POL.git] / osm_policy_module / alarming / service.py
1 # -*- coding: utf-8 -*-
2 # pylint: disable=no-member
3
4 # Copyright 2018 Whitestack, LLC
5 # *************************************************************
6
7 # This file is part of OSM Monitoring module
8 # All Rights Reserved to Whitestack, LLC
9
10 # Licensed under the Apache License, Version 2.0 (the "License"); you may
11 # not use this file except in compliance with the License. You may obtain
12 # a copy of the License at
13
14 # http://www.apache.org/licenses/LICENSE-2.0
15
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
19 # License for the specific language governing permissions and limitations
20 # under the License.
21
22 # For those usages not covered by the Apache License, Version 2.0 please
23 # contact: bdiaz@whitestack.com or glavado@whitestack.com
24 ##
25 import asyncio
26 import json
27 import logging
28
29 import requests
30 from requests.exceptions import ConnectionError, RequestException
31
32 from osm_policy_module.common.common_db_client import CommonDbClient
33 from osm_policy_module.common.lcm_client import LcmClient
34 from osm_policy_module.common.mon_client import MonClient
35 from osm_policy_module.core import database
36 from osm_policy_module.core.config import Config
37 from osm_policy_module.core.database import (
38 VnfAlarm,
39 VnfAlarmRepository,
40 AlarmActionRepository,
41 )
42 from osm_policy_module.core.exceptions import VdurNotFound
43
44 log = logging.getLogger(__name__)
45
46
class AlarmingService:
    """Create, delete and react to VNF alarms.

    Alarms are registered remotely through ``MonClient`` and mirrored locally
    through ``VnfAlarmRepository`` / ``AlarmActionRepository``. Every public
    coroutine opens the policy database connection, works inside a single
    ``database.db.atomic()`` transaction and closes the connection when done.
    """

    # Alarm states for which a descriptor may declare webhook actions, in the
    # order in which they are processed.
    _ACTION_TYPES = ("ok", "insufficient-data", "alarm")

    def __init__(self, config: Config, loop=None):
        """Build the service and the clients it depends on.

        :param config: module configuration, handed to every client.
        :param loop: event loop passed to the MON and LCM clients; when falsy,
            ``asyncio.get_event_loop()`` is used.
        """
        self.conf = config
        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config, loop=self.loop)
        self.lcm_client = LcmClient(config, loop=self.loop)

    @staticmethod
    def _iter_action_urls(alarm_descriptor: dict):
        """Yield ``(action_type, url)`` for every webhook declared in the descriptor."""
        declared_actions = alarm_descriptor.get("actions")
        if not declared_actions:
            return
        for action_type in AlarmingService._ACTION_TYPES:
            if action_type in declared_actions:
                for entry in declared_actions[action_type]:
                    yield action_type, entry["url"]

    async def configure_vnf_alarms(self, nsr_id: str):
        """Create the alarms declared in the VNFDs of a network service.

        For each VDU record of each VNF record of ``nsr_id``, every alarm
        descriptor of the matching VDU that is not already stored locally is
        created through the MON client and saved together with its actions.

        If anything fails, the alarms created in MON during this call are
        deleted again (best effort) and the original exception is re-raised.

        :param nsr_id: identifier of the network service record.
        :raises ValueError: when a VDU record references a VDU missing from
            the descriptor, or a monitoring parameter has no metric name.
        """
        log.info("Configuring vnf alarms for network service %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                for vnfr in self.db_client.get_vnfrs(nsr_id):
                    log.debug("Processing vnfr: %s", vnfr)
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    for vdur in vnfr["vdur"]:
                        # An explicit default avoids StopIteration escaping a
                        # coroutine, which would surface as a RuntimeError.
                        vdu = next(
                            (
                                candidate
                                for candidate in vnfd["vdu"]
                                if candidate["id"] == vdur["vdu-id-ref"]
                            ),
                            None,
                        )
                        if vdu is None:
                            raise ValueError(
                                "No vdu %s found in vnfd %s"
                                % (vdur["vdu-id-ref"], vnfr["vnfd-id"])
                            )
                        if "alarm" not in vdu:
                            continue
                        for alarm_descriptor in vdu["alarm"]:
                            try:
                                VnfAlarmRepository.get(
                                    VnfAlarm.alarm_id == alarm_descriptor["alarm-id"],
                                    VnfAlarm.vnf_member_index
                                    == vnfr["member-vnf-index-ref"],
                                    VnfAlarm.vdu_name == vdur["name"],
                                    VnfAlarm.nsr_id == nsr_id,
                                )
                                log.debug(
                                    "vdu %s already has an alarm configured with same id %s",
                                    vdur["name"],
                                    alarm_descriptor["alarm-id"],
                                )
                                continue
                            except VnfAlarm.DoesNotExist:
                                pass
                            # Falls back to {} when no parameter matches;
                            # _get_metric_name then raises ValueError.
                            vnf_monitoring_param = next(
                                (
                                    param
                                    for param in vdu.get("monitoring-parameter", [])
                                    if param["id"]
                                    == alarm_descriptor["vnf-monitoring-param-ref"]
                                ),
                                {},
                            )
                            metric_name = self._get_metric_name(vnf_monitoring_param)
                            action_urls = list(
                                self._iter_action_urls(alarm_descriptor)
                            )
                            # The "webhook" key is only present when at least
                            # one URL is declared.
                            alarm_action = dict()
                            if action_urls:
                                alarm_action["webhook"] = [
                                    url for _, url in action_urls
                                ]
                            alarm_uuid = await self.mon_client.create_alarm(
                                metric_name=metric_name,
                                ns_id=nsr_id,
                                vdu_name=vdur["name"],
                                vnf_member_index=vnfr["member-vnf-index-ref"],
                                threshold=alarm_descriptor["value"],
                                operation=alarm_descriptor["operation"],
                                action=str(alarm_action),
                            )
                            alarm = VnfAlarmRepository.create(
                                alarm_id=alarm_descriptor["alarm-id"],
                                alarm_uuid=alarm_uuid,
                                nsr_id=nsr_id,
                                vnf_member_index=vnfr["member-vnf-index-ref"],
                                vdu_name=vdur["name"],
                            )
                            # Track the alarm before storing its actions so a
                            # failure below still removes it from MON.
                            alarms_created.append(alarm)
                            for action_type, url in action_urls:
                                AlarmActionRepository.create(
                                    type=action_type,
                                    url=url,
                                    alarm=alarm,
                                )

        except Exception:
            log.exception("Error configuring VNF alarms:")
            if alarms_created:
                log.debug("Cleaning alarm resources in MON")
                for alarm in alarms_created:
                    try:
                        await self.mon_client.delete_alarm(
                            alarm.nsr_id,
                            alarm.vnf_member_index,
                            alarm.vdu_name,
                            alarm.alarm_uuid,
                        )
                    except ValueError:
                        log.exception(
                            "Error deleting alarm in MON %s", alarm.alarm_uuid
                        )
            raise
        finally:
            database.db.close()

    async def delete_orphaned_alarms(self, nsr_id):
        """Delete stored alarms of ``nsr_id`` whose VDU record no longer exists.

        An alarm is treated as orphaned when ``db_client.get_vdur`` raises
        ``VdurNotFound`` for it. Such an alarm is deleted through the MON
        client (a ``ValueError`` there is only logged) and then removed from
        the local database.

        :param nsr_id: identifier of the network service record.
        """
        # TODO: Review as it seems this code is never called
        log.info("Deleting orphaned vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in VnfAlarmRepository.list(VnfAlarm.nsr_id == nsr_id):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdu_name
                        )
                    except VdurNotFound:
                        log.debug("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        try:
                            await self.mon_client.delete_alarm(
                                alarm.nsr_id,
                                alarm.vnf_member_index,
                                alarm.vdu_name,
                                alarm.alarm_uuid,
                            )
                        except ValueError:
                            log.exception(
                                "Error deleting alarm in MON %s", alarm.alarm_uuid
                            )
                        alarm.delete_instance()
        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_vnf_alarms(self, nsr_id):
        """Delete every stored alarm of a network service.

        Each alarm is deleted through the MON client (a ``ValueError`` there
        is only logged) and then removed from the local database.

        :param nsr_id: identifier of the network service record.
        """
        log.info("Deleting vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in VnfAlarmRepository.list(VnfAlarm.nsr_id == nsr_id):
                    log.debug("Deleting vnf alarm %s", alarm.alarm_uuid)
                    try:
                        await self.mon_client.delete_alarm(
                            alarm.nsr_id,
                            alarm.vnf_member_index,
                            alarm.vdu_name,
                            alarm.alarm_uuid,
                        )
                    except ValueError:
                        log.exception(
                            "Error deleting alarm in MON %s", alarm.alarm_uuid
                        )
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str, payload: dict):
        """POST ``payload`` to every action URL registered for ``status``.

        The request timeout is read from the ``alert`` / ``timeout``
        configuration entry. Request failures are logged and never propagated,
        so one failing webhook does not prevent the remaining ones from being
        called. When no alarm with ``alarm_uuid`` is stored, nothing is sent.

        :param alarm_uuid: UUID of the alarm that changed state.
        :param status: new alarm state; compared against each action's type.
        :param payload: notification content sent to the webhooks.
        """
        alert_timeout = int(self.conf.get("alert", "timeout"))
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = VnfAlarmRepository.get(VnfAlarm.alarm_uuid == alarm_uuid)
                log.debug(
                    "Handling vnf alarm %s with status %s", alarm.alarm_id, status
                )
                for action in alarm.actions:
                    if action.type != status:
                        continue
                    log.info(
                        "Executing request to url %s for vnf alarm %s with status %s",
                        action.url,
                        alarm.alarm_id,
                        status,
                    )
                    try:
                        # NOTE(review): the payload is serialised with
                        # json.dumps and then passed through ``json=``, so the
                        # request body is a JSON-encoded string rather than a
                        # JSON object. Kept as-is because receivers may rely
                        # on it - confirm before changing.
                        requests.post(
                            url=action.url,
                            json=json.dumps(payload),
                            timeout=alert_timeout,
                        )
                    except ConnectionError:
                        # Must precede RequestException: the requests
                        # ConnectionError is a subclass of it and would
                        # otherwise never reach this handler.
                        log.exception("Error connecting to url %s", action.url)
                    except RequestException as e:
                        log.info(
                            "Error: RequestException while connecting to url %s",
                            action.url,
                        )
                        log.debug("RequestException %s", e)

        except VnfAlarm.DoesNotExist:
            log.debug(
                "There is no alarming action configured for alarm %s.", alarm_uuid
            )
        finally:
            database.db.close()

    def _get_metric_name(self, vnf_monitoring_param: dict):
        """Return the ``performance-metric`` of a monitoring parameter.

        :param vnf_monitoring_param: monitoring parameter descriptor; may be
            an empty dict when no parameter matched the alarm descriptor.
        :raises ValueError: when the descriptor has no ``performance-metric``.
        """
        if "performance-metric" in vnf_monitoring_param:
            return vnf_monitoring_param["performance-metric"]
        # .get() keeps the intended ValueError even when "id" is also missing
        # (e.g. for the empty-dict fallback), instead of raising KeyError.
        raise ValueError(
            "No metric name found for vnf_monitoring_param %s"
            % vnf_monitoring_param.get("id")
        )