Feature 10923: Autohealing
[osm/POL.git] / osm_policy_module / healing / service.py
1 # -*- coding: utf-8 -*-
2 # pylint: disable=no-member
3
4 # Copyright 2018 Whitestack, LLC
5 # *************************************************************
6
7 # This file is part of OSM Monitoring module
8 # All Rights Reserved to Whitestack, LLC
9
10 # Licensed under the Apache License, Version 2.0 (the "License"); you may
11 # not use this file except in compliance with the License. You may obtain
12 # a copy of the License at
13
14 # http://www.apache.org/licenses/LICENSE-2.0
15
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
19 # License for the specific language governing permissions and limitations
20 # under the License.
21
22 # For those usages not covered by the Apache License, Version 2.0 please
23 # contact: bdiaz@whitestack.com or glavado@whitestack.com
24 ##
25 import asyncio
26 import logging
27 import datetime
28
29 from osm_policy_module.common.common_db_client import CommonDbClient
30 from osm_policy_module.common.lcm_client import LcmClient
31 from osm_policy_module.common.mon_client import MonClient
32 from osm_policy_module.core import database
33 from osm_policy_module.core.config import Config
34 from osm_policy_module.core.database import (
35 HealingAction,
36 HealingActionRepository,
37 )
38 from osm_policy_module.core.exceptions import VdurNotFound
39
40 log = logging.getLogger(__name__)
41
42
class HealingService:
    """Create, remove and react to the healing alarms of a network service.

    Each healing alarm is kept in two places: in MON (through ``MonClient``)
    and as a ``HealingAction`` row in the local database. The methods below
    keep the two in step and ask LCM to heal a VDU when an alarm fires.
    """

    def __init__(self, config: Config, loop=None):
        """
        Initializing the HealingService

        :param config: configuration handed to the common DB, MON and LCM clients
        :param loop: event loop handed to the MON and LCM clients; when it is
            not given, ``asyncio.get_event_loop()`` is used
        """
        log.info("HealingService Initialized")
        self.conf = config
        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config, loop=self.loop)
        self.lcm_client = LcmClient(config, loop=self.loop)
        log.info("Constructor created for HealingService")

    async def _delete_mon_alarm(self, alarm):
        """
        Best-effort removal of one alarm from MON.

        A ``ValueError`` raised by the MON client is logged and swallowed so
        that the caller can carry on with its own cleanup; any other
        exception propagates.

        :param alarm: HealingAction record describing the alarm to remove
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms

        For every VNF record of the network service, the first deployment
        flavour of its VNFD is inspected. For each healing policy found under
        ``healing-aspect``, every VDU record whose ``vdu-id-ref`` matches the
        policy's ``vdu-id`` gets a ``vm_status`` alarm in MON and a matching
        HealingAction row, unless an identical row already exists.

        If anything fails, the MON alarms created by this call are deleted
        again (best effort) and the original exception is re-raised.

        :param nsr_id: Network service record id
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        metric_name = "vm_status"
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                vnfrs = self.db_client.get_vnfrs(nsr_id)
                for vnfr in vnfrs:
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    df = vnfd.get("df", [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get("healing-policy", ()):
                            vdu_id = healing_policy["vdu-id"]
                            for vdur in vnfr["vdur"]:
                                if vdu_id != vdur["vdu-id-ref"]:
                                    continue
                                try:
                                    HealingActionRepository.get(
                                        HealingAction.alarm_id == healing_policy["event-name"],
                                        HealingAction.vdur_name == vdur["name"],
                                        HealingAction.nsr_id == nsr_id,
                                        HealingAction.cooldown_time == healing_policy["cooldown-time"],
                                        HealingAction.recovery_action == healing_policy["action-on-recovery"],
                                        HealingAction.vnfinstance_id == vnfr["id"],
                                        HealingAction.vdu_id == healing_policy["vdu-id"],
                                        HealingAction.count_index == vdur["count-index"],
                                    )
                                except HealingAction.DoesNotExist:
                                    # Nothing stored yet for this VDU record:
                                    # fall through and create the alarm.
                                    pass
                                else:
                                    log.debug(
                                        "vdu %s already has an alarm configured with same id %s",
                                        healing_policy["vdu-id"],
                                        healing_policy["event-name"],
                                    )
                                    continue

                                alarm_uuid = await self.mon_client.create_alarm(
                                    metric_name=metric_name,
                                    ns_id=nsr_id,
                                    vdu_name=vdur["name"],
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    threshold=1,
                                    operation="LT",
                                    statistic="AVERAGE",
                                )
                                alarm = HealingActionRepository.create(
                                    alarm_id=healing_policy["event-name"],
                                    alarm_uuid=alarm_uuid,
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    vdur_name=vdur["name"],
                                    recovery_action=healing_policy["action-on-recovery"],
                                    cooldown_time=healing_policy["cooldown-time"],
                                    day1=healing_policy["day1"],
                                    vdu_id=healing_policy["vdu-id"],
                                    vnfinstance_id=vnfr["id"],
                                    count_index=vdur["count-index"],
                                )
                                alarms_created.append(alarm)

        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Undo the MON side of this call. The records carry the field
            # ``vdur_name`` (see the create() call above); reading
            # ``vdu_name`` here used to raise AttributeError and hide the
            # original failure.
            for alarm in alarms_created:
                await self._delete_mon_alarm(alarm)
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms whose VDU record no longer exists

        Every HealingAction row of the network service is checked against the
        common DB; when the lookup raises ``VdurNotFound`` the alarm is
        removed from MON (best effort) and its row is deleted.

        :param nsr_id: Network service record id
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_mon_alarm(alarm)
                        alarm.delete_instance()

        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms

        Every HealingAction row of the network service is removed from MON
        (best effort) and then deleted from the local database.

        :param nsr_id: Network service record id
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    await self._delete_mon_alarm(alarm)
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status

        Stores ``status`` as ``last_status`` of the HealingAction row with the
        given alarm uuid. A missing row is only logged at debug level.

        :param alarm_uuid: uuid used to look up the HealingAction row
        :param status: Status of an alarm
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug(
                "There is no healing action configured for alarm %s.", alarm_uuid
            )
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms

        The new status is stored first. When the status is ``"alarm"``, and
        at least one HealingAction row with the same member index and VDU
        record name is in ``"alarm"`` state, and more than ``cooldown_time``
        seconds have passed since ``last_heal``, LCM is asked to heal the VDU
        and ``last_heal`` of the triggering row is set to the current time.

        :param alarm_uuid: uuid used to look up the HealingAction row
        :param status: Status of an alarm
        """
        await self.update_alarm_status(alarm_uuid, status)
        database.db.connect()
        try:
            if status != "alarm":
                return
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                # NOTE(review): this lookup is not restricted to alarm.nsr_id,
                # so rows of other network services with the same member
                # index and VDU record name are included - confirm intent.
                related_alarms = HealingActionRepository.list(
                    HealingAction.vnf_member_index == alarm.vnf_member_index,
                    HealingAction.vdur_name == alarm.vdur_name,
                )
                # A distinct loop name keeps ``alarm`` bound to the row that
                # actually fired; it is the one whose cooldown is tracked.
                statuses = [related.last_status for related in related_alarms]
                if "alarm" not in statuses:
                    return

                delta = datetime.datetime.now() - alarm.last_heal
                if delta.total_seconds() <= alarm.cooldown_time:
                    # Still cooling down from the previous heal.
                    return

                await self.lcm_client.heal(
                    alarm.nsr_id,
                    alarm.vnfinstance_id,
                    alarm.vdur_name,
                    alarm.vdu_id,
                    alarm.vnf_member_index,
                    alarm.recovery_action,
                    alarm.day1,
                    alarm.count_index,
                )
                last_heal = datetime.datetime.now()
                # Log the timestamp itself, not the ``now`` function object.
                log.info("datetime.datetime.now %s", last_heal)
                alarm.last_heal = last_heal
                alarm.save()

        except HealingAction.DoesNotExist:
            log.info(
                "There is no healing action configured for alarm %s.",
                alarm_uuid,
            )
        finally:
            database.db.close()