1 |
|
# -*- coding: utf-8 -*- |
2 |
|
# pylint: disable=no-member |
3 |
|
|
4 |
|
# Copyright 2018 Whitestack, LLC |
5 |
|
# ************************************************************* |
6 |
|
|
7 |
|
# This file is part of OSM Monitoring module |
8 |
|
# All Rights Reserved to Whitestack, LLC |
9 |
|
|
10 |
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may |
11 |
|
# not use this file except in compliance with the License. You may obtain |
12 |
|
# a copy of the License at |
13 |
|
|
14 |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
15 |
|
|
16 |
|
# Unless required by applicable law or agreed to in writing, software |
17 |
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
18 |
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
19 |
|
# License for the specific language governing permissions and limitations |
20 |
|
# under the License. |
21 |
|
|
22 |
|
# For those usages not covered by the Apache License, Version 2.0 please |
23 |
|
# contact: bdiaz@whitestack.com or glavado@whitestack.com |
24 |
|
## |
25 |
1 |
import logging |
26 |
1 |
import datetime |
27 |
|
|
28 |
1 |
from osm_policy_module.common.common_db_client import CommonDbClient |
29 |
1 |
from osm_policy_module.common.lcm_client import LcmClient |
30 |
1 |
from osm_policy_module.common.mon_client import MonClient |
31 |
1 |
from osm_policy_module.core import database |
32 |
1 |
from osm_policy_module.core.config import Config |
33 |
1 |
from osm_policy_module.core.database import ( |
34 |
|
HealingAction, |
35 |
|
HealingActionRepository, |
36 |
|
) |
37 |
1 |
from osm_policy_module.core.exceptions import VdurNotFound |
38 |
|
|
39 |
1 |
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__) |
40 |
|
|
41 |
|
|
42 |
1 |
class HealingService:
    """
    Service that configures, removes and reacts to healing alarms.

    Alarms are created/deleted in MON through ``MonClient`` and mirrored as
    ``HealingAction`` rows through ``HealingActionRepository``; heal
    operations are requested through ``LcmClient``.
    """

    def __init__(self, config: Config):
        """
        Initializing the HealingService

        :param config: configuration object passed to the DB, MON and LCM
            clients and read later for the ``autoheal``/``enabled`` setting
        """
        log.info("HealingService Initialized")
        self.conf = config
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config)
        self.lcm_client = LcmClient(config)
        log.info("Constructor created for HealingService")

    async def _delete_mon_alarm(self, alarm):
        """
        Delete one alarm in MON, logging (not raising) a ``ValueError``.

        :param alarm: ``HealingAction`` row describing the alarm to delete
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                # HealingAction rows carry ``vdur_name`` (see the create call
                # in configure_healing_alarms); there is no ``vdu_name``.
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    @staticmethod
    def _is_already_configured(nsr_id, vnfr, vdur, healing_policy) -> bool:
        """
        Tell whether an identical ``HealingAction`` row is already stored.

        :param nsr_id: Network service record id
        :param vnfr: VNF record the VDU record belongs to
        :param vdur: VDU record the policy applies to
        :param healing_policy: healing policy entry taken from the VNFD
        :return: True when a matching row exists, False otherwise
        """
        try:
            HealingActionRepository.get(
                HealingAction.alarm_id == healing_policy["event-name"],
                HealingAction.vdur_name == vdur["name"],
                HealingAction.nsr_id == nsr_id,
                HealingAction.cooldown_time == healing_policy["cooldown-time"],
                HealingAction.recovery_action
                == healing_policy["action-on-recovery"],
                HealingAction.vnfinstance_id == vnfr["id"],
                HealingAction.vdu_id == healing_policy["vdu-id"],
                HealingAction.count_index == vdur["count-index"],
            )
        except HealingAction.DoesNotExist:
            return False
        return True

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms

        For each VNF record of the network service, reads the
        ``healing-aspect`` entries of the first ``df`` of its VNFD. For every
        healing policy and every VDU record whose ``vdu-id-ref`` equals the
        policy ``vdu-id``, a ``vm_status`` alarm is created in MON and a
        matching ``HealingAction`` row is stored, unless an identical row
        already exists.

        :param nsr_id: Network service record id
        :raises Exception: any error is logged and re-raised after the alarms
            recorded during this call have been deleted in MON
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                for vnfr in self.db_client.get_vnfrs(nsr_id):
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    df = vnfd.get("df", [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get(
                            "healing-policy", ()
                        ):
                            vdu_id = healing_policy["vdu-id"]
                            for vdur in vnfr["vdur"]:
                                if vdu_id != vdur["vdu-id-ref"]:
                                    continue
                                if self._is_already_configured(
                                    nsr_id, vnfr, vdur, healing_policy
                                ):
                                    log.debug(
                                        "vdu %s already has an alarm configured with same id %s",
                                        healing_policy["vdu-id"],
                                        healing_policy["event-name"],
                                    )
                                    continue

                                metric_name = "vm_status"
                                alarm_uuid = await self.mon_client.create_alarm(
                                    metric_name=metric_name,
                                    ns_id=nsr_id,
                                    vdu_name=vdur["name"],
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    threshold=1,
                                    operation="LT",
                                    statistic="AVERAGE",
                                )
                                # NOTE(review): if this create() fails, the MON
                                # alarm just created is not in alarms_created
                                # and is not rolled back below.
                                alarm = HealingActionRepository.create(
                                    alarm_id=healing_policy["event-name"],
                                    alarm_uuid=alarm_uuid,
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    vdur_name=vdur["name"],
                                    recovery_action=healing_policy[
                                        "action-on-recovery"
                                    ],
                                    cooldown_time=healing_policy["cooldown-time"],
                                    day1=healing_policy["day1"],
                                    vdu_id=healing_policy["vdu-id"],
                                    vnfinstance_id=vnfr["id"],
                                    count_index=vdur["count-index"],
                                )
                                alarms_created.append(alarm)

        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Best-effort rollback of the alarms already created in MON.
            for alarm in alarms_created:
                await self._delete_mon_alarm(alarm)
            # Bare raise keeps the original exception and traceback.
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms whose VDU record cannot be found

        For every stored ``HealingAction`` of the network service, the VDU
        record is looked up; when the lookup raises ``VdurNotFound`` the alarm
        is deleted in MON and its row is removed.

        :param nsr_id: Network service record id
        :raises Exception: any error is logged and re-raised
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_mon_alarm(alarm)
                        # The row is removed even if the MON deletion failed.
                        alarm.delete_instance()

        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms

        Every stored ``HealingAction`` of the network service is deleted in
        MON and then removed from the database.

        :param nsr_id: Network service record id
        :raises Exception: any error is logged and re-raised
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    await self._delete_mon_alarm(alarm)
                    # The row is removed even if the MON deletion failed.
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status

        Stores ``status`` as ``last_status`` of the ``HealingAction`` with the
        given uuid; a missing row is only logged at debug level.

        :param alarm_uuid: uuid of the alarm
        :param status: Status of an alarm
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms

        Records the new status, then, when ``status`` is ``"alarm"`` and the
        ``autoheal``/``enabled`` setting equals ``"True"``, requests a heal
        from LCM if more than ``cooldown_time`` seconds have passed since the
        alarm's ``last_heal``. After the heal request, ``last_heal`` of the
        triggering alarm is updated.

        :param alarm_uuid: uuid of the alarm
        :param status: Status of an alarm
        """
        await self.update_alarm_status(alarm_uuid, status)
        database.db.connect()
        try:
            if status != "alarm" or self.conf.get("autoheal", "enabled") != "True":
                return
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                vnf_member_index = alarm.vnf_member_index
                vdur_name = alarm.vdur_name
                # NOTE(review): this lookup is not restricted to alarm.nsr_id;
                # confirm whether rows of other network services may match.
                related_alarms = HealingActionRepository.list(
                    HealingAction.vnf_member_index == vnf_member_index,
                    HealingAction.vdur_name == vdur_name,
                )
                # A distinct loop name keeps ``alarm`` bound to the triggering
                # row, so last_heal is saved on the right record below.
                statuses = [related.last_status for related in related_alarms]
                if "alarm" not in statuses:
                    return
                delta = datetime.datetime.now() - alarm.last_heal
                if delta.total_seconds() <= alarm.cooldown_time:
                    return
                await self.lcm_client.heal(
                    alarm.nsr_id,
                    alarm.vnfinstance_id,
                    vdur_name,
                    alarm.vdu_id,
                    vnf_member_index,
                    alarm.recovery_action,
                    alarm.day1,
                    alarm.count_index,
                )
                alarm.last_heal = datetime.datetime.now()
                # Log the recorded timestamp, not the ``now`` function object.
                log.info("datetime.datetime.now %s", alarm.last_heal)
                alarm.save()

        except HealingAction.DoesNotExist:
            log.info("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()