| sritharan | 7ef2b88 | 2022-04-25 12:37:55 +0000 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # pylint: disable=no-member |
| 3 | |
| 4 | # Copyright 2018 Whitestack, LLC |
| 5 | # ************************************************************* |
| 6 | |
| 7 | # This file is part of OSM Monitoring module |
| 8 | # All Rights Reserved to Whitestack, LLC |
| 9 | |
| 10 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 11 | # not use this file except in compliance with the License. You may obtain |
| 12 | # a copy of the License at |
| 13 | |
| 14 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 15 | |
| 16 | # Unless required by applicable law or agreed to in writing, software |
| 17 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 18 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 19 | # License for the specific language governing permissions and limitations |
| 20 | # under the License. |
| 21 | |
| 22 | # For those usages not covered by the Apache License, Version 2.0 please |
| 23 | # contact: bdiaz@whitestack.com or glavado@whitestack.com |
| 24 | ## |
| 25 | import asyncio |
| 26 | import logging |
| 27 | import datetime |
| 28 | |
| 29 | from osm_policy_module.common.common_db_client import CommonDbClient |
| 30 | from osm_policy_module.common.lcm_client import LcmClient |
| 31 | from osm_policy_module.common.mon_client import MonClient |
| 32 | from osm_policy_module.core import database |
| 33 | from osm_policy_module.core.config import Config |
| 34 | from osm_policy_module.core.database import ( |
| 35 | HealingAction, |
| 36 | HealingActionRepository, |
| 37 | ) |
| 38 | from osm_policy_module.core.exceptions import VdurNotFound |
| 39 | |
| 40 | log = logging.getLogger(__name__) |
| 41 | |
| 42 | |
class HealingService:
    """
    Creates, removes and reacts to the healing alarms of a network service.

    Alarms are created and deleted in MON through ``MonClient``, the matching
    ``HealingAction`` records are handled through ``HealingActionRepository``
    and heal operations are requested through ``LcmClient``.
    """

    def __init__(self, config: Config, loop=None):
        """
        Initializing the HealingService
        :param config: Configuration handed to the DB, MON and LCM clients
        :param loop: Event loop passed to the MON and LCM clients; when not
            given, asyncio.get_event_loop() is used
        """
        log.info("HealingService Initialized")
        self.conf = config
        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config, loop=self.loop)
        self.lcm_client = LcmClient(config, loop=self.loop)
        log.info("Constructor created for HealingService")

    async def _delete_alarm_in_mon(self, alarm):
        """
        Deleting a single alarm in MON on a best-effort basis
        :param alarm: HealingAction record describing the alarm to delete
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            # Only logged: callers keep cleaning up the remaining alarms.
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    @staticmethod
    def _healing_action_exists(nsr_id, vnfr, vdur, healing_policy) -> bool:
        """
        Checking whether a HealingAction record already exists for a policy/vdur pair
        :param nsr_id: Network service record id
        :param vnfr: VNF record owning the vdur
        :param vdur: VDU record the alarm would be attached to
        :param healing_policy: Healing policy entry taken from the vnfd
        :return: True when a record with the same attributes is stored
        """
        try:
            HealingActionRepository.get(
                HealingAction.alarm_id == healing_policy["event-name"],
                HealingAction.vdur_name == vdur["name"],
                HealingAction.nsr_id == nsr_id,
                HealingAction.cooldown_time == healing_policy["cooldown-time"],
                HealingAction.recovery_action
                == healing_policy["action-on-recovery"],
                HealingAction.vnfinstance_id == vnfr["id"],
                HealingAction.vdu_id == healing_policy["vdu-id"],
                HealingAction.count_index == vdur["count-index"],
            )
        except HealingAction.DoesNotExist:
            return False
        return True

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms

        For every vdur whose "vdu-id-ref" matches the "vdu-id" of a healing
        policy in the first "df" entry of its vnfd, a "vm_status" alarm is
        created in MON and stored as a HealingAction record, unless an
        identical record already exists.

        On any error the alarms created in MON by this call are deleted again
        and the error is re-raised.
        :param nsr_id: Network service record id
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                for vnfr in self.db_client.get_vnfrs(nsr_id):
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    # A missing or empty "df" list means no healing configuration.
                    df = (vnfd.get("df") or [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get("healing-policy", ()):
                            vdu_id = healing_policy["vdu-id"]
                            for vdur in vnfr["vdur"]:
                                if vdu_id != vdur["vdu-id-ref"]:
                                    continue
                                if self._healing_action_exists(
                                    nsr_id, vnfr, vdur, healing_policy
                                ):
                                    log.debug(
                                        "vdu %s already has an alarm configured with same id %s",
                                        healing_policy["vdu-id"],
                                        healing_policy["event-name"],
                                    )
                                    continue

                                alarm_uuid = await self.mon_client.create_alarm(
                                    metric_name="vm_status",
                                    ns_id=nsr_id,
                                    vdu_name=vdur["name"],
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    threshold=1,
                                    operation="LT",
                                    statistic="AVERAGE",
                                )
                                alarm = HealingActionRepository.create(
                                    alarm_id=healing_policy["event-name"],
                                    alarm_uuid=alarm_uuid,
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    vdur_name=vdur["name"],
                                    recovery_action=healing_policy[
                                        "action-on-recovery"
                                    ],
                                    cooldown_time=healing_policy["cooldown-time"],
                                    day1=healing_policy["day1"],
                                    vdu_id=healing_policy["vdu-id"],
                                    vnfinstance_id=vnfr["id"],
                                    count_index=vdur["count-index"],
                                )
                                alarms_created.append(alarm)

        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Undo the MON side of every alarm created before the failure.
            for alarm in alarms_created:
                await self._delete_alarm_in_mon(alarm)
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms whose vdur no longer exists

        An alarm is treated as orphaned when looking up its vdur raises
        VdurNotFound; it is then deleted in MON (best effort) and its
        HealingAction record is removed.
        :param nsr_id: Network service record id
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_alarm_in_mon(alarm)
                        alarm.delete_instance()

        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms

        Every HealingAction record of the network service is deleted in MON
        (best effort) and then removed from the database.
        :param nsr_id: Network service record id
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    await self._delete_alarm_in_mon(alarm)
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status

        Nothing is changed when no HealingAction record matches the uuid.
        :param alarm_uuid: UUID of the alarm, as stored in HealingAction.alarm_uuid
        :param status: Status of an alarm
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms

        The new status is stored first. When the status is "alarm" and the
        cooldown time of the triggering alarm has elapsed since its last
        heal, a heal operation is requested from LCM and the heal time is
        saved on that same alarm record.
        :param alarm_uuid: UUID of the alarm, as stored in HealingAction.alarm_uuid
        :param status: Status of an alarm
        """
        await self.update_alarm_status(alarm_uuid, status)
        database.db.connect()
        try:
            if status != "alarm":
                return
            with database.db.atomic():
                triggered = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                # Distinct loop variable: the triggering record must stay bound
                # so that the heal time is saved on the row that was checked.
                related_alarms = HealingActionRepository.list(
                    HealingAction.vnf_member_index == triggered.vnf_member_index,
                    HealingAction.vdur_name == triggered.vdur_name,
                )
                if not any(
                    related.last_status == "alarm" for related in related_alarms
                ):
                    return

                last_heal = triggered.last_heal
                # A record without a previous heal time is treated as out of cooldown.
                if last_heal is not None:
                    elapsed = datetime.datetime.now() - last_heal
                    if elapsed.total_seconds() <= triggered.cooldown_time:
                        return

                await self.lcm_client.heal(
                    triggered.nsr_id,
                    triggered.vnfinstance_id,
                    triggered.vdur_name,
                    triggered.vdu_id,
                    triggered.vnf_member_index,
                    triggered.recovery_action,
                    triggered.day1,
                    triggered.count_index,
                )
                triggered.last_heal = datetime.datetime.now()
                log.info("datetime.datetime.now %s", triggered.last_heal)
                triggered.save()

        except HealingAction.DoesNotExist:
            log.info("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()