| sritharan | 7ef2b88 | 2022-04-25 12:37:55 +0000 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # pylint: disable=no-member |
| 3 | |
| 4 | # Copyright 2018 Whitestack, LLC |
| 5 | # ************************************************************* |
| 6 | |
| 7 | # This file is part of OSM Monitoring module |
| 8 | # All Rights Reserved to Whitestack, LLC |
| 9 | |
| 10 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 11 | # not use this file except in compliance with the License. You may obtain |
| 12 | # a copy of the License at |
| 13 | |
| 14 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 15 | |
| 16 | # Unless required by applicable law or agreed to in writing, software |
| 17 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 18 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 19 | # License for the specific language governing permissions and limitations |
| 20 | # under the License. |
| 21 | |
| 22 | # For those usages not covered by the Apache License, Version 2.0 please |
| 23 | # contact: bdiaz@whitestack.com or glavado@whitestack.com |
| 24 | ## |
| sritharan | 7ef2b88 | 2022-04-25 12:37:55 +0000 | [diff] [blame] | 25 | import logging |
| 26 | import datetime |
| elumalai | 055c4ee | 2023-11-14 21:03:34 +0530 | [diff] [blame] | 27 | import functools |
| 28 | import operator |
| sritharan | 7ef2b88 | 2022-04-25 12:37:55 +0000 | [diff] [blame] | 29 | |
| 30 | from osm_policy_module.common.common_db_client import CommonDbClient |
| 31 | from osm_policy_module.common.lcm_client import LcmClient |
| 32 | from osm_policy_module.common.mon_client import MonClient |
| 33 | from osm_policy_module.core import database |
| 34 | from osm_policy_module.core.config import Config |
| 35 | from osm_policy_module.core.database import ( |
| 36 | HealingAction, |
| 37 | HealingActionRepository, |
| 38 | ) |
| 39 | from osm_policy_module.core.exceptions import VdurNotFound |
| 40 | |
| 41 | log = logging.getLogger(__name__) |
| 42 | |
| 43 | |
class HealingService:
    """Manage healing alarms of network services and react when they fire.

    For every VDU record targeted by a VNFD healing policy the service creates
    a ``vm_status`` alarm in MON and stores a matching ``HealingAction`` row in
    the policy database. When such an alarm fires, LCM is asked to heal.
    """

    def __init__(self, config: Config):
        """
        Initializing the HealingService
        :param config: Configuration handed to the common DB, MON and LCM clients
        """
        log.info("HealingService Initialized")
        self.conf = config
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config)
        self.lcm_client = LcmClient(config)
        log.info("Constructor created for HealingService")

    async def configure_healing_alarms(
        self, nsr_id: str, vnf_member_index=None
    ) -> None:
        """
        Configuring the Healing alarms
        :param nsr_id: Network service record id
        :param vnf_member_index: Member VNF Index of VNF. When None, the alarms
            are configured for every VNF record of the network service
        :raises Exception: any error is logged and re-raised after the MON
            alarms created by this call have been deleted again
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                if vnf_member_index is None:
                    vnfrs = self.db_client.get_vnfrs(nsr_id)
                else:
                    vnfrs = [self.db_client.get_vnfr(nsr_id, vnf_member_index)]
                for vnfr in vnfrs:
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    # Only the first deployment flavour is inspected.
                    df = vnfd.get("df", [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get("healing-policy", ()):
                            await self._configure_policy_alarms(
                                nsr_id, vnfr, healing_policy, alarms_created
                            )
        except Exception:
            log.exception("Error configuring VNF alarms:")
            # The atomic block has already been left at this point; the alarms
            # created in MON by this call still have to be removed explicitly.
            for alarm in alarms_created:
                await self._delete_alarm_in_mon(alarm)
            raise
        finally:
            database.db.close()

    async def _configure_policy_alarms(
        self, nsr_id: str, vnfr: dict, healing_policy: dict, alarms_created: list
    ) -> None:
        """
        Create the alarm of one healing policy for each matching VDU record
        :param nsr_id: Network service record id
        :param vnfr: VNF record whose "vdur" entries are examined
        :param healing_policy: Healing policy entry taken from the VNFD
        :param alarms_created: List the new HealingAction rows are appended to,
            so that the caller can roll them back in MON on failure
        """
        vdu_id = healing_policy["vdu-id"]
        for vdur in vnfr["vdur"]:
            if vdu_id != vdur["vdu-id-ref"]:
                continue
            if self._healing_action_exists(nsr_id, vnfr, vdur, healing_policy):
                log.debug(
                    "vdu %s already has an alarm configured with same id %s",
                    healing_policy["vdu-id"],
                    healing_policy["event-name"],
                )
                continue

            alarm_uuid = await self.mon_client.create_alarm(
                metric_name="vm_status",
                ns_id=nsr_id,
                vdu_name=vdur["name"],
                vnf_member_index=vnfr["member-vnf-index-ref"],
                threshold=1,
                operation="LT",
                statistic="AVERAGE",
            )
            alarm = HealingActionRepository.create(
                alarm_id=healing_policy["event-name"],
                alarm_uuid=alarm_uuid,
                nsr_id=nsr_id,
                vnf_member_index=vnfr["member-vnf-index-ref"],
                vdur_name=vdur["name"],
                recovery_action=healing_policy["action-on-recovery"],
                cooldown_time=healing_policy["cooldown-time"],
                day1=healing_policy["day1"],
                vdu_id=healing_policy["vdu-id"],
                vnfinstance_id=vnfr["id"],
                count_index=vdur["count-index"],
            )
            alarms_created.append(alarm)

    @staticmethod
    def _healing_action_exists(
        nsr_id: str, vnfr: dict, vdur: dict, healing_policy: dict
    ) -> bool:
        """
        Tell whether an identical HealingAction row is already stored
        :param nsr_id: Network service record id
        :param vnfr: VNF record the VDU record belongs to
        :param vdur: VDU record the alarm would be created for
        :param healing_policy: Healing policy entry taken from the VNFD
        :return: True when a matching row exists, False otherwise
        """
        try:
            HealingActionRepository.get(
                HealingAction.alarm_id == healing_policy["event-name"],
                HealingAction.vdur_name == vdur["name"],
                HealingAction.nsr_id == nsr_id,
                HealingAction.cooldown_time == healing_policy["cooldown-time"],
                HealingAction.recovery_action == healing_policy["action-on-recovery"],
                HealingAction.vnfinstance_id == vnfr["id"],
                HealingAction.vdu_id == healing_policy["vdu-id"],
                HealingAction.count_index == vdur["count-index"],
            )
        except HealingAction.DoesNotExist:
            return False
        return True

    async def _delete_alarm_in_mon(self, alarm) -> None:
        """
        Delete one alarm in MON on a best-effort basis
        :param alarm: HealingAction row describing the alarm to delete

        A ValueError coming from the MON client is logged and swallowed so that
        the caller can continue with the remaining alarms.
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    async def delete_orphaned_healing_alarms(self, nsr_id) -> None:
        """
        Deleting the healing alarms whose VDU record no longer exists
        :param nsr_id: Network service record id
        :raises Exception: any error is logged and re-raised
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        # The VDU record is gone, so the alarm is an orphan.
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_alarm_in_mon(alarm)
                        alarm.delete_instance()
        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id, vnf_member_index=None) -> None:
        """
        Deleting the healing alarms
        :param nsr_id: Network service record id
        :param vnf_member_index: Member VNF Index of VNF. When None, the alarms
            of every VNF of the network service are deleted
        :raises Exception: any error is logged and re-raised
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                conditions = [HealingAction.nsr_id == nsr_id]
                if vnf_member_index is not None:
                    conditions.append(
                        HealingAction.vnf_member_index == vnf_member_index
                    )
                # AND all conditions together; a single condition is returned as is.
                heal_conditions = functools.reduce(operator.and_, conditions)
                for alarm in HealingActionRepository.list(heal_conditions):
                    await self._delete_alarm_in_mon(alarm)
                    alarm.delete_instance()
        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str) -> None:
        """
        For updating the alarm status
        :param alarm_uuid: uuid of the alarm whose HealingAction row is updated
        :param status: Status of an alarm, stored as the row's last_status

        An unknown alarm_uuid is only logged at debug level.
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str) -> None:
        """
        For Handling the healing alarms
        :param alarm_uuid: uuid of the alarm that changed its status
        :param status: Status of an alarm

        The status is always stored. A heal is requested from LCM only when the
        status is "alarm", the "autoheal"/"enabled" setting equals "True" and
        more than cooldown_time seconds have passed since the last heal.
        """
        await self.update_alarm_status(alarm_uuid, status)
        database.db.connect()
        try:
            if status != "alarm" or self.conf.get("autoheal", "enabled") != "True":
                return
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                # NOTE(review): this query is not restricted to alarm.nsr_id;
                # confirm that vdur names cannot repeat across network services.
                related_alarms = HealingActionRepository.list(
                    HealingAction.vnf_member_index == alarm.vnf_member_index,
                    HealingAction.vdur_name == alarm.vdur_name,
                )
                # A distinct loop name keeps ``alarm`` bound to the triggering
                # row, which is the one whose last_heal must be updated below.
                if not any(
                    related.last_status == "alarm" for related in related_alarms
                ):
                    return
                elapsed = datetime.datetime.now() - alarm.last_heal
                if elapsed.total_seconds() <= alarm.cooldown_time:
                    return
                await self.lcm_client.heal(
                    alarm.nsr_id,
                    alarm.vnfinstance_id,
                    alarm.vdur_name,
                    alarm.vdu_id,
                    alarm.vnf_member_index,
                    alarm.recovery_action,
                    alarm.day1,
                    alarm.count_index,
                )
                alarm.last_heal = datetime.datetime.now()
                log.info(
                    "Heal requested for vdur %s at %s",
                    alarm.vdur_name,
                    alarm.last_heal,
                )
                alarm.save()
        except HealingAction.DoesNotExist:
            log.info("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()