| sritharan | 7ef2b88 | 2022-04-25 12:37:55 +0000 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # pylint: disable=no-member |
| 3 | |
| 4 | # Copyright 2018 Whitestack, LLC |
| 5 | # ************************************************************* |
| 6 | |
| 7 | # This file is part of OSM Monitoring module |
| 8 | # All Rights Reserved to Whitestack, LLC |
| 9 | |
| 10 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 11 | # not use this file except in compliance with the License. You may obtain |
| 12 | # a copy of the License at |
| 13 | |
| 14 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 15 | |
| 16 | # Unless required by applicable law or agreed to in writing, software |
| 17 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 18 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 19 | # License for the specific language governing permissions and limitations |
| 20 | # under the License. |
| 21 | |
| 22 | # For those usages not covered by the Apache License, Version 2.0 please |
| 23 | # contact: bdiaz@whitestack.com or glavado@whitestack.com |
| 24 | ## |
| sritharan | 7ef2b88 | 2022-04-25 12:37:55 +0000 | [diff] [blame] | 25 | import logging |
| 26 | import datetime |
| 27 | |
| 28 | from osm_policy_module.common.common_db_client import CommonDbClient |
| 29 | from osm_policy_module.common.lcm_client import LcmClient |
| 30 | from osm_policy_module.common.mon_client import MonClient |
| 31 | from osm_policy_module.core import database |
| 32 | from osm_policy_module.core.config import Config |
| 33 | from osm_policy_module.core.database import ( |
| 34 | HealingAction, |
| 35 | HealingActionRepository, |
| 36 | ) |
| 37 | from osm_policy_module.core.exceptions import VdurNotFound |
| 38 | |
| 39 | log = logging.getLogger(__name__) |
| 40 | |
| 41 | |
class HealingService:
    """
    Service that manages healing alarms and triggers heal operations.

    It creates one MON alarm per VDU record covered by a healing policy,
    stores the matching HealingAction row, removes alarms again (all of them
    or only orphaned ones) and asks LCM to heal a VDU when its alarm fires.
    """

    def __init__(self, config: Config):
        """
        Initializing the HealingService
        :param config: Configuration object handed to the DB, MON and LCM clients
        """
        log.info("HealingService Initialized")
        self.conf = config
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config)
        self.lcm_client = LcmClient(config)
        log.info("Constructor created for HealingService")

    async def _delete_mon_alarm(self, alarm) -> None:
        """
        Best-effort deletion of a single alarm in MON
        :param alarm: HealingAction record describing the alarm to delete

        A ValueError raised by the MON client is logged and swallowed so that
        callers can keep cleaning up the remaining alarms.
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms
        :param nsr_id: Network service record id

        For every VDU record of the NS whose VDU is referenced by a healing
        policy, a "vm_status" alarm is created in MON and a HealingAction row
        is stored, unless an identical row already exists. If anything fails,
        the MON alarms created by this call are deleted again and the original
        exception is re-raised.
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                vnfrs = self.db_client.get_vnfrs(nsr_id)
                for vnfr in vnfrs:
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    # Tolerate a missing or empty "df" list instead of
                    # failing with IndexError.
                    df = (vnfd.get("df") or [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get("healing-policy", ()):
                            vdu_id = healing_policy["vdu-id"]
                            for vdur in vnfr["vdur"]:
                                if vdu_id != vdur["vdu-id-ref"]:
                                    continue
                                try:
                                    HealingActionRepository.get(
                                        HealingAction.alarm_id
                                        == healing_policy["event-name"],
                                        HealingAction.vdur_name == vdur["name"],
                                        HealingAction.nsr_id == nsr_id,
                                        HealingAction.cooldown_time
                                        == healing_policy["cooldown-time"],
                                        HealingAction.recovery_action
                                        == healing_policy["action-on-recovery"],
                                        HealingAction.vnfinstance_id == vnfr["id"],
                                        HealingAction.vdu_id
                                        == healing_policy["vdu-id"],
                                        HealingAction.count_index
                                        == vdur["count-index"],
                                    )
                                except HealingAction.DoesNotExist:
                                    pass
                                else:
                                    # An identical action is already stored:
                                    # nothing to create for this vdur.
                                    log.debug(
                                        "vdu %s already has an alarm configured with same id %s",
                                        healing_policy["vdu-id"],
                                        healing_policy["event-name"],
                                    )
                                    continue

                                metric_name = "vm_status"
                                alarm_uuid = await self.mon_client.create_alarm(
                                    metric_name=metric_name,
                                    ns_id=nsr_id,
                                    vdu_name=vdur["name"],
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    threshold=1,
                                    operation="LT",
                                    statistic="AVERAGE",
                                )
                                alarm = HealingActionRepository.create(
                                    alarm_id=healing_policy["event-name"],
                                    alarm_uuid=alarm_uuid,
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    vdur_name=vdur["name"],
                                    recovery_action=healing_policy[
                                        "action-on-recovery"
                                    ],
                                    cooldown_time=healing_policy["cooldown-time"],
                                    day1=healing_policy["day1"],
                                    vdu_id=healing_policy["vdu-id"],
                                    vnfinstance_id=vnfr["id"],
                                    count_index=vdur["count-index"],
                                )
                                alarms_created.append(alarm)

        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Undo the MON side of this call. The records carry `vdur_name`
            # (there is no `vdu_name` attribute), so the shared helper is
            # used to keep the arguments consistent with the other methods.
            # NOTE(review): the DB rows are presumably rolled back by the
            # atomic() block above - confirm against the database module.
            for alarm in alarms_created:
                await self._delete_mon_alarm(alarm)
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id: str):
        """
        Deleting the healing alarms whose VDU record no longer exists
        :param nsr_id: Network service record id

        An alarm is considered orphaned when looking up its VDU record raises
        VdurNotFound; it is then removed from MON (best effort) and from the
        local database.
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_mon_alarm(alarm)
                        alarm.delete_instance()

        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id: str):
        """
        Deleting the healing alarms
        :param nsr_id: Network service record id

        Every healing alarm stored for the NS is removed from MON (best
        effort) and then from the local database.
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    await self._delete_mon_alarm(alarm)
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status
        :param alarm_uuid: uuid of the alarm whose status changed
        :param status: Status of an alarm

        An unknown alarm_uuid is only logged at debug level.
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms
        :param alarm_uuid: uuid of the alarm that was notified
        :param status: Status of an alarm

        The new status is stored first. When the status is "alarm" and the
        cooldown time of the healing action has elapsed since its last heal,
        a heal operation is requested from LCM and the heal time is recorded
        on the alarm that fired.
        """
        await self.update_alarm_status(alarm_uuid, status)
        database.db.connect()
        try:
            if status == "alarm":
                with database.db.atomic():
                    alarm = HealingActionRepository.get(
                        HealingAction.alarm_uuid == alarm_uuid
                    )
                    related_alarms = HealingActionRepository.list(
                        HealingAction.vnf_member_index == alarm.vnf_member_index,
                        HealingAction.vdur_name == alarm.vdur_name,
                    )
                    # A distinct loop name keeps `alarm` bound to the record
                    # that fired, so the heal time below is saved on it and
                    # not on whichever related record was listed last.
                    statuses = [related.last_status for related in related_alarms]
                    if "alarm" in statuses:
                        delta = datetime.datetime.now() - alarm.last_heal
                        if delta.total_seconds() > alarm.cooldown_time:
                            await self.lcm_client.heal(
                                alarm.nsr_id,
                                alarm.vnfinstance_id,
                                alarm.vdur_name,
                                alarm.vdu_id,
                                alarm.vnf_member_index,
                                alarm.recovery_action,
                                alarm.day1,
                                alarm.count_index,
                            )
                            last_heal = datetime.datetime.now()
                            # Log the timestamp itself rather than the
                            # datetime.now function object.
                            log.info("datetime.datetime.now %s", last_heal)
                            alarm.last_heal = last_heal
                            alarm.save()

        except HealingAction.DoesNotExist:
            log.info("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()