# -*- coding: utf-8 -*-
# pylint: disable=no-member

# Copyright 2018 Whitestack, LLC
# *************************************************************

# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at

#         http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# For those usages not covered by the Apache License, Version 2.0 please
# contact: bdiaz@whitestack.com or glavado@whitestack.com
##
import asyncio
import datetime
import logging

from osm_policy_module.common.common_db_client import CommonDbClient
from osm_policy_module.common.lcm_client import LcmClient
from osm_policy_module.common.mon_client import MonClient
from osm_policy_module.core import database
from osm_policy_module.core.config import Config
from osm_policy_module.core.database import HealingAction, HealingActionRepository
from osm_policy_module.core.exceptions import VdurNotFound

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
| 41 | |
| 42 | |
class HealingService:
    """Creates, deletes and reacts to the healing alarms of a network service.

    Alarm bookkeeping is kept in ``HealingAction`` rows; the alarms themselves
    are created/deleted through ``MonClient`` and heal operations are requested
    through ``LcmClient``.
    """

    def __init__(self, config: Config, loop=None):
        """
        Initializing the HealingService
        :param config: configuration handed to the DB, MON and LCM clients
        :param loop: event loop given to the MON and LCM clients; when falsy,
            ``asyncio.get_event_loop()`` is used
        """
        log.info("HealingService Initialized")
        self.conf = config
        self.loop = loop or asyncio.get_event_loop()
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config, loop=self.loop)
        self.lcm_client = LcmClient(config, loop=self.loop)
        log.info("Constructor created for HealingService")

    async def _delete_mon_alarm(self, alarm):
        """
        Best-effort deletion of one alarm through the MON client
        :param alarm: HealingAction row describing the alarm to delete

        A ``ValueError`` raised by the client is logged and swallowed so the
        caller can carry on with the remaining alarms; any other exception
        propagates.
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms
        :param nsr_id: Network service record id

        For every healing policy found in the first deployment flavour ("df")
        of each VNF descriptor, a "vm_status" alarm is created for each vdur
        whose "vdu-id-ref" matches the policy's "vdu-id", unless an identical
        HealingAction is already stored.

        On any error the alarms created during this call are deleted again
        through the MON client and the original exception is re-raised.
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                vnfrs = self.db_client.get_vnfrs(nsr_id)
                for vnfr in vnfrs:
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    df = vnfd.get("df", [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get("healing-policy", ()):
                            vdu_id = healing_policy["vdu-id"]
                            for vdur in vnfr["vdur"]:
                                if vdu_id != vdur["vdu-id-ref"]:
                                    continue
                                try:
                                    HealingActionRepository.get(
                                        HealingAction.alarm_id == healing_policy["event-name"],
                                        HealingAction.vdur_name == vdur["name"],
                                        HealingAction.nsr_id == nsr_id,
                                        HealingAction.cooldown_time == healing_policy["cooldown-time"],
                                        HealingAction.recovery_action == healing_policy["action-on-recovery"],
                                        HealingAction.vnfinstance_id == vnfr["id"],
                                        HealingAction.vdu_id == healing_policy["vdu-id"],
                                        HealingAction.count_index == vdur["count-index"],
                                    )
                                except HealingAction.DoesNotExist:
                                    # Nothing stored yet for this vdur/policy: create it below.
                                    pass
                                else:
                                    log.debug(
                                        "vdu %s already has an alarm configured with same id %s",
                                        healing_policy["vdu-id"],
                                        healing_policy["event-name"],
                                    )
                                    continue

                                alarm_uuid = await self.mon_client.create_alarm(
                                    metric_name="vm_status",
                                    ns_id=nsr_id,
                                    vdu_name=vdur["name"],
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    threshold=1,
                                    operation="LT",
                                    statistic="AVERAGE",
                                )
                                alarm = HealingActionRepository.create(
                                    alarm_id=healing_policy["event-name"],
                                    alarm_uuid=alarm_uuid,
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    vdur_name=vdur["name"],
                                    recovery_action=healing_policy["action-on-recovery"],
                                    cooldown_time=healing_policy["cooldown-time"],
                                    day1=healing_policy["day1"],
                                    vdu_id=healing_policy["vdu-id"],
                                    vnfinstance_id=vnfr["id"],
                                    count_index=vdur["count-index"],
                                )
                                alarms_created.append(alarm)

        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Roll back the alarms created by this call. The rows carry the
            # vdur name in ``vdur_name`` (the keyword used in create() above).
            for alarm in alarms_created:
                await self._delete_mon_alarm(alarm)
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms whose vdur no longer exists
        :param nsr_id: Network service record id

        An alarm is treated as orphaned when looking up its vdur raises
        ``VdurNotFound``; it is then deleted through the MON client
        (best effort) and its HealingAction row is removed.
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(HealingAction.nsr_id == nsr_id):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_mon_alarm(alarm)
                        alarm.delete_instance()

        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms
        :param nsr_id: Network service record id

        Every HealingAction row of the network service is removed; the
        matching alarm is deleted through the MON client first (best effort).
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(HealingAction.nsr_id == nsr_id):
                    await self._delete_mon_alarm(alarm)
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status
        :param alarm_uuid: uuid of the alarm whose HealingAction row is updated
        :param status: Status of an alarm, stored as ``last_status``

        An unknown alarm_uuid is only logged at debug level.
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug(
                "There is no healing action configured for alarm %s.", alarm_uuid
            )
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms
        :param alarm_uuid: uuid of the alarm that changed state
        :param status: Status of an alarm

        The new status is always stored. When it is "alarm" and more than
        ``cooldown_time`` seconds have passed since ``last_heal``, a heal is
        requested through the LCM client and ``last_heal`` is updated on the
        HealingAction row of the reported alarm.
        """
        await self.update_alarm_status(alarm_uuid, status)
        if status != "alarm":
            # Only the "alarm" state can trigger a heal.
            return

        database.db.connect()
        try:
            with database.db.atomic():
                triggered = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                related_alarms = HealingActionRepository.list(
                    HealingAction.vnf_member_index == triggered.vnf_member_index,
                    HealingAction.vdur_name == triggered.vdur_name,
                )
                # NOTE(review): this lookup is not restricted to the alarm's
                # nsr_id -- confirm vdur names are unique across services.
                if not any(
                    related.last_status == "alarm" for related in related_alarms
                ):
                    return

                elapsed = datetime.datetime.now() - triggered.last_heal
                if elapsed.total_seconds() <= triggered.cooldown_time:
                    # Still inside the cooldown window of the previous heal.
                    return

                await self.lcm_client.heal(
                    triggered.nsr_id,
                    triggered.vnfinstance_id,
                    triggered.vdur_name,
                    triggered.vdu_id,
                    triggered.vnf_member_index,
                    triggered.recovery_action,
                    triggered.day1,
                    triggered.count_index,
                )
                last_heal = datetime.datetime.now()
                log.info("datetime.datetime.now %s", last_heal)
                # Persist on the row that is read for the cooldown check above,
                # so the next event for this alarm sees the new timestamp.
                triggered.last_heal = last_heal
                triggered.save()

        except HealingAction.DoesNotExist:
            log.info(
                "There is no healing action configured for alarm %s.", alarm_uuid
            )
        finally:
            database.db.close()