# -*- coding: utf-8 -*-
# pylint: disable=no-member

# Copyright 2018 Whitestack, LLC
# *************************************************************

# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at

#         http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# For those usages not covered by the Apache License, Version 2.0 please
# contact: bdiaz@whitestack.com or glavado@whitestack.com
##

# NOTE: this file was recovered from a gitweb "blobdiff_plain" export of
# osm_policy_module/healing/service.py (p=osm/POL.git,
# h=8434c663b8595ce1f7063849fa4c675b0354b6ae,
# hb=7ef2b88a9c0c6ead16861411584d017e1fe216d7); the patch markers were dropped.
import asyncio
import logging
import datetime

from osm_policy_module.common.common_db_client import CommonDbClient
from osm_policy_module.common.lcm_client import LcmClient
from osm_policy_module.common.mon_client import MonClient
from osm_policy_module.core import database
from osm_policy_module.core.config import Config
from osm_policy_module.core.database import (
    HealingAction,
    HealingActionRepository,
)
from osm_policy_module.core.exceptions import VdurNotFound

log = logging.getLogger(__name__)


class HealingService:
    """
    Create, track and delete healing alarms and trigger heal operations.

    Healing actions are persisted through ``HealingActionRepository``, the
    matching alarms are created/deleted through ``MonClient`` and heal
    operations are requested through ``LcmClient``.
    """

    def __init__(self, config: Config, loop=None):
        """
        Initializing the HealingService
        :param config: configuration handed to the DB, MON and LCM clients
        :param loop: event loop handed to the MON and LCM clients; when falsy,
            ``asyncio.get_event_loop()`` is used
        """
        log.info("HealingService Initialized")
        self.conf = config
        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config, loop=self.loop)
        self.lcm_client = LcmClient(config, loop=self.loop)
        log.info("Constructor created for HealingService")

    async def _delete_mon_alarm(self, alarm) -> None:
        """
        Delete the MON alarm behind a healing action (best effort)
        :param alarm: healing action record whose MON alarm must be removed

        A ``ValueError`` raised by the MON client is logged and swallowed so
        that callers can keep cleaning up the remaining alarms.
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    @staticmethod
    def _iter_healing_targets(vnfr: dict, vnfd: dict):
        """
        Yield every (healing_policy, vdur) pair that needs a healing alarm
        :param vnfr: VNF record; its "vdur" entries are matched by "vdu-id-ref"
        :param vnfd: VNF descriptor; only its first "df" entry is inspected
        """
        # ``or [{}]`` also covers a present-but-empty "df" list, which would
        # otherwise raise IndexError on ``[0]``.
        df = (vnfd.get("df") or [{}])[0]
        if "healing-aspect" not in df:
            log.info("No healing configuration present in vnfd")
            return
        for healing_aspect in df["healing-aspect"]:
            for healing_policy in healing_aspect.get("healing-policy", ()):
                vdu_id = healing_policy["vdu-id"]
                for vdur in vnfr["vdur"]:
                    if vdu_id == vdur["vdu-id-ref"]:
                        yield healing_policy, vdur

    @staticmethod
    def _healing_action_exists(
        nsr_id: str, vnfr: dict, vdur: dict, healing_policy: dict
    ) -> bool:
        """
        Tell whether an identical healing action is already stored
        :param nsr_id: Network service record id
        :param vnfr: VNF record owning the vdur
        :param vdur: VDU record the alarm would be attached to
        :param healing_policy: healing policy taken from the vnfd
        :return: True when a matching HealingAction record exists
        """
        try:
            HealingActionRepository.get(
                HealingAction.alarm_id == healing_policy["event-name"],
                HealingAction.vdur_name == vdur["name"],
                HealingAction.nsr_id == nsr_id,
                HealingAction.cooldown_time == healing_policy["cooldown-time"],
                HealingAction.recovery_action
                == healing_policy["action-on-recovery"],
                HealingAction.vnfinstance_id == vnfr["id"],
                HealingAction.vdu_id == healing_policy["vdu-id"],
                HealingAction.count_index == vdur["count-index"],
            )
        except HealingAction.DoesNotExist:
            return False
        return True

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms
        :param nsr_id: Network service record id

        For every vdur matching a healing policy of its vnfd, a "vm_status"
        alarm is created in MON and a HealingAction record is stored. On any
        error the MON alarms created so far are deleted and the error is
        re-raised.
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                for vnfr in self.db_client.get_vnfrs(nsr_id):
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    for healing_policy, vdur in self._iter_healing_targets(
                        vnfr, vnfd
                    ):
                        if self._healing_action_exists(
                            nsr_id, vnfr, vdur, healing_policy
                        ):
                            log.debug(
                                "vdu %s already has an alarm configured with same id %s",
                                healing_policy["vdu-id"],
                                healing_policy["event-name"],
                            )
                            continue

                        alarm_uuid = await self.mon_client.create_alarm(
                            metric_name="vm_status",
                            ns_id=nsr_id,
                            vdu_name=vdur["name"],
                            vnf_member_index=vnfr["member-vnf-index-ref"],
                            threshold=1,
                            operation="LT",
                            statistic="AVERAGE",
                        )
                        alarm = HealingActionRepository.create(
                            alarm_id=healing_policy["event-name"],
                            alarm_uuid=alarm_uuid,
                            nsr_id=nsr_id,
                            vnf_member_index=vnfr["member-vnf-index-ref"],
                            vdur_name=vdur["name"],
                            recovery_action=healing_policy["action-on-recovery"],
                            cooldown_time=healing_policy["cooldown-time"],
                            day1=healing_policy["day1"],
                            vdu_id=healing_policy["vdu-id"],
                            vnfinstance_id=vnfr["id"],
                            count_index=vdur["count-index"],
                        )
                        alarms_created.append(alarm)

        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Remove the MON alarms created before the failure. The records
            # store the VDU record name as ``vdur_name`` (the previous code
            # read a non-existent ``vdu_name`` attribute here).
            for alarm in alarms_created:
                await self._delete_mon_alarm(alarm)
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms whose vdur no longer exists
        :param nsr_id: Network service record id
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        # The vdur is gone: drop both the MON alarm and the
                        # local record.
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_mon_alarm(alarm)
                        alarm.delete_instance()

        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms
        :param nsr_id: Network service record id
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    # The local record is removed even when MON refuses the
                    # deletion (the helper only logs that failure).
                    await self._delete_mon_alarm(alarm)
                    alarm.delete_instance()

        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status
        :param alarm_uuid: uuid of the alarm
        :param status: Status of an alarm

        Unknown alarm uuids are only logged at debug level.
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug(
                "There is no healing action configured for alarm %s.", alarm_uuid
            )
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms
        :param alarm_uuid: uuid of the alarm
        :param status: Status of an alarm

        The status is always stored. A heal operation is requested only when
        the status is "alarm" and the cooldown time of the triggering alarm
        has elapsed since its last heal.
        """
        await self.update_alarm_status(alarm_uuid, status)
        if status != "alarm":
            # Nothing to heal: no need to open a DB connection.
            return

        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                # NOTE(review): this lookup is not filtered by nsr_id, as in
                # the original code - confirm vdur names are unique across NSs.
                related_alarms = HealingActionRepository.list(
                    HealingAction.vnf_member_index == alarm.vnf_member_index,
                    HealingAction.vdur_name == alarm.vdur_name,
                )
                # Iterate with a separate name: rebinding ``alarm`` here made
                # the save below update the last listed record instead of the
                # alarm that triggered the heal.
                if not any(
                    related.last_status == "alarm" for related in related_alarms
                ):
                    return

                delta = datetime.datetime.now() - alarm.last_heal
                if delta.total_seconds() <= alarm.cooldown_time:
                    return

                await self.lcm_client.heal(
                    alarm.nsr_id,
                    alarm.vnfinstance_id,
                    alarm.vdur_name,
                    alarm.vdu_id,
                    alarm.vnf_member_index,
                    alarm.recovery_action,
                    alarm.day1,
                    alarm.count_index,
                )
                alarm.last_heal = datetime.datetime.now()
                # Log the recorded heal time (the function object
                # ``datetime.datetime.now`` used to be logged instead).
                log.info("datetime.datetime.now %s", alarm.last_heal)
                alarm.save()

        except HealingAction.DoesNotExist:
            log.info(
                "There is no healing action configured for alarm %s.",
                alarm_uuid,
            )
        finally:
            database.db.close()