# blob: 7c6739cbe3e174bbaf728f36ff0d2af3bbe7403e [file] [log] [blame]
# -*- coding: utf-8 -*-
# pylint: disable=no-member
# Copyright 2018 Whitestack, LLC
# *************************************************************
# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# For those usages not covered by the Apache License, Version 2.0 please
# contact: bdiaz@whitestack.com or glavado@whitestack.com
##
import logging
import datetime
from osm_policy_module.common.common_db_client import CommonDbClient
from osm_policy_module.common.lcm_client import LcmClient
from osm_policy_module.common.mon_client import MonClient
from osm_policy_module.core import database
from osm_policy_module.core.config import Config
from osm_policy_module.core.database import (
HealingAction,
HealingActionRepository,
)
from osm_policy_module.core.exceptions import VdurNotFound
# Module-level logger used by every HealingService method below.
log = logging.getLogger(__name__)
class HealingService:
    """
    Service that manages healing alarms and reacts to their notifications.

    Healing policies are read from VNF descriptors through the common DB
    client, alarms are created/deleted through the MON client, the resulting
    ``HealingAction`` records are stored through ``HealingActionRepository``
    and heal requests are sent through the LCM client.
    """

    def __init__(self, config: Config):
        """
        Initializing the HealingService
        :param config: Configuration passed to the common DB, MON and LCM clients
        """
        log.info("HealingService Initialized")
        self.conf = config
        self.db_client = CommonDbClient(config)
        self.mon_client = MonClient(config)
        self.lcm_client = LcmClient(config)
        log.info("Constructor created for HealingService")

    async def _delete_mon_alarm(self, alarm):
        """
        Delete the MON alarm that backs a healing action (best effort)
        :param alarm: HealingAction record whose MON alarm must be removed

        A ValueError raised by the MON client is logged and swallowed so that
        callers can keep processing their remaining alarms.
        """
        try:
            await self.mon_client.delete_alarm(
                alarm.nsr_id,
                alarm.vnf_member_index,
                alarm.vdur_name,
                alarm.alarm_uuid,
            )
        except ValueError:
            log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)

    async def configure_healing_alarms(self, nsr_id: str):
        """
        Configuring the Healing alarms
        :param nsr_id: Network service record id

        For every VDU record that matches a healing policy of its VNFD, a
        ``vm_status`` alarm is created in MON and a HealingAction record is
        stored, unless an identical record already exists.
        If anything fails, the MON alarms created by this call are deleted
        and the original exception is re-raised.
        """
        log.info("Configuring Healing alarm for NS %s", nsr_id)
        alarms_created = []
        database.db.connect()
        try:
            with database.db.atomic():
                vnfrs = self.db_client.get_vnfrs(nsr_id)
                for vnfr in vnfrs:
                    vnfd = self.db_client.get_vnfd(vnfr["vnfd-id"])
                    # Only the first deployment flavour is inspected.
                    df = vnfd.get("df", [{}])[0]
                    if "healing-aspect" not in df:
                        log.info("No healing configuration present in vnfd")
                        continue
                    for healing_aspect in df["healing-aspect"]:
                        for healing_policy in healing_aspect.get("healing-policy", ()):
                            vdu_id = healing_policy["vdu-id"]
                            for vdur in vnfr["vdur"]:
                                if vdu_id != vdur["vdu-id-ref"]:
                                    continue
                                try:
                                    # Skip VDU records that already have this
                                    # exact healing action stored.
                                    HealingActionRepository.get(
                                        HealingAction.alarm_id
                                        == healing_policy["event-name"],
                                        HealingAction.vdur_name == vdur["name"],
                                        HealingAction.nsr_id == nsr_id,
                                        HealingAction.cooldown_time
                                        == healing_policy["cooldown-time"],
                                        HealingAction.recovery_action
                                        == healing_policy["action-on-recovery"],
                                        HealingAction.vnfinstance_id == vnfr["id"],
                                        HealingAction.vdu_id
                                        == healing_policy["vdu-id"],
                                        HealingAction.count_index
                                        == vdur["count-index"],
                                    )
                                    log.debug(
                                        "vdu %s already has an alarm configured with same id %s",
                                        healing_policy["vdu-id"],
                                        healing_policy["event-name"],
                                    )
                                    continue
                                except HealingAction.DoesNotExist:
                                    pass
                                metric_name = "vm_status"
                                alarm_uuid = await self.mon_client.create_alarm(
                                    metric_name=metric_name,
                                    ns_id=nsr_id,
                                    vdu_name=vdur["name"],
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    threshold=1,
                                    operation="LT",
                                    statistic="AVERAGE",
                                )
                                alarm = HealingActionRepository.create(
                                    alarm_id=healing_policy["event-name"],
                                    alarm_uuid=alarm_uuid,
                                    nsr_id=nsr_id,
                                    vnf_member_index=vnfr["member-vnf-index-ref"],
                                    vdur_name=vdur["name"],
                                    recovery_action=healing_policy[
                                        "action-on-recovery"
                                    ],
                                    cooldown_time=healing_policy["cooldown-time"],
                                    day1=healing_policy["day1"],
                                    vdu_id=healing_policy["vdu-id"],
                                    vnfinstance_id=vnfr["id"],
                                    count_index=vdur["count-index"],
                                )
                                alarms_created.append(alarm)
        except Exception:
            log.exception("Error configuring VNF alarms:")
            # Roll back in MON whatever this call created. The records hold
            # the VDU record name in ``vdur_name`` (see the create() call).
            for alarm in alarms_created:
                await self._delete_mon_alarm(alarm)
            raise
        finally:
            database.db.close()

    async def delete_orphaned_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms whose VDU record no longer exists
        :param nsr_id: Network service record id

        An alarm is considered orphaned when the common DB client raises
        VdurNotFound for its VDU record; it is then removed from MON (best
        effort) and its HealingAction record is deleted.
        """
        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    try:
                        self.db_client.get_vdur(
                            nsr_id, alarm.vnf_member_index, alarm.vdur_name
                        )
                    except VdurNotFound:
                        log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
                        await self._delete_mon_alarm(alarm)
                        alarm.delete_instance()
        except Exception:
            log.exception("Error deleting orphaned alarms:")
            raise
        finally:
            database.db.close()

    async def delete_healing_alarms(self, nsr_id):
        """
        Deleting the healing alarms
        :param nsr_id: Network service record id

        Every HealingAction record of the network service is removed from MON
        (best effort) and then deleted from the database.
        """
        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
        database.db.connect()
        try:
            with database.db.atomic():
                for alarm in HealingActionRepository.list(
                    HealingAction.nsr_id == nsr_id
                ):
                    await self._delete_mon_alarm(alarm)
                    alarm.delete_instance()
        except Exception:
            log.exception("Error deleting vnf alarms:")
            raise
        finally:
            database.db.close()

    async def update_alarm_status(self, alarm_uuid: str, status: str):
        """
        For updating the alarm status
        :param alarm_uuid: UUID of the alarm
        :param status: Status of an alarm

        Nothing is changed when no healing action is stored for the alarm.
        """
        database.db.connect()
        try:
            with database.db.atomic():
                alarm = HealingActionRepository.get(
                    HealingAction.alarm_uuid == alarm_uuid
                )
                alarm.last_status = status
                alarm.save()
        except HealingAction.DoesNotExist:
            log.debug("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()

    async def handle_alarm(self, alarm_uuid: str, status: str):
        """
        For Handling the healing alarms
        :param alarm_uuid: UUID of the alarm
        :param status: Status of an alarm

        The new status is stored first. When the status is "alarm" and the
        cooldown time since the last heal has elapsed, a heal operation is
        requested through the LCM client and the heal time is recorded on the
        alarm that triggered it.
        """
        await self.update_alarm_status(alarm_uuid, status)
        database.db.connect()
        try:
            if status == "alarm":
                with database.db.atomic():
                    alarm = HealingActionRepository.get(
                        HealingAction.alarm_uuid == alarm_uuid
                    )
                    # NOTE(review): this lookup is not restricted to the
                    # alarm's nsr_id - confirm vdur_name is unique across
                    # network services.
                    related_alarms = HealingActionRepository.list(
                        HealingAction.vnf_member_index == alarm.vnf_member_index,
                        HealingAction.vdur_name == alarm.vdur_name,
                    )
                    # A distinct loop name keeps ``alarm`` bound to the
                    # triggering record, which is the one saved below.
                    statuses = [related.last_status for related in related_alarms]
                    if "alarm" in statuses:
                        delta = datetime.datetime.now() - alarm.last_heal
                        if delta.total_seconds() > alarm.cooldown_time:
                            await self.lcm_client.heal(
                                alarm.nsr_id,
                                alarm.vnfinstance_id,
                                alarm.vdur_name,
                                alarm.vdu_id,
                                alarm.vnf_member_index,
                                alarm.recovery_action,
                                alarm.day1,
                                alarm.count_index,
                            )
                            last_heal = datetime.datetime.now()
                            log.info("datetime.datetime.now %s", last_heal)
                            alarm.last_heal = last_heal
                            alarm.save()
        except HealingAction.DoesNotExist:
            log.info("There is no healing action configured for alarm %s.", alarm_uuid)
        finally:
            database.db.close()