Feature 10923: Autohealing
[osm/POL.git] / osm_policy_module / healing / service.py
diff --git a/osm_policy_module/healing/service.py b/osm_policy_module/healing/service.py
new file mode 100644 (file)
index 0000000..8434c66
--- /dev/null
@@ -0,0 +1,292 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=no-member
+
+# Copyright 2018 Whitestack, LLC
+# *************************************************************
+
+# This file is part of OSM Monitoring module
+# All Rights Reserved to Whitestack, LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+
+#         http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# For those usages not covered by the Apache License, Version 2.0 please
+# contact: bdiaz@whitestack.com or glavado@whitestack.com
+##
+import asyncio
+import logging
+import datetime
+
+from osm_policy_module.common.common_db_client import CommonDbClient
+from osm_policy_module.common.lcm_client import LcmClient
+from osm_policy_module.common.mon_client import MonClient
+from osm_policy_module.core import database
+from osm_policy_module.core.config import Config
+from osm_policy_module.core.database import (
+    HealingAction,
+    HealingActionRepository,
+)
+from osm_policy_module.core.exceptions import VdurNotFound
+
+log = logging.getLogger(__name__)
+
+
+class HealingService:
+    def __init__(self, config: Config, loop=None):
+        """
+          Initializing the HealingService
+        """
+        log.info("HealingService Initialized")
+        self.conf = config
+        if not loop:
+            loop = asyncio.get_event_loop()
+        self.loop = loop
+        self.db_client = CommonDbClient(config)
+        self.mon_client = MonClient(config, loop=self.loop)
+        self.lcm_client = LcmClient(config, loop=self.loop)
+        log.info("Constructor created for HealingService")
+
+    async def configure_healing_alarms(self, nsr_id: str):
+        """
+           Configuring the Healing alarms
+           :param nsr_id: Network service record id
+        """
+        log.info("Configuring Healing alarm for NS %s", nsr_id)
+        alarms_created = []
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                vnfrs = self.db_client.get_vnfrs(nsr_id)
+                for vnfr in vnfrs:
+                    vnfd = self.db_client.get_vnfd(vnfr['vnfd-id'])
+                    df = vnfd.get("df", [{}])[0]
+                    if "healing-aspect" not in df:
+                        log.info("No healing configuration present in vnfd")
+                        continue
+                    healing_aspects = df["healing-aspect"]
+                    for healing_aspect in healing_aspects:
+                        for healing_policy in healing_aspect.get(
+                            "healing-policy", ()
+                        ):
+                            vdu_id = healing_policy['vdu-id']
+                            for vdur in vnfr["vdur"]:
+                                if vdu_id == vdur["vdu-id-ref"]:
+                                    try:
+                                        HealingActionRepository.get(
+                                            HealingAction.alarm_id == healing_policy['event-name'],
+                                            HealingAction.vdur_name == vdur['name'],
+                                            HealingAction.nsr_id == nsr_id,
+                                            HealingAction.cooldown_time == healing_policy['cooldown-time'],
+                                            HealingAction.recovery_action == healing_policy['action-on-recovery'],
+                                            HealingAction.vnfinstance_id == vnfr['id'],
+                                            HealingAction.vdu_id == healing_policy['vdu-id'],
+                                            HealingAction.count_index == vdur['count-index']
+                                        )
+                                        log.debug("vdu %s already has an alarm configured with same id %s",
+                                                  healing_policy['vdu-id'], healing_policy['event-name'])
+                                        continue
+                                    except HealingAction.DoesNotExist:
+                                        pass
+
+                                    metric_name = "vm_status"
+                                    alarm_uuid = await self.mon_client.create_alarm(
+                                        metric_name=metric_name,
+                                        ns_id=nsr_id,
+                                        vdu_name=vdur['name'],
+                                        vnf_member_index=vnfr[
+                                            'member-vnf-index-ref'
+                                        ],
+                                        threshold=1,
+                                        operation="LT",
+                                        statistic="AVERAGE"
+                                    )
+                                    alarm = HealingActionRepository.create(
+                                        alarm_id=healing_policy['event-name'],
+                                        alarm_uuid=alarm_uuid,
+                                        nsr_id=nsr_id,
+                                        vnf_member_index=vnfr[
+                                            'member-vnf-index-ref'
+                                        ],
+                                        vdur_name=vdur['name'],
+                                        recovery_action=healing_policy['action-on-recovery'],
+                                        cooldown_time=healing_policy['cooldown-time'],
+                                        day1=healing_policy['day1'],
+                                        vdu_id=healing_policy['vdu-id'],
+                                        vnfinstance_id=vnfr['id'],
+                                        count_index=vdur['count-index']
+                                    )
+                                    alarms_created.append(alarm)
+
+        except Exception as e:
+            log.exception("Error configuring VNF alarms:")
+            if len(alarms_created) > 0:
+                for alarm in alarms_created:
+                    try:
+                        await self.mon_client.delete_alarm(alarm.nsr_id,
+                                                           alarm.vnf_member_index,
+                                                           alarm.vdu_name,
+                                                           alarm.alarm_uuid)
+                    except ValueError:
+                        log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+            raise e
+        finally:
+            database.db.close()
+
+    async def delete_orphaned_healing_alarms(self, nsr_id):
+        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                for alarm in HealingActionRepository.list(
+                    HealingAction.nsr_id == nsr_id
+                ):
+                    try:
+                        self.db_client.get_vdur(
+                            nsr_id,
+                            alarm.vnf_member_index,
+                            alarm.vdur_name
+                        )
+                    except VdurNotFound:
+                        log.info(
+                            "Deleting orphaned alarm %s",
+                            alarm.alarm_uuid
+                        )
+                        try:
+                            await self.mon_client.delete_alarm(
+                                alarm.nsr_id,
+                                alarm.vnf_member_index,
+                                alarm.vdur_name,
+                                alarm.alarm_uuid
+                            )
+                        except ValueError:
+                            log.exception(
+                                "Error deleting alarm in MON %s",
+                                alarm.alarm_uuid
+                            )
+                        alarm.delete_instance()
+
+        except Exception as e:
+            log.exception("Error deleting orphaned alarms:")
+            raise e
+        finally:
+            database.db.close()
+
+    async def delete_healing_alarms(self, nsr_id):
+        """
+            Deleting the healing alarms
+            :param nsr_id: Network service record id
+        """
+        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                for alarm in HealingActionRepository.list(
+                    HealingAction.nsr_id == nsr_id
+                ):
+                    try:
+                        await self.mon_client.delete_alarm(
+                            alarm.nsr_id,
+                            alarm.vnf_member_index,
+                            alarm.vdur_name,
+                            alarm.alarm_uuid
+                        )
+                    except ValueError:
+                        log.exception(
+                            "Error deleting alarm in MON %s",
+                            alarm.alarm_uuid
+                        )
+                    alarm.delete_instance()
+
+        except Exception as e:
+            log.exception("Error deleting vnf alarms:")
+            raise e
+        finally:
+            database.db.close()
+
+    async def update_alarm_status(self, alarm_uuid: str, status: str):
+        """
+          For updating the alarm status
+          :param alarm_uuid: vdu uuid
+          :param status: Status of an alarm
+        """
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                alarm = HealingActionRepository.get(
+                    HealingAction.alarm_uuid == alarm_uuid
+                )
+                alarm.last_status = status
+                alarm.save()
+        except HealingAction.DoesNotExist:
+            log.debug(
+                "There is no healing action configured for alarm %s.", alarm_uuid
+            )
+        finally:
+            database.db.close()
+
+    async def handle_alarm(self, alarm_uuid: str, status: str):
+        """
+          For Handling the healing alarms
+          :param alarm_uuid: vdu uuid
+          :param status: Status of an alarm
+        """
+        await self.update_alarm_status(alarm_uuid, status)
+        database.db.connect()
+        try:
+            if status == "alarm":
+                with database.db.atomic():
+                    alarm = HealingActionRepository.get(
+                        HealingAction.alarm_uuid == alarm_uuid
+                    )
+                    vnf_member_index = alarm.vnf_member_index
+                    vdur_name = alarm.vdur_name
+                    vdu_id = alarm.vdu_id
+                    nsr_id = alarm.nsr_id
+                    heal_type = alarm.recovery_action
+                    cooldown_time = alarm.cooldown_time
+                    count_index = alarm.count_index
+                    last_heal = alarm.last_heal
+                    day1 = alarm.day1
+                    vnfinstance_id = alarm.vnfinstance_id
+                    alarms = HealingActionRepository.list(
+                        HealingAction.vnf_member_index == vnf_member_index,
+                        HealingAction.vdur_name == vdur_name
+                    )
+                    statuses = []
+
+                    for alarm in alarms:
+                        statuses.append(alarm.last_status)
+                    if ((set(statuses) == {'alarm'}) or ('alarm' in statuses)):
+                        delta = datetime.datetime.now() - last_heal
+                        if delta.total_seconds() > cooldown_time:
+                            await self.lcm_client.heal(
+                                nsr_id,
+                                vnfinstance_id,
+                                vdur_name,
+                                vdu_id,
+                                vnf_member_index,
+                                heal_type,
+                                day1,
+                                count_index
+                            )
+                            last_heal = datetime.datetime.now()
+                            log.info("datetime.datetime.now %s", datetime.datetime.now)
+                            alarm.last_heal = last_heal
+                            alarm.save()
+
+        except HealingAction.DoesNotExist:
+            log.info(
+                "There is no healing action configured for alarm %s.",
+                alarm_uuid
+            )
+        finally:
+            database.db.close()