Feature 10923: Autohealing 33/11933/6
author sritharan <priyadarshini@tataelxsi.co.in>
Mon, 25 Apr 2022 12:37:55 +0000 (12:37 +0000)
committer garciadeblas <gerardo.garciadeblas@telefonica.com>
Wed, 15 Jun 2022 17:38:43 +0000 (19:38 +0200)
Change-Id: I898ee9a904306e94c130140831ece04e9bf7dbac
Signed-off-by: sritharan <priyadarshini@tataelxsi.co.in>
osm_policy_module/common/lcm_client.py
osm_policy_module/core/agent.py
osm_policy_module/core/database.py
osm_policy_module/healing/__init__.py [new file with mode: 0644]
osm_policy_module/healing/service.py [new file with mode: 0644]
osm_policy_module/migrations/006_add_heal_alarm.py [new file with mode: 0644]
osm_policy_module/tests/integration/test_policy_agent.py
osm_policy_module/tests/unit/core/test_policy_agent.py
osm_policy_module/tests/unit/healing/test_healing_service.py [new file with mode: 0644]

index 5085a86..102b0dc 100644 (file)
@@ -131,3 +131,116 @@ class LcmClient:
             },
         }
         return nslcmop
+
+    async def heal(
+        self,
+        nsr_id: str,
+        vnfinstance_id: str,
+        vdur_name: str,
+        vdu_id: str,
+        vnf_member_index: str,
+        heal_type: str,
+        day1: bool,
+        count_index: int,
+    ):
+        """
+        Creates a heal operation and sends it to LCM through the message bus.
+
+        The operation is built with _generate_nslcmop_heal, stored through the
+        common DB client and then written to topic "ns" with key "heal".
+
+        param nsr_id: Network service record id
+        param vnfinstance_id: Id of the VNF instance that owns the VDU
+        param vdur_name: Name of the VDU record to heal
+        param vdu_id: Id of the VDU to heal
+        param vnf_member_index: VNF member index
+        param heal_type: Healing action to be executed
+        param day1: To run day1 operations
+        param count_index: Count index of the VDU instance
+        return: None
+        """
+        heal_args = (
+            nsr_id,
+            vnfinstance_id,
+            vdur_name,
+            vdu_id,
+            vnf_member_index,
+            heal_type,
+            day1,
+            count_index,
+        )
+        log.debug("heal %s %s %s %s %s %s %s %s", *heal_args)
+        # Only the _admin section of the nsr is needed to build the operation
+        nsr = self.db_client.get_nsr(nsr_id)
+        heal_op = self._generate_nslcmop_heal(*heal_args, nsr['_admin'])
+        self.db_client.create_nslcmop(heal_op)
+        log.debug("Sending heal action message: %s", json.dumps(heal_op))
+        await self.msg_bus.aiowrite("ns", "heal", heal_op)
+
+    def _generate_nslcmop_heal(
+        self,
+        nsr_id: str,
+        vnfinstance_id: str,
+        vdur_name: str,
+        vdu_id: str,
+        vnf_member_index: str,
+        heal_type: str,
+        day1: bool,
+        count_index: int,
+        admin: dict,
+    ):
+        """
+        Builds the nslcmop document describing a heal operation.
+
+        A fresh uuid is used as operation id and the current time as start and
+        status time. Note that vdur_name and heal_type are only logged here;
+        they are not part of the generated document.
+
+        param nsr_id: Network service record id
+        param vnfinstance_id: Id of the VNF instance to heal
+        param vdur_name: Name of the VDU record (logged only)
+        param vdu_id: Id of the VDU to heal
+        param vnf_member_index: VNF member index
+        param heal_type: Healing action requested (logged only)
+        param day1: Value used for the "run-day1" flags
+        param count_index: Count index of the VDU instance
+        param admin: Dict corresponding to the _admin section of the nsr. Required keys: projects_read, projects_write.
+        return: the nslcmop dict
+        """
+        log.debug(
+            "_generate_nslcmop_heal %s %s %s %s %s %s %s %s %s",
+            nsr_id,
+            vnfinstance_id,
+            vdur_name,
+            vdu_id,
+            vnf_member_index,
+            heal_type,
+            day1,
+            count_index,
+            admin,
+        )
+        op_id = str(uuid.uuid4())
+        timestamp = time.time()
+
+        # Innermost part first: the VDU to heal inside the VNF
+        vdu_params = {
+            "run-day1": day1,
+            "count-index": count_index,
+            "vdu-id": vdu_id
+        }
+        heal_vnf_data = {
+            "vnfInstanceId": vnfinstance_id,
+            "cause": "default",
+            "additionalParams": {
+                "run-day1": day1,
+                "vdu": [vdu_params]
+            }
+        }
+        operation_params = {
+            "lcmOperationType": "heal",
+            "nsInstanceId": nsr_id,
+            "healVnfData": [heal_vnf_data]
+        }
+
+        return {
+            "id": op_id,
+            "_id": op_id,
+            "operationState": "PROCESSING",
+            "statusEnteredTime": timestamp,
+            "nsInstanceId": nsr_id,
+            "member-vnf-index": vnf_member_index,
+            "lcmOperationType": "heal",
+            "startTime": timestamp,
+            "location": "default",
+            "isAutomaticInvocation": True,
+            "operationParams": operation_params,
+            "isCancelPending": False,
+            "links": {
+                "self": "/osm/nslcm/v1/ns_lcm_op_occs/" + op_id,
+                "nsInstance": "/osm/nslcm/v1/ns_instances/" + nsr_id,
+            },
+            "_admin": {
+                "projects_read": admin['projects_read'],
+                "projects_write": admin['projects_write']
+            }
+        }
index 777723b..9867c0c 100644 (file)
@@ -30,6 +30,7 @@ import peewee
 
 from osm_policy_module.alarming.service import AlarmingService
 from osm_policy_module.autoscaling.service import AutoscalingService
+from osm_policy_module.healing.service import HealingService
 from osm_policy_module.common.common_db_client import CommonDbClient
 from osm_policy_module.common.message_bus_client import MessageBusClient
 from osm_policy_module.core.config import Config
@@ -49,6 +50,7 @@ class PolicyModuleAgent:
         self.db_client = CommonDbClient(config)
         self.autoscaling_service = AutoscalingService(config, loop)
         self.alarming_service = AlarmingService(config, loop)
+        self.healing_service = HealingService(config, loop)
 
     def run(self):
         self.loop.run_until_complete(self.start())
@@ -98,6 +100,7 @@ class PolicyModuleAgent:
         status = content["notify_details"]["status"]
         await self.autoscaling_service.handle_alarm(alarm_uuid, status)
         await self.alarming_service.handle_alarm(alarm_uuid, status, content)
+        await self.healing_service.handle_alarm(alarm_uuid, status)
 
     async def _handle_instantiated(self, content):
         log.debug("_handle_instantiated: %s", content)
@@ -111,6 +114,7 @@ class PolicyModuleAgent:
             log.info("Configuring nsr_id: %s", nsr_id)
             await self.autoscaling_service.configure_scaling_groups(nsr_id)
             await self.alarming_service.configure_vnf_alarms(nsr_id)
+            await self.healing_service.configure_healing_alarms(nsr_id)
         else:
             log.info(
                 "Network_service is not in COMPLETED or PARTIALLY_COMPLETED state. "
@@ -131,6 +135,8 @@ class PolicyModuleAgent:
             await self.autoscaling_service.configure_scaling_groups(nsr_id)
             await self.autoscaling_service.delete_orphaned_alarms(nsr_id)
             await self.alarming_service.configure_vnf_alarms(nsr_id)
+            await self.healing_service.configure_healing_alarms(nsr_id)
+            await self.healing_service.delete_orphaned_healing_alarms(nsr_id)
         else:
             log.debug(
                 "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
@@ -151,6 +157,7 @@ class PolicyModuleAgent:
             )
             await self.autoscaling_service.delete_scaling_groups(nsr_id)
             await self.alarming_service.delete_vnf_alarms(nsr_id)
+            await self.healing_service.delete_healing_alarms(nsr_id)
         else:
             log.info(
                 "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
index b7a0b4f..e596d74 100644 (file)
@@ -110,6 +110,22 @@ class AlarmAction(BaseModel):
     alarm = ForeignKeyField(VnfAlarm, related_name="actions", on_delete="CASCADE")
 
 
+class HealingAction(BaseModel):
+    """Healing policy of one VDU record, bound to the MON alarm that triggers it."""
+    # "event-name" of the healing policy in the VNFD
+    alarm_id = CharField()
+    # "action-on-recovery" of the healing policy; sent to LCM as the heal type
+    recovery_action = CharField()
+    # UUID returned by MON when the alarm was created
+    alarm_uuid = CharField(unique=True)
+    # Network service record the alarm belongs to
+    nsr_id = CharField()
+    # "id" of the VNF record that owns the VDU
+    vnfinstance_id = CharField()
+    # "member-vnf-index-ref" of the VNF record
+    vnf_member_index = CharField()
+    # "name" of the VDU record being monitored
+    vdur_name = CharField()
+    # "vdu-id" of the healing policy
+    vdu_id = CharField()
+    # "cooldown-time" of the policy; compared against the seconds elapsed since last_heal
+    cooldown_time = IntegerField()
+    # "count-index" of the VDU record
+    count_index = IntegerField()
+    # Time of the last heal request; initialised to the row creation time
+    last_heal = DateTimeField(default=datetime.datetime.now)
+    # Last status notified for the alarm
+    last_status = CharField(default='insufficient-data')
+    # Whether day1 operations must be run when healing
+    day1 = BooleanField(default=False)
+
+
 class DatabaseManager:
     def __init__(self, config: Config):
         db.initialize(connect(config.get("sql", "database_uri")))
@@ -224,3 +240,18 @@ class AlarmActionRepository:
     @staticmethod
     def create(**query) -> AlarmAction:
         return AlarmAction.create(**query)
+
+
+class HealingActionRepository:
+    """Query helpers for the HealingAction model."""
+
+    @staticmethod
+    def list(*expressions) -> Iterable[HealingAction]:
+        """
+        Return the query selecting the HealingAction rows that match.
+
+        :param expressions: peewee expressions passed to where()
+        :return: the (lazy) select query
+        """
+        # Build the query once; it is both logged and returned. The %s
+        # argument is only rendered when the record is actually emitted.
+        query = HealingAction.select().where(*expressions)
+        log.info("### Printing healing action db alarm %s", query)
+        return query
+
+    @staticmethod
+    def get(*expressions) -> HealingAction:
+        """
+        Return the single HealingAction matching the expressions.
+
+        :param expressions: peewee expressions passed to where()
+        :raises HealingAction.DoesNotExist: when no row matches
+        """
+        return HealingAction.select().where(*expressions).get()
+
+    @staticmethod
+    def create(**query) -> HealingAction:
+        """
+        Insert a new HealingAction row.
+
+        :param query: field values of the new row
+        :return: the created HealingAction
+        """
+        return HealingAction.create(**query)
diff --git a/osm_policy_module/healing/__init__.py b/osm_policy_module/healing/__init__.py
new file mode 100644 (file)
index 0000000..d81308a
--- /dev/null
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Whitestack, LLC
+# *************************************************************
+
+# This file is part of OSM Monitoring module
+# All Rights Reserved to Whitestack, LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+
+#         http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# For those usages not covered by the Apache License, Version 2.0 please
+# contact: bdiaz@whitestack.com or glavado@whitestack.com
+##
diff --git a/osm_policy_module/healing/service.py b/osm_policy_module/healing/service.py
new file mode 100644 (file)
index 0000000..8434c66
--- /dev/null
@@ -0,0 +1,292 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=no-member
+
+# Copyright 2018 Whitestack, LLC
+# *************************************************************
+
+# This file is part of OSM Monitoring module
+# All Rights Reserved to Whitestack, LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+
+#         http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# For those usages not covered by the Apache License, Version 2.0 please
+# contact: bdiaz@whitestack.com or glavado@whitestack.com
+##
+import asyncio
+import logging
+import datetime
+
+from osm_policy_module.common.common_db_client import CommonDbClient
+from osm_policy_module.common.lcm_client import LcmClient
+from osm_policy_module.common.mon_client import MonClient
+from osm_policy_module.core import database
+from osm_policy_module.core.config import Config
+from osm_policy_module.core.database import (
+    HealingAction,
+    HealingActionRepository,
+)
+from osm_policy_module.core.exceptions import VdurNotFound
+
+log = logging.getLogger(__name__)
+
+
+class HealingService:
+    def __init__(self, config: Config, loop=None):
+        """
+          Creates the DB, MON and LCM clients used by the healing service
+          :param config: Policy module configuration
+          :param loop: Event loop given to the MON and LCM clients;
+                       asyncio.get_event_loop() is used when it is not provided
+        """
+        log.info("HealingService Initialized")
+        self.conf = config
+        self.loop = loop or asyncio.get_event_loop()
+        self.db_client = CommonDbClient(config)
+        self.mon_client = MonClient(config, loop=self.loop)
+        self.lcm_client = LcmClient(config, loop=self.loop)
+        log.info("Constructor created for HealingService")
+
+    async def configure_healing_alarms(self, nsr_id: str):
+        """
+           Configuring the Healing alarms
+
+           For every healing policy found under "healing-aspect" in the first
+           deployment flavour of each VNFD, and for every VDU record whose
+           "vdu-id-ref" matches the policy "vdu-id", a "vm_status" alarm is
+           created in MON and stored as a HealingAction. A policy that is
+           already stored with the same values is skipped.
+
+           If anything fails, the MON alarms created during this call are
+           deleted again (best effort) and the original error is re-raised.
+
+           :param nsr_id: Network service record id
+        """
+        log.info("Configuring Healing alarm for NS %s", nsr_id)
+        alarms_created = []
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                vnfrs = self.db_client.get_vnfrs(nsr_id)
+                for vnfr in vnfrs:
+                    vnfd = self.db_client.get_vnfd(vnfr['vnfd-id'])
+                    df = vnfd.get("df", [{}])[0]
+                    if "healing-aspect" not in df:
+                        log.info("No healing configuration present in vnfd")
+                        continue
+                    healing_aspects = df["healing-aspect"]
+                    for healing_aspect in healing_aspects:
+                        for healing_policy in healing_aspect.get(
+                            "healing-policy", ()
+                        ):
+                            vdu_id = healing_policy['vdu-id']
+                            for vdur in vnfr["vdur"]:
+                                if vdu_id == vdur["vdu-id-ref"]:
+                                    try:
+                                        HealingActionRepository.get(
+                                            HealingAction.alarm_id == healing_policy['event-name'],
+                                            HealingAction.vdur_name == vdur['name'],
+                                            HealingAction.nsr_id == nsr_id,
+                                            HealingAction.cooldown_time == healing_policy['cooldown-time'],
+                                            HealingAction.recovery_action == healing_policy['action-on-recovery'],
+                                            HealingAction.vnfinstance_id == vnfr['id'],
+                                            HealingAction.vdu_id == healing_policy['vdu-id'],
+                                            HealingAction.count_index == vdur['count-index']
+                                        )
+                                        log.debug("vdu %s already has an alarm configured with same id %s",
+                                                  healing_policy['vdu-id'], healing_policy['event-name'])
+                                        continue
+                                    except HealingAction.DoesNotExist:
+                                        pass
+
+                                    metric_name = "vm_status"
+                                    alarm_uuid = await self.mon_client.create_alarm(
+                                        metric_name=metric_name,
+                                        ns_id=nsr_id,
+                                        vdu_name=vdur['name'],
+                                        vnf_member_index=vnfr[
+                                            'member-vnf-index-ref'
+                                        ],
+                                        threshold=1,
+                                        operation="LT",
+                                        statistic="AVERAGE"
+                                    )
+                                    alarm = HealingActionRepository.create(
+                                        alarm_id=healing_policy['event-name'],
+                                        alarm_uuid=alarm_uuid,
+                                        nsr_id=nsr_id,
+                                        vnf_member_index=vnfr[
+                                            'member-vnf-index-ref'
+                                        ],
+                                        vdur_name=vdur['name'],
+                                        recovery_action=healing_policy['action-on-recovery'],
+                                        cooldown_time=healing_policy['cooldown-time'],
+                                        # "day1" may be omitted from the policy; fall
+                                        # back to the model default instead of failing
+                                        day1=healing_policy.get('day1', False),
+                                        vdu_id=healing_policy['vdu-id'],
+                                        vnfinstance_id=vnfr['id'],
+                                        count_index=vdur['count-index']
+                                    )
+                                    alarms_created.append(alarm)
+
+        except Exception as e:
+            log.exception("Error configuring VNF alarms:")
+            # Best-effort cleanup of the MON alarms created before the failure
+            for alarm in alarms_created:
+                try:
+                    # HealingAction stores the VDU record name as "vdur_name";
+                    # there is no "vdu_name" attribute on the model
+                    await self.mon_client.delete_alarm(alarm.nsr_id,
+                                                       alarm.vnf_member_index,
+                                                       alarm.vdur_name,
+                                                       alarm.alarm_uuid)
+                except ValueError:
+                    log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+            raise e
+        finally:
+            database.db.close()
+
+    async def delete_orphaned_healing_alarms(self, nsr_id):
+        """
+            Deleting the healing alarms whose VDU record no longer exists
+
+            Each HealingAction stored for the network service is checked with
+            get_vdur; when that raises VdurNotFound the alarm is deleted in
+            MON (a ValueError from MON is only logged) and the stored record
+            is removed.
+
+            :param nsr_id: Network service record id
+        """
+        log.info("Deleting orphaned healing alarms for network service %s", nsr_id)
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                stored_actions = HealingActionRepository.list(
+                    HealingAction.nsr_id == nsr_id
+                )
+                for action in stored_actions:
+                    member_index = action.vnf_member_index
+                    try:
+                        self.db_client.get_vdur(nsr_id, member_index, action.vdur_name)
+                    except VdurNotFound:
+                        log.info("Deleting orphaned alarm %s", action.alarm_uuid)
+                        try:
+                            await self.mon_client.delete_alarm(
+                                action.nsr_id,
+                                action.vnf_member_index,
+                                action.vdur_name,
+                                action.alarm_uuid
+                            )
+                        except ValueError:
+                            # MON failure must not keep the stale record alive
+                            log.exception("Error deleting alarm in MON %s", action.alarm_uuid)
+                        action.delete_instance()
+
+        except Exception as e:
+            log.exception("Error deleting orphaned alarms:")
+            raise e
+        finally:
+            database.db.close()
+
+    async def delete_healing_alarms(self, nsr_id):
+        """
+            Deleting all the healing alarms of a network service
+
+            Every stored HealingAction of the network service is deleted in
+            MON (a ValueError from MON is only logged) and then removed from
+            the database.
+
+            :param nsr_id: Network service record id
+        """
+        log.info("Deleting healing vnf alarms for network service %s", nsr_id)
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                stored_actions = HealingActionRepository.list(
+                    HealingAction.nsr_id == nsr_id
+                )
+                for action in stored_actions:
+                    try:
+                        await self.mon_client.delete_alarm(
+                            action.nsr_id,
+                            action.vnf_member_index,
+                            action.vdur_name,
+                            action.alarm_uuid
+                        )
+                    except ValueError:
+                        log.exception("Error deleting alarm in MON %s", action.alarm_uuid)
+                    # The record is removed even when MON could not delete the alarm
+                    action.delete_instance()
+
+        except Exception as e:
+            log.exception("Error deleting vnf alarms:")
+            raise e
+        finally:
+            database.db.close()
+
+    async def update_alarm_status(self, alarm_uuid: str, status: str):
+        """
+          Stores the last status notified for a healing alarm
+
+          Nothing is changed (only a debug message is logged) when no
+          HealingAction is stored for the alarm.
+
+          :param alarm_uuid: UUID of the alarm
+          :param status: Status of an alarm
+        """
+        database.db.connect()
+        try:
+            with database.db.atomic():
+                matching = HealingAction.alarm_uuid == alarm_uuid
+                healing_action = HealingActionRepository.get(matching)
+                healing_action.last_status = status
+                healing_action.save()
+        except HealingAction.DoesNotExist:
+            log.debug("There is no healing action configured for alarm %s.", alarm_uuid)
+        finally:
+            database.db.close()
+
+    async def handle_alarm(self, alarm_uuid: str, status: str):
+        """
+          For Handling the healing alarms
+
+          The notified status is stored first. When the status is "alarm" and
+          more than cooldown_time seconds have passed since the last heal of
+          the alarm, a heal operation is requested to LCM and last_heal is
+          updated on the alarm that was notified. If no healing action is
+          stored for the alarm, only an info message is logged.
+
+          :param alarm_uuid: UUID of the alarm
+          :param status: Status of an alarm
+        """
+        await self.update_alarm_status(alarm_uuid, status)
+        database.db.connect()
+        try:
+            if status == "alarm":
+                with database.db.atomic():
+                    alarm = HealingActionRepository.get(
+                        HealingAction.alarm_uuid == alarm_uuid
+                    )
+                    vnf_member_index = alarm.vnf_member_index
+                    vdur_name = alarm.vdur_name
+                    related_alarms = HealingActionRepository.list(
+                        HealingAction.vnf_member_index == vnf_member_index,
+                        HealingAction.vdur_name == vdur_name
+                    )
+                    # A dedicated loop name is used on purpose: rebinding
+                    # "alarm" here would make the save() below update the last
+                    # related record instead of the alarm that was notified.
+                    statuses = [related.last_status for related in related_alarms]
+                    if 'alarm' in statuses:
+                        delta = datetime.datetime.now() - alarm.last_heal
+                        if delta.total_seconds() > alarm.cooldown_time:
+                            await self.lcm_client.heal(
+                                alarm.nsr_id,
+                                alarm.vnfinstance_id,
+                                vdur_name,
+                                alarm.vdu_id,
+                                vnf_member_index,
+                                alarm.recovery_action,
+                                alarm.day1,
+                                alarm.count_index
+                            )
+                            last_heal = datetime.datetime.now()
+                            # Log the timestamp itself, not the now() function
+                            log.info("datetime.datetime.now %s", last_heal)
+                            alarm.last_heal = last_heal
+                            alarm.save()
+
+        except HealingAction.DoesNotExist:
+            log.info(
+                "There is no healing action configured for alarm %s.",
+                alarm_uuid
+            )
+        finally:
+            database.db.close()
diff --git a/osm_policy_module/migrations/006_add_heal_alarm.py b/osm_policy_module/migrations/006_add_heal_alarm.py
new file mode 100644 (file)
index 0000000..b44f42d
--- /dev/null
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Whitestack, LLC
+# *************************************************************
+
+# This file is part of OSM Monitoring module
+# All Rights Reserved to Whitestack, LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+
+#         http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# For those usages not covered by the Apache License, Version 2.0 please
+# contact: bdiaz@whitestack.com or glavado@whitestack.com
+##
+"""Peewee migrations -- 006_add_heal_alarm.py.
+
+Some examples (model - class or model name)::
+
+    > Model = migrator.orm['model_name']            # Return model in current state by name
+
+    > migrator.sql(sql)                             # Run custom SQL
+    > migrator.python(func, *args, **kwargs)        # Run python code
+    > migrator.create_model(Model)                  # Create a model (could be used as decorator)
+    > migrator.remove_model(model, cascade=True)    # Remove a model
+    > migrator.add_fields(model, **fields)          # Add fields to a model
+    > migrator.change_fields(model, **fields)       # Change fields
+    > migrator.remove_fields(model, *field_names, cascade=True)
+    > migrator.rename_field(model, old_field_name, new_field_name)
+    > migrator.rename_table(model, new_table_name)
+    > migrator.add_index(model, *col_names, unique=False)
+    > migrator.drop_index(model, *col_names)
+    > migrator.add_not_null(model, *field_names)
+    > migrator.drop_not_null(model, *field_names)
+    > migrator.add_default(model, field_name, default)
+
+"""
+
+import peewee as pw
+
+SQL = pw.SQL
+
+
+def migrate(migrator, database, fake=False, **kwargs):
+    """Create the "healingaction" table.
+
+    :param migrator: migrator on which the new model is registered
+    :param database: not used by this migration
+    :param fake: not used by this migration
+    """
+
+    @migrator.create_model
+    class HealingAction(pw.Model):
+        id = pw.AutoField()
+        recovery_action = pw.CharField(max_length=255)
+        alarm_id = pw.CharField(max_length=255)
+        # One row per MON alarm
+        alarm_uuid = pw.CharField(max_length=255, unique=True)
+        nsr_id = pw.CharField(max_length=255)
+        vnfinstance_id = pw.CharField(max_length=255)
+        vnf_member_index = pw.CharField(max_length=255)
+        vdur_name = pw.CharField(max_length=255)
+        vdu_id = pw.CharField(max_length=255)
+        cooldown_time = pw.IntegerField()
+        count_index = pw.IntegerField()
+        # No default is declared here for last_heal
+        last_heal = pw.DateTimeField()
+        last_status = pw.CharField(max_length=255, default='insufficient-data')
+        day1 = pw.BooleanField(default=False)
+
+        class Meta:
+            table_name = "healingaction"
+
+
+def rollback(migrator, database, fake=False, **kwargs):
+    """Remove the "healingaction" model.
+
+    :param migrator: migrator from which the model is removed
+    :param database: not used by this rollback
+    :param fake: not used by this rollback
+    """
+
+    migrator.remove_model('healingaction')
index 6b1dc64..0329b7c 100644 (file)
@@ -45,6 +45,7 @@ from osm_policy_module.core.database import (
     ScalingCriteria,
     VnfAlarm,
     AlarmAction,
+    HealingAction,
 )
 
 log = logging.getLogger()
@@ -183,6 +184,7 @@ vnfr_record_mocks = [
                         "ip-address": "192.168.160.2",
                     }
                 ],
+                "count-index": 0,
                 "status": "ACTIVE",
                 "vim-id": "63a65636-9fc8-4022-b070-980823e6266a",
                 "name": "cirros_ns-1-cirros_vnfd-VM-1",
@@ -218,6 +220,7 @@ vnfr_record_mocks = [
                         "ip-address": "192.168.160.10",
                     }
                 ],
+                "count-index": 0,
                 "status": "ACTIVE",
                 "vim-id": "a154b8d3-2b10-421a-a51d-4b391d9bd366",
                 "name": "cirros_ns-2-cirros_vnfd-VM-1",
@@ -362,6 +365,21 @@ vnfd_record_mock = {
                     "vdu-configuration-id": "cirros_vnfd-VM-vdu-configuration",
                 }
             ],
+            "healing-aspect": [
+                {
+                    "id": "cirros_vnfd-VM-autoheal",
+                    "healing-policy": [
+                        {
+                            "vdu-id": "cirros_vnfd-VM",
+                            "event-name": "heal-alarm",
+                            "recovery-type": "automatic",
+                            "action-on-recovery": "REDEPLOY_ONLY",
+                            "cooldown-time": 180,
+                            "day1": False
+                        }
+                    ],
+                }
+            ],
             "instantiation-level": [
                 {
                     "id": "default-instantiation-level",
@@ -444,6 +462,7 @@ MODELS = [
     ScalingAlarm,
     VnfAlarm,
     AlarmAction,
+    HealingAction,
 ]
 
 
@@ -585,6 +604,53 @@ class PolicyModuleAgentTest(unittest.TestCase):
             action="{'webhook': ['localhost:9090', 'localhost:9090', 'localhost:9090']}"
         )
 
+    @patch.object(DbMongo, "db_connect", Mock())
+    @patch.object(KafkaProducer, "__init__")
+    @patch.object(MonClient, "create_alarm")
+    @patch.object(CommonDbClient, "get_vnfd")
+    @patch.object(CommonDbClient, "get_nsr")
+    @patch.object(CommonDbClient, "get_vnfr")
+    def test_configure_healing_alarms(
+        self, get_vnfr, get_nsr, get_vnfd, create_alarm, kafka_producer_init
+    ):
+        # A "vm_status" alarm must be requested for the VDU of each of the
+        # two mocked VNF records.
+        def _vnfr_for_member_index(*args, **kwargs):
+            # args[1] is matched against the member index of each mocked record
+            for position, member_index in enumerate(("1", "2")):
+                if member_index in args[1]:
+                    return vnfr_record_mocks[position]
+
+        async def _fake_create_alarm(*args, **kwargs):
+            return uuid.uuid4()
+
+        kafka_producer_init.return_value = None
+        get_vnfr.side_effect = _vnfr_for_member_index
+        get_nsr.return_value = nsr_record_mock
+        get_vnfd.return_value = vnfd_record_mock
+        create_alarm.side_effect = _fake_create_alarm
+        agent = PolicyModuleAgent(Config(), self.loop)
+        self.loop.run_until_complete(
+            agent.healing_service.configure_healing_alarms("test_nsr_id")
+        )
+        expected_alarms = (
+            ("cirros_ns-1-cirros_vnfd-VM-1", "1"),
+            ("cirros_ns-2-cirros_vnfd-VM-1", "2"),
+        )
+        for expected_vdu_name, expected_member_index in expected_alarms:
+            create_alarm.assert_any_call(
+                metric_name="vm_status",
+                ns_id="test_nsr_id",
+                vdu_name=expected_vdu_name,
+                vnf_member_index=expected_member_index,
+                threshold=1,
+                operation="LT",
+                statistic="AVERAGE",
+            )
+
 
 if __name__ == "__main__":
     unittest.main()
index edc15a6..cc17891 100644 (file)
@@ -30,6 +30,7 @@ from osm_policy_module.autoscaling.service import AutoscalingService
 from osm_policy_module.common.common_db_client import CommonDbClient
 from osm_policy_module.core.agent import PolicyModuleAgent
 from osm_policy_module.core.config import Config
+from osm_policy_module.healing.service import HealingService
 
 
 class PolicyAgentTest(unittest.TestCase):
@@ -41,20 +42,26 @@ class PolicyAgentTest(unittest.TestCase):
     @mock.patch("osm_policy_module.alarming.service.LcmClient")
     @mock.patch("osm_policy_module.autoscaling.service.MonClient")
     @mock.patch("osm_policy_module.autoscaling.service.LcmClient")
+    @mock.patch("osm_policy_module.healing.service.MonClient")
+    @mock.patch("osm_policy_module.healing.service.LcmClient")
     @mock.patch.object(AutoscalingService, "configure_scaling_groups")
     @mock.patch.object(AlarmingService, "configure_vnf_alarms")
+    @mock.patch.object(HealingService, "configure_healing_alarms")
     @mock.patch.object(AutoscalingService, "delete_orphaned_alarms")
     @mock.patch.object(CommonDbClient, "get_nslcmop")
     def test_handle_instantiated(
         self,
         get_nslcmop,
         delete_orphaned_alarms,
+        configure_healing_alarms,
         configure_vnf_alarms,
         configure_scaling_groups,
         autoscaling_lcm_client,
         autoscaling_mon_client,
         alarming_lcm_client,
         alarming_mon_client,
+        healing_lcm_client,
+        healing_mon_client,
     ):
         async def mock_configure_scaling_groups(nsr_id):
             pass
@@ -62,6 +69,9 @@ class PolicyAgentTest(unittest.TestCase):
         async def mock_configure_vnf_alarms(nsr_id):
             pass
 
+        async def mock_configure_healing_alarms(nsr_id):
+            pass
+
         async def mock_delete_orphaned_alarms(nsr_id):
             pass
 
@@ -71,6 +81,8 @@ class PolicyAgentTest(unittest.TestCase):
         assert autoscaling_mon_client.called
         assert alarming_lcm_client.called
         assert alarming_mon_client.called
+        assert healing_lcm_client.called
+        assert healing_mon_client.called
         content = {
             "nslcmop_id": "test_id",
         }
@@ -81,6 +93,7 @@ class PolicyAgentTest(unittest.TestCase):
         nslcmop_failed = {"operationState": "FAILED", "nsInstanceId": "test_nsr_id"}
         configure_scaling_groups.side_effect = mock_configure_scaling_groups
         configure_vnf_alarms.side_effect = mock_configure_vnf_alarms
+        configure_healing_alarms.side_effect = mock_configure_healing_alarms
         delete_orphaned_alarms.side_effect = mock_delete_orphaned_alarms
 
         get_nslcmop.return_value = nslcmop_completed
@@ -157,16 +170,22 @@ class PolicyAgentTest(unittest.TestCase):
     @mock.patch("osm_policy_module.autoscaling.service.LcmClient")
     @mock.patch("osm_policy_module.alarming.service.MonClient")
     @mock.patch("osm_policy_module.alarming.service.LcmClient")
+    @mock.patch("osm_policy_module.healing.service.MonClient")
+    @mock.patch("osm_policy_module.healing.service.LcmClient")
     @mock.patch.object(AutoscalingService, "handle_alarm")
     @mock.patch.object(AlarmingService, "handle_alarm")
+    @mock.patch.object(HealingService, "handle_alarm")
     def test_handle_alarm_notification(
         self,
+        healing_handle_alarm,
         alarming_handle_alarm,
         autoscaling_handle_alarm,
         autoscaling_lcm_client,
         autoscaling_mon_client,
         alarming_lcm_client,
         alarming_mon_client,
+        healing_lcm_client,
+        healing_mon_client,
     ):
         async def mock_handle_alarm(alarm_uuid, status, payload=None):
             pass
@@ -177,6 +196,8 @@ class PolicyAgentTest(unittest.TestCase):
         assert autoscaling_mon_client.called
         assert alarming_lcm_client.called
         assert alarming_mon_client.called
+        assert healing_lcm_client.called
+        assert healing_mon_client.called
         content = {
             "notify_details": {
                 "alarm_uuid": "test_alarm_uuid",
@@ -191,10 +212,12 @@ class PolicyAgentTest(unittest.TestCase):
         }
         autoscaling_handle_alarm.side_effect = mock_handle_alarm
         alarming_handle_alarm.side_effect = mock_handle_alarm
+        healing_handle_alarm.side_effect = mock_handle_alarm
 
         self.loop.run_until_complete(agent._handle_alarm_notification(content))
         autoscaling_handle_alarm.assert_called_with("test_alarm_uuid", "alarm")
         alarming_handle_alarm.assert_called_with("test_alarm_uuid", "alarm", content)
+        healing_handle_alarm.assert_called_with("test_alarm_uuid", "alarm")
 
     @mock.patch.object(CommonDbClient, "__init__", lambda *args, **kwargs: None)
     @mock.patch("osm_policy_module.alarming.service.MonClient")
diff --git a/osm_policy_module/tests/unit/healing/test_healing_service.py b/osm_policy_module/tests/unit/healing/test_healing_service.py
new file mode 100644 (file)
index 0000000..e99f654
--- /dev/null
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Whitestack, LLC
+# *************************************************************
+
+# This file is part of OSM Monitoring module
+# All Rights Reserved to Whitestack, LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+
+#         http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# For those usages not covered by the Apache License, Version 2.0 please
+# contact: bdiaz@whitestack.com or glavado@whitestack.com
+##
+import asyncio
+import datetime
+from unittest import TestCase, mock
+
+from osm_policy_module.common.common_db_client import CommonDbClient
+from osm_policy_module.common.lcm_client import LcmClient
+from osm_policy_module.common.mon_client import MonClient
+from osm_policy_module.core.config import Config
+from osm_policy_module.core.database import HealingActionRepository
+from osm_policy_module.healing.service import HealingService
+
+
+@mock.patch.object(LcmClient, "__init__", lambda *args, **kwargs: None)
+@mock.patch.object(MonClient, "__init__", lambda *args, **kwargs: None)
+@mock.patch.object(CommonDbClient, "__init__", lambda *args, **kwargs: None)
+class TestHealscalingService(TestCase):
+    """Unit tests for HealingService with the client constructors stubbed out."""
+
+    def setUp(self):
+        self.config = Config()
+        self.loop = asyncio.new_event_loop()
+        # Close the per-test event loop so it is not leaked between tests.
+        self.addCleanup(self.loop.close)
+
+    @mock.patch.object(HealingActionRepository, "get")
+    @mock.patch("osm_policy_module.core.database.db")
+    def test_update_alarm_status(self, database, get_alarm):
+        """Expect every reported status to be stored on the alarm and saved."""
+        mock_alarm = mock.Mock()
+        mock_alarm.last_status = "insufficient_data"
+        get_alarm.return_value = mock_alarm
+
+        for status in ("alarm", "ok", "insufficient_data"):
+            # Reset so a save() from an earlier status cannot satisfy this check.
+            mock_alarm.save.reset_mock()
+            service = HealingService(self.config)
+            self.loop.run_until_complete(
+                service.update_alarm_status("test_uuid", status)
+            )
+            self.assertEqual(mock_alarm.last_status, status)
+            mock_alarm.save.assert_called_with()
+
+    @mock.patch.object(HealingActionRepository, "list")
+    @mock.patch.object(HealingActionRepository, "get")
+    @mock.patch.object(LcmClient, "heal")
+    @mock.patch("osm_policy_module.core.database.db")
+    def test_handle_alarm(self, database, heal, get_alarm, list_alarms):
+        """Expect handle_alarm to call LcmClient.heal with the alarm's fields."""
+        # mock.patch() takes a dotted-path string; patch.object patches by class.
+        mock_alarm = self._build_mock_alarm(last_status="alarm")
+        get_alarm.return_value = mock_alarm
+        service = HealingService(self.config)
+        self.loop.run_until_complete(service.handle_alarm("test_id", "alarm"))
+        heal.assert_called_with(
+            "test_nsr_id",
+            "test_vnfinstance_id",
+            "test_vdur_name",
+            "test_vdu_id",
+            "test_vnf_member_index",
+            "test_heal_type",
+            "test_day1",
+            "test_count_index",
+        )
+
+    def _build_mock_alarm(
+        self,
+        last_status="alarm",
+        last_heal=datetime.datetime.min,
+        cooldown_time=10,
+    ):
+        """Build a Mock alarm whose fields hold the "test_*" placeholder values."""
+        mock_alarm = mock.Mock()
+        mock_alarm.last_status = last_status
+        mock_alarm.last_heal = last_heal
+        mock_alarm.cooldown_time = cooldown_time
+        mock_alarm.nsr_id = "test_nsr_id"
+        mock_alarm.vnfinstance_id = "test_vnfinstance_id"
+        mock_alarm.vdur_name = "test_vdur_name"
+        mock_alarm.vdu_id = "test_vdu_id"
+        mock_alarm.vnf_member_index = "test_vnf_member_index"
+        mock_alarm.heal_type = "test_heal_type"
+        mock_alarm.day1 = "test_day1"
+        mock_alarm.count_index = "test_count_index"
+        return mock_alarm