Bug 2315: Fix for Unable to update healing policy using NS Update
[osm/POL.git] / osm_policy_module / core / agent.py
index 52412a6..4a20527 100644 (file)
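This change replaces the threaded Kafka consumer with an asyncio loop: run() calls asyncio.run(self.start()), the message bus feeds _process_msg, and every key in ALLOWED_KAFKA_KEYS is dispatched to its own coroutine handler. The new "policy_updated" key is what addresses Bug 2315: on an NS Update the agent deletes and then re-creates the scaling groups, VNF alarms and healing alarms for the affected VNF member. A minimal sketch of the assumed message shape, with field names taken from _handle_policy_update and placeholder values:

# Assumed payload of a "policy_updated" message on the "ns" topic (sketch only;
# only the fields the handler reads are shown, values are placeholders).
policy_updated_msg = {
    "nsr_id": "<ns-instance-id>",
    "vnf_member_index": "1",
    "operationState": "COMPLETED",  # or "PARTIALLY_COMPLETED"
}
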
 # For those usages not covered by the Apache License, Version 2.0 please
 # contact: bdiaz@whitestack.com or glavado@whitestack.com
 ##
-import json
+import asyncio
 import logging
-import threading
-from json import JSONDecodeError
+import os
+from pathlib import Path
 
-import yaml
-from kafka import KafkaConsumer
+import peewee
 
-from osm_policy_module.common.db_client import DbClient
-from osm_policy_module.common.lcm_client import LcmClient
-from osm_policy_module.common.mon_client import MonClient
-from osm_policy_module.core import database
+from osm_policy_module.alarming.service import AlarmingService
+from osm_policy_module.autoscaling.service import AutoscalingService
+from osm_policy_module.healing.service import HealingService
+from osm_policy_module.common.common_db_client import CommonDbClient
+from osm_policy_module.common.message_bus_client import MessageBusClient
 from osm_policy_module.core.config import Config
-from osm_policy_module.core.database import ScalingRecord, ScalingAlarm
 
 log = logging.getLogger(__name__)
 
+ALLOWED_KAFKA_KEYS = [
+    "instantiated",
+    "scaled",
+    "terminated",
+    "notify_alarm",
+    "policy_updated",
+    "vnf_terminated",
+]
+
 
 class PolicyModuleAgent:
-    def __init__(self):
-        cfg = Config.instance()
-        self.db_client = DbClient()
-        self.mon_client = MonClient()
-        self.kafka_server = '{}:{}'.format(cfg.OSMPOL_MESSAGE_HOST,
-                                           cfg.OSMPOL_MESSAGE_PORT)
+    def __init__(self, config: Config):
+        self.conf = config
+        self.msg_bus = MessageBusClient(config)
+        self.db_client = CommonDbClient(config)
+        self.autoscaling_service = AutoscalingService(config)
+        self.alarming_service = AlarmingService(config)
+        self.healing_service = HealingService(config)
 
     def run(self):
-        consumer = KafkaConsumer(bootstrap_servers=self.kafka_server,
-                                 key_deserializer=bytes.decode,
-                                 value_deserializer=bytes.decode,
-                                 group_id='pol-consumer')
-        consumer.subscribe(["ns", "alarm_response"])
+        asyncio.run(self.start())
+
+    async def start(self):
+        Path("/tmp/osm_pol_agent_health_flag").touch()
+        topics = ["ns", "alarm_response"]
+        await self.msg_bus.aioread(topics, self._process_msg)
+        log.critical("Exiting...")
+        if os.path.exists("/tmp/osm_pol_agent_health_flag"):
+            os.remove("/tmp/osm_pol_agent_health_flag")
+
+    async def _process_msg(self, topic, key, msg):
+        Path("/tmp/osm_pol_agent_health_flag").touch()
+        log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg)
+        try:
+            if key in ALLOWED_KAFKA_KEYS:
+                if key == "instantiated":
+                    await self._handle_instantiated(msg)
 
-        for message in consumer:
-            t = threading.Thread(target=self._process_msg, args=(message.topic, message.key, message.value,))
-            t.start()
+                if key == "scaled":
+                    await self._handle_scaled(msg)
 
-    def _process_msg(self, topic, key, msg):
-        try:
-            # Check for ns instantiation
-            if key == 'instantiated':
-                try:
-                    content = json.loads(msg)
-                except JSONDecodeError:
-                    content = yaml.safe_load(msg)
-                log.info("Message arrived with topic: %s, key: %s, msg: %s", topic, key, content)
-                nslcmop_id = content['nslcmop_id']
-                nslcmop = self.db_client.get_nslcmop(nslcmop_id)
-                if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED':
-                    nsr_id = nslcmop['nsInstanceId']
-                    log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
-                    self._configure_scaling_groups(nsr_id)
-                else:
-                    log.info(
-                        "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
-                        "Current state is %s. Skipping...",
-                        nslcmop['operationState'])
-
-            if key == 'notify_alarm':
-                try:
-                    content = json.loads(msg)
-                except JSONDecodeError:
-                    content = yaml.safe_load(msg)
-                log.info("Message arrived with topic: %s, key: %s, msg: %s", topic, key, content)
-                alarm_id = content['notify_details']['alarm_uuid']
-                metric_name = content['notify_details']['metric_name']
-                operation = content['notify_details']['operation']
-                threshold = content['notify_details']['threshold_value']
-                vdu_name = content['notify_details']['vdu_name']
-                vnf_member_index = content['notify_details']['vnf_member_index']
-                ns_id = content['notify_details']['ns_id']
-                log.info(
-                    "Received alarm notification for alarm %s, \
-                    metric %s, \
-                    operation %s, \
-                    threshold %s, \
-                    vdu_name %s, \
-                    vnf_member_index %s, \
-                    ns_id %s ",
-                    alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id)
-                try:
-                    alarm = ScalingAlarm.select().where(ScalingAlarm.alarm_id == alarm_id).get()
-                    lcm_client = LcmClient()
-                    log.info("Sending scaling action message for ns: %s", alarm_id)
-                    lcm_client.scale(alarm.scaling_record.nsr_id, alarm.scaling_record.name, alarm.vnf_member_index,
-                                     alarm.action)
-                except ScalingAlarm.DoesNotExist:
-                    log.info("There is no action configured for alarm %s.", alarm_id)
+                if key == "terminated":
+                    await self._handle_terminated(msg)
+
+                if key == "notify_alarm":
+                    await self._handle_alarm_notification(msg)
+
+                if key == "policy_updated":
+                    await self._handle_policy_update(msg)
+
+                if key == "vnf_terminated":
+                    await self._handle_vnf_terminated(msg)
+            else:
+                log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key)
+        except peewee.PeeweeException:
+            log.exception("Database error consuming message: ")
+            raise
         except Exception:
             log.exception("Error consuming message: ")
 
-    def _configure_scaling_groups(self, nsr_id: str):
-        # TODO(diazb): Check for alarm creation on exception and clean resources if needed.
-        with database.db.atomic():
-            vnfrs = self.db_client.get_vnfrs(nsr_id)
-            log.info("Checking %s vnfrs...", len(vnfrs))
-            for vnfr in vnfrs:
-                vnfd = self.db_client.get_vnfd(vnfr['vnfd-id'])
-                log.info("Looking for vnfd %s", vnfr['vnfd-id'])
-                scaling_groups = vnfd['scaling-group-descriptor']
-                vnf_monitoring_params = vnfd['monitoring-param']
-                for scaling_group in scaling_groups:
-                    log.info("Creating scaling record in DB...")
-                    scaling_record = ScalingRecord.create(
-                        nsr_id=nsr_id,
-                        name=scaling_group['name'],
-                        content=json.dumps(scaling_group)
-                    )
-                    log.info("Created scaling record in DB : nsr_id=%s, name=%s, content=%s",
-                             scaling_record.nsr_id,
-                             scaling_record.name,
-                             scaling_record.content)
-                    for scaling_policy in scaling_group['scaling-policy']:
-                        for vdur in vnfd['vdu']:
-                            vdu_monitoring_params = vdur['monitoring-param']
-                            for scaling_criteria in scaling_policy['scaling-criteria']:
-                                vnf_monitoring_param = next(
-                                    filter(lambda param: param['id'] == scaling_criteria['vnf-monitoring-param-ref'],
-                                           vnf_monitoring_params))
-                                # TODO: Add support for non-nfvi metrics
-                                vdu_monitoring_param = next(
-                                    filter(
-                                        lambda param: param['id'] == vnf_monitoring_param['vdu-monitoring-param-ref'],
-                                        vdu_monitoring_params))
-                                alarm_uuid = self.mon_client.create_alarm(
-                                    metric_name=vdu_monitoring_param['nfvi-metric'],
-                                    ns_id=nsr_id,
-                                    vdu_name=vdur['name'],
-                                    vnf_member_index=vnfr['member-vnf-index-ref'],
-                                    threshold=scaling_criteria['scale-in-threshold'],
-                                    operation=scaling_criteria['scale-in-relational-operation'],
-                                    statistic=vnf_monitoring_param['aggregation-type']
-                                )
-                                ScalingAlarm.create(
-                                    alarm_id=alarm_uuid,
-                                    action='scale_in',
-                                    vnf_member_index=int(vnfr['member-vnf-index-ref']),
-                                    vdu_name=vdur['name'],
-                                    scaling_record=scaling_record
-                                )
-                                alarm_uuid = self.mon_client.create_alarm(
-                                    metric_name=vdu_monitoring_param['nfvi-metric'],
-                                    ns_id=nsr_id,
-                                    vdu_name=vdur['name'],
-                                    vnf_member_index=vnfr['member-vnf-index-ref'],
-                                    threshold=scaling_criteria['scale-out-threshold'],
-                                    operation=scaling_criteria['scale-out-relational-operation'],
-                                    statistic=vnf_monitoring_param['aggregation-type']
-                                )
-                                ScalingAlarm.create(
-                                    alarm_id=alarm_uuid,
-                                    action='scale_out',
-                                    vnf_member_index=int(vnfr['member-vnf-index-ref']),
-                                    vdu_name=vdur['name'],
-                                    scaling_record=scaling_record
-                                )
+    async def _handle_alarm_notification(self, content):
+        log.debug("_handle_alarm_notification: %s", content)
+        alarm_uuid = content["notify_details"]["alarm_uuid"]
+        status = content["notify_details"]["status"]
+        await self.autoscaling_service.handle_alarm(alarm_uuid, status)
+        await self.alarming_service.handle_alarm(alarm_uuid, status, content)
+        await self.healing_service.handle_alarm(alarm_uuid, status)
+
+    async def _handle_instantiated(self, content):
+        log.debug("_handle_instantiated: %s", content)
+        nslcmop_id = content["nslcmop_id"]
+        nslcmop = self.db_client.get_nslcmop(nslcmop_id)
+        if (
+            nslcmop["operationState"] == "COMPLETED"
+            or nslcmop["operationState"] == "PARTIALLY_COMPLETED"
+        ):
+            nsr_id = nslcmop["nsInstanceId"]
+            log.info("Configuring nsr_id: %s", nsr_id)
+            await self.autoscaling_service.configure_scaling_groups(nsr_id)
+            await self.alarming_service.configure_vnf_alarms(nsr_id)
+            await self.healing_service.configure_healing_alarms(nsr_id)
+        else:
+            log.info(
+                "Network_service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                nslcmop["operationState"],
+            )
+
+    async def _handle_scaled(self, content):
+        log.debug("_handle_scaled: %s", content)
+        nslcmop_id = content["nslcmop_id"]
+        nslcmop = self.db_client.get_nslcmop(nslcmop_id)
+        if (
+            nslcmop["operationState"] == "COMPLETED"
+            or nslcmop["operationState"] == "PARTIALLY_COMPLETED"
+        ):
+            nsr_id = nslcmop["nsInstanceId"]
+            log.info("Configuring scaled service with nsr_id: %s", nsr_id)
+            await self.autoscaling_service.configure_scaling_groups(nsr_id)
+            await self.autoscaling_service.delete_orphaned_alarms(nsr_id)
+            await self.alarming_service.configure_vnf_alarms(nsr_id)
+            await self.healing_service.configure_healing_alarms(nsr_id)
+            await self.healing_service.delete_orphaned_healing_alarms(nsr_id)
+        else:
+            log.debug(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                nslcmop["operationState"],
+            )
+
+    async def _handle_terminated(self, content):
+        log.debug("_handle_deleted: %s", content)
+        nsr_id = content["nsr_id"]
+        if (
+            content["operationState"] == "COMPLETED"
+            or content["operationState"] == "PARTIALLY_COMPLETED"
+        ):
+            log.info(
+                "Deleting scaling groups and alarms for network autoscaling_service with nsr_id: %s",
+                nsr_id,
+            )
+            await self.autoscaling_service.delete_scaling_groups(nsr_id)
+            await self.alarming_service.delete_vnf_alarms(nsr_id)
+            await self.healing_service.delete_healing_alarms(nsr_id)
+        else:
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                content["operationState"],
+            )
+
+    async def _handle_policy_update(self, content):
+        log.info("_handle_policy_update: %s", content)
+        nsr_id = content["nsr_id"]
+        vnf_member_index = content["vnf_member_index"]
+        if (
+            content["operationState"] == "COMPLETED"
+            or content["operationState"] == "PARTIALLY_COMPLETED"
+        ):
+            log.info(
+                "Updating policies of VNF with nsr_id: %s and vnf-member-index: %s",
+                nsr_id,
+                vnf_member_index,
+            )
+            await self.autoscaling_service.delete_scaling_groups(
+                nsr_id, vnf_member_index
+            )
+            await self.alarming_service.delete_vnf_alarms(nsr_id, vnf_member_index)
+            await self.healing_service.delete_healing_alarms(nsr_id, vnf_member_index)
+            await self.autoscaling_service.configure_scaling_groups(
+                nsr_id, vnf_member_index
+            )
+            await self.alarming_service.configure_vnf_alarms(nsr_id, vnf_member_index)
+            await self.healing_service.configure_healing_alarms(
+                nsr_id, vnf_member_index
+            )
+        else:
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                content["operationState"],
+            )
+
+    async def _handle_vnf_terminated(self, content):
+        nsr_id = content["nsr_id"]
+        vnf_member_index = content["vnf_member_index"]
+        if (
+            content["operationState"] == "COMPLETED"
+            or content["operationState"] == "PARTIALLY_COMPLETED"
+        ):
+            log.info(
+                "Deleting policies of VNF with nsr_id: %s and vnf-member-index: %s",
+                nsr_id,
+                vnf_member_index,
+            )
+            await self.autoscaling_service.delete_scaling_groups(
+                nsr_id, vnf_member_index
+            )
+            await self.alarming_service.delete_vnf_alarms(nsr_id, vnf_member_index)
+        else:
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                content["operationState"],
+            )
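
The new handlers can be exercised without a message bus or database by constructing the agent via __new__ and stubbing its services. The following is a test-style sketch, not part of the commit; it assumes the osm_policy_module package and its dependencies are importable and relies only on the handler logic shown in this patch plus unittest.mock.AsyncMock:

# Sketch: drive _process_msg directly with a fake "policy_updated" message.
# PolicyModuleAgent.__new__ skips __init__, so no message bus, common DB or MON
# clients are created; the three services are replaced with AsyncMock instances.
import asyncio
from unittest.mock import AsyncMock

from osm_policy_module.core.agent import PolicyModuleAgent


async def main():
    agent = PolicyModuleAgent.__new__(PolicyModuleAgent)
    agent.autoscaling_service = AsyncMock()
    agent.alarming_service = AsyncMock()
    agent.healing_service = AsyncMock()

    msg = {
        "nsr_id": "<ns-instance-id>",  # placeholder values
        "vnf_member_index": "1",
        "operationState": "COMPLETED",
    }
    await agent._process_msg("ns", "policy_updated", msg)

    # Bug 2315: the healing policy must be torn down and re-created on NS Update.
    agent.healing_service.delete_healing_alarms.assert_awaited_once_with(
        "<ns-instance-id>", "1"
    )
    agent.healing_service.configure_healing_alarms.assert_awaited_once_with(
        "<ns-instance-id>", "1"
    )


asyncio.run(main())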