Adds migration engine for peewee ORM
[osm/POL.git] / osm_policy_module / core / agent.py
index 8a1a31c..65c741f 100644 (file)
@@ -25,84 +25,81 @@ import asyncio
 import datetime
 import json
 import logging
-from json import JSONDecodeError
 
-import yaml
-from aiokafka import AIOKafkaConsumer
+import peewee
 
 from osm_policy_module.common.common_db_client import CommonDbClient
 from osm_policy_module.common.lcm_client import LcmClient
+from osm_policy_module.common.message_bus_client import MessageBusClient
 from osm_policy_module.common.mon_client import MonClient
 from osm_policy_module.core import database
 from osm_policy_module.core.config import Config
 from osm_policy_module.core.database import ScalingGroup, ScalingAlarm, ScalingPolicy, ScalingCriteria, DatabaseManager
+from osm_policy_module.core.exceptions import VdurNotFound
+from osm_policy_module.utils.vnfd import VnfdUtils
 
 log = logging.getLogger(__name__)
 
-ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'notify_alarm']
+ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'terminated', 'notify_alarm']
 
 
 class PolicyModuleAgent:
-    def __init__(self, loop=None):
-        cfg = Config.instance()
+    def __init__(self, config: Config, loop=None):
+        self.conf = config
         if not loop:
             loop = asyncio.get_event_loop()
         self.loop = loop
-        self.db_client = CommonDbClient()
-        self.mon_client = MonClient(loop=self.loop)
-        self.lcm_client = LcmClient(loop=self.loop)
-        self.kafka_server = '{}:{}'.format(cfg.OSMPOL_MESSAGE_HOST,
-                                           cfg.OSMPOL_MESSAGE_PORT)
-        self.database_manager = DatabaseManager()
+        self.db_client = CommonDbClient(config)
+        self.mon_client = MonClient(config, loop=self.loop)
+        self.lcm_client = LcmClient(config, loop=self.loop)
+        self.database_manager = DatabaseManager(config)
+        self.msg_bus = MessageBusClient(config)
 
     def run(self):
         self.loop.run_until_complete(self.start())
 
     async def start(self):
-        consumer = AIOKafkaConsumer(
+        topics = [
             "ns",
-            "alarm_response",
-            loop=self.loop,
-            bootstrap_servers=self.kafka_server,
-            group_id="pol-consumer",
-            key_deserializer=bytes.decode,
-            value_deserializer=bytes.decode,
-        )
-        await consumer.start()
-        try:
-            async for msg in consumer:
-                await self._process_msg(msg.topic, msg.key, msg.value)
-        finally:
-            await consumer.stop()
+            "alarm_response"
+        ]
+        await self.msg_bus.aioread(topics, self._process_msg)
+        log.critical("Exiting...")
 
     async def _process_msg(self, topic, key, msg):
         log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg)
+        log.info("Message arrived: %s", msg)
         try:
             if key in ALLOWED_KAFKA_KEYS:
-                try:
-                    content = json.loads(msg)
-                except JSONDecodeError:
-                    content = yaml.safe_load(msg)
 
-                if key == 'instantiated' or key == 'scaled':
-                    await self._handle_instantiated_or_scaled(content)
+                if key == 'instantiated':
+                    await self._handle_instantiated(msg)
+
+                if key == 'scaled':
+                    await self._handle_scaled(msg)
+
+                if key == 'terminated':
+                    await self._handle_terminated(msg)
 
                 if key == 'notify_alarm':
-                    await self._handle_alarm_notification(content)
+                    await self._handle_alarm_notification(msg)
             else:
                 log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key)
+        except peewee.PeeweeException:
+            log.exception("Database error consuming message: ")
+            raise
         except Exception:
             log.exception("Error consuming message: ")
 
     async def _handle_alarm_notification(self, content):
         log.debug("_handle_alarm_notification: %s", content)
-        alarm_id = content['notify_details']['alarm_uuid']
+        alarm_uuid = content['notify_details']['alarm_uuid']
         metric_name = content['notify_details']['metric_name']
         operation = content['notify_details']['operation']
         threshold = content['notify_details']['threshold_value']
         vdu_name = content['notify_details']['vdu_name']
         vnf_member_index = content['notify_details']['vnf_member_index']
-        ns_id = content['notify_details']['ns_id']
+        nsr_id = content['notify_details']['ns_id']
         log.info(
             "Received alarm notification for alarm %s, \
             metric %s, \
@@ -111,9 +108,9 @@ class PolicyModuleAgent:
             vdu_name %s, \
             vnf_member_index %s, \
             ns_id %s ",
-            alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id)
+            alarm_uuid, metric_name, operation, threshold, vdu_name, vnf_member_index, nsr_id)
         try:
-            alarm = self.database_manager.get_alarm(alarm_id)
+            alarm = self.database_manager.get_alarm(alarm_uuid)
             delta = datetime.datetime.now() - alarm.scaling_criteria.scaling_policy.last_scale
             log.debug("last_scale: %s", alarm.scaling_criteria.scaling_policy.last_scale)
             log.debug("now: %s", datetime.datetime.now())
@@ -121,33 +118,60 @@ class PolicyModuleAgent:
             if delta.total_seconds() < alarm.scaling_criteria.scaling_policy.cooldown_time:
                 log.info("Time between last scale and now is less than cooldown time. Skipping.")
                 return
-            log.info("Sending scaling action message for ns: %s", alarm_id)
-            await self.lcm_client.scale(ns_id,
+            log.info("Sending scaling action message for ns: %s", nsr_id)
+            await self.lcm_client.scale(nsr_id,
                                         alarm.scaling_criteria.scaling_policy.scaling_group.name,
                                         alarm.vnf_member_index,
                                         alarm.action)
             alarm.scaling_criteria.scaling_policy.last_scale = datetime.datetime.now()
             alarm.scaling_criteria.scaling_policy.save()
         except ScalingAlarm.DoesNotExist:
-            log.info("There is no action configured for alarm %s.", alarm_id)
+            log.info("There is no action configured for alarm %s.", alarm_uuid)
+
+    async def _handle_instantiated(self, content):
+        log.debug("_handle_instantiated: %s", content)
+        nslcmop_id = content['nslcmop_id']
+        nslcmop = self.db_client.get_nslcmop(nslcmop_id)
+        if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED':
+            nsr_id = nslcmop['nsInstanceId']
+            log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
+            await self._configure_scaling_groups(nsr_id)
+        else:
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                nslcmop['operationState'])
 
-    async def _handle_instantiated_or_scaled(self, content):
-        log.debug("_handle_instantiated_or_scaled: %s", content)
+    async def _handle_scaled(self, content):
+        log.debug("_handle_scaled: %s", content)
         nslcmop_id = content['nslcmop_id']
         nslcmop = self.db_client.get_nslcmop(nslcmop_id)
         if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED':
             nsr_id = nslcmop['nsInstanceId']
             log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
             await self._configure_scaling_groups(nsr_id)
+            log.info("Checking for orphaned alarms to be deleted for network service with nsr_id: %s", nsr_id)
+            await self._delete_orphaned_alarms(nsr_id)
         else:
             log.info(
                 "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
                 "Current state is %s. Skipping...",
                 nslcmop['operationState'])
 
+    async def _handle_terminated(self, content):
+        log.debug("_handle_deleted: %s", content)
+        nsr_id = content['nsr_id']
+        if content['operationState'] == 'COMPLETED' or content['operationState'] == 'PARTIALLY_COMPLETED':
+            log.info("Deleting scaling groups and alarms for network service with nsr_id: %s", nsr_id)
+            await self._delete_scaling_groups(nsr_id)
+        else:
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                content['operationState'])
+
     async def _configure_scaling_groups(self, nsr_id: str):
         log.debug("_configure_scaling_groups: %s", nsr_id)
-        # TODO: Add support for non-nfvi metrics
         alarms_created = []
         with database.db.atomic() as tx:
             try:
@@ -247,8 +271,13 @@ class PolicyModuleAgent:
                                         )
                                     )
                                 elif 'vnf-metric' in vnf_monitoring_param:
-                                    log.warning("vnf-metric is not currently supported.")
-                                    continue
+                                    vdu = VnfdUtils.get_mgmt_vdu(vnfd)
+                                    vdurs = list(
+                                        filter(
+                                            lambda vdur: vdur['vdu-id-ref'] == vdu['id'],
+                                            vnfr['vdur']
+                                        )
+                                    )
                                 else:
                                     log.warning(
                                         "Scaling criteria is referring to a vnf-monitoring-param that does not "
@@ -280,13 +309,14 @@ class PolicyModuleAgent:
                                         operation=scaling_criteria['scale-in-relational-operation'],
                                         statistic=vnf_monitoring_param['aggregation-type']
                                     )
-                                    ScalingAlarm.create(
-                                        alarm_id=alarm_uuid,
+                                    alarm = ScalingAlarm.create(
+                                        alarm_uuid=alarm_uuid,
                                         action='scale_in',
                                         vnf_member_index=int(vnfr['member-vnf-index-ref']),
                                         vdu_name=vdur['name'],
                                         scaling_criteria=scaling_criteria_record
                                     )
+                                    alarms_created.append(alarm)
                                     alarm_uuid = await self.mon_client.create_alarm(
                                         metric_name=vnf_monitoring_param['id'],
                                         ns_id=nsr_id,
@@ -296,13 +326,14 @@ class PolicyModuleAgent:
                                         operation=scaling_criteria['scale-out-relational-operation'],
                                         statistic=vnf_monitoring_param['aggregation-type']
                                     )
-                                    ScalingAlarm.create(
-                                        alarm_id=alarm_uuid,
+                                    alarm = ScalingAlarm.create(
+                                        alarm_uuid=alarm_uuid,
                                         action='scale_out',
                                         vnf_member_index=int(vnfr['member-vnf-index-ref']),
                                         vdu_name=vdur['name'],
                                         scaling_criteria=scaling_criteria_record
                                     )
+                                    alarms_created.append(alarm)
 
             except Exception as e:
                 log.exception("Error configuring scaling groups:")
@@ -310,5 +341,59 @@ class PolicyModuleAgent:
                 if len(alarms_created) > 0:
                     log.info("Cleaning alarm resources in MON")
                     for alarm in alarms_created:
-                        await self.mon_client.delete_alarm(*alarm)
+                        await self.mon_client.delete_alarm(alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+                                                           alarm.vnf_member_index,
+                                                           alarm.vdu_name,
+                                                           alarm.alarm_uuid)
+                raise e
+
    async def _delete_scaling_groups(self, nsr_id: str):
        """Delete all scaling records (and their MON alarms) for a network service.

        Walks the ScalingGroup -> ScalingPolicy -> ScalingCriteria -> ScalingAlarm
        hierarchy for the given nsr_id, asking MON to delete each alarm before
        removing the corresponding database rows (children first, so no dangling
        foreign keys). Runs inside a single transaction, rolled back and the
        error re-raised if anything fails.

        :param nsr_id: network service record id whose scaling data is removed
        """
        with database.db.atomic() as tx:
            try:
                for scaling_group in ScalingGroup.select().where(ScalingGroup.nsr_id == nsr_id):
                    for scaling_policy in scaling_group.scaling_policies:
                        for scaling_criteria in scaling_policy.scaling_criterias:
                            for alarm in scaling_criteria.scaling_alarms:
                                try:
                                    await self.mon_client.delete_alarm(
                                        alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
                                        alarm.vnf_member_index,
                                        alarm.vdu_name,
                                        alarm.alarm_uuid)
                                except ValueError:
                                    # Best-effort: a failure to delete the alarm in
                                    # MON must not block cleanup of local records.
                                    log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
                                alarm.delete_instance()
                            scaling_criteria.delete_instance()
                        scaling_policy.delete_instance()
                    scaling_group.delete_instance()

            except Exception as e:
                log.exception("Error deleting scaling groups and alarms:")
                tx.rollback()
                raise e
+
+    async def _delete_orphaned_alarms(self, nsr_id):
+        with database.db.atomic() as tx:
+            try:
+                for scaling_group in ScalingGroup.select().where(ScalingGroup.nsr_id == nsr_id):
+                    for scaling_policy in scaling_group.scaling_policies:
+                        for scaling_criteria in scaling_policy.scaling_criterias:
+                            for alarm in scaling_criteria.scaling_alarms:
+                                try:
+                                    self.db_client.get_vdur(nsr_id, alarm.vnf_member_index, alarm.vdu_name)
+                                except VdurNotFound:
+                                    log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
+                                    try:
+                                        await self.mon_client.delete_alarm(
+                                            alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+                                            alarm.vnf_member_index,
+                                            alarm.vdu_name,
+                                            alarm.alarm_uuid)
+                                    except ValueError:
+                                        log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+                                    alarm.delete_instance()
+
+            except Exception as e:
+                log.exception("Error deleting orphaned alarms:")
+                tx.rollback()
                 raise e