Adds migration engine for peewee ORM
[osm/POL.git] / osm_policy_module / core / agent.py
index 410413f..65c741f 100644 (file)
 # For those usages not covered by the Apache License, Version 2.0 please
 # contact: bdiaz@whitestack.com or glavado@whitestack.com
 ##
+import asyncio
+import datetime
 import json
 import logging
-import threading
-from json import JSONDecodeError
 
-import yaml
-from kafka import KafkaConsumer
-from osm_common import dbmongo
+import peewee
 
+from osm_policy_module.common.common_db_client import CommonDbClient
 from osm_policy_module.common.lcm_client import LcmClient
+from osm_policy_module.common.message_bus_client import MessageBusClient
 from osm_policy_module.common.mon_client import MonClient
 from osm_policy_module.core import database
 from osm_policy_module.core.config import Config
-from osm_policy_module.core.database import ScalingRecord, ScalingAlarm
+from osm_policy_module.core.database import ScalingGroup, ScalingAlarm, ScalingPolicy, ScalingCriteria, DatabaseManager
+from osm_policy_module.core.exceptions import VdurNotFound
+from osm_policy_module.utils.vnfd import VnfdUtils
 
 log = logging.getLogger(__name__)
 
+ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'terminated', 'notify_alarm']
+
 
 class PolicyModuleAgent:
-    def __init__(self):
-        cfg = Config.instance()
-        self.common_db = dbmongo.DbMongo()
-        self.common_db.db_connect({'host': cfg.OSMPOL_DATABASE_HOST,
-                                   'port': int(cfg.OSMPOL_DATABASE_PORT),
-                                   'name': 'osm'})
-        self.mon_client = MonClient()
-        self.kafka_server = '{}:{}'.format(cfg.OSMPOL_MESSAGE_HOST,
-                                           cfg.OSMPOL_MESSAGE_PORT)
+    def __init__(self, config: Config, loop=None):
+        """Store the configuration and build the clients the agent works with.
+
+        :param config: configuration object passed to every client created here.
+        :param loop: event loop handed to the MON and LCM clients; when falsy,
+            the loop returned by ``asyncio.get_event_loop()`` is used.
+        """
+        self.conf = config
+        # Fall back to the default loop only when the caller did not give one.
+        self.loop = loop or asyncio.get_event_loop()
+        self.db_client = CommonDbClient(config)
+        self.mon_client = MonClient(config, loop=self.loop)
+        self.lcm_client = LcmClient(config, loop=self.loop)
+        self.database_manager = DatabaseManager(config)
+        self.msg_bus = MessageBusClient(config)
 
     def run(self):
-        consumer = KafkaConsumer(bootstrap_servers=self.kafka_server,
-                                 key_deserializer=bytes.decode,
-                                 value_deserializer=bytes.decode,
-                                 group_id='pol-consumer')
-        consumer.subscribe(["ns", "alarm_response"])
+        self.loop.run_until_complete(self.start())
 
-        for message in consumer:
-            t = threading.Thread(target=self._process_msg, args=(message.topic, message.key, message.value,))
-            t.start()
+    async def start(self):
+        """Read the ``ns`` and ``alarm_response`` topics from the message bus.
+
+        ``_process_msg`` is supplied to ``aioread`` as the message callback. A
+        critical log line is written once the ``aioread`` call returns.
+        """
+        subscribed_topics = ["ns", "alarm_response"]
+        await self.msg_bus.aioread(subscribed_topics, self._process_msg)
+        log.critical("Exiting...")
 
-    def _process_msg(self, topic, key, msg):
+    async def _process_msg(self, topic, key, msg):
+        log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg)
+        log.info("Message arrived: %s", msg)
         try:
-            # Check for ns instantiation
-            if key == 'instantiated':
-                try:
-                    content = json.loads(msg)
-                except JSONDecodeError:
-                    content = yaml.safe_load(msg)
-                log.info("Message arrived with topic: %s, key: %s, msg: %s", topic, key, content)
-                nslcmop_id = content['nslcmop_id']
-                nslcmop = self.common_db.get_one(table="nslcmops",
-                                                 filter={"_id": nslcmop_id})
-                if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED':
-                    nsr_id = nslcmop['nsInstanceId']
-                    log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
-                    self._configure_scaling_groups(nsr_id)
-                else:
-                    log.info(
-                        "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
-                        "Current state is %s. Skipping...",
-                        nslcmop['operationState'])
-
-            if key == 'notify_alarm':
-                try:
-                    content = json.loads(msg)
-                except JSONDecodeError:
-                    content = yaml.safe_load(msg)
-                log.info("Message arrived with topic: %s, key: %s, msg: %s", topic, key, content)
-                alarm_id = content['notify_details']['alarm_uuid']
-                metric_name = content['notify_details']['metric_name']
-                operation = content['notify_details']['operation']
-                threshold = content['notify_details']['threshold_value']
-                vdu_name = content['notify_details']['vdu_name']
-                vnf_member_index = content['notify_details']['vnf_member_index']
-                ns_id = content['notify_details']['ns_id']
-                log.info(
-                    "Received alarm notification for alarm %s, \
-                    metric %s, \
-                    operation %s, \
-                    threshold %s, \
-                    vdu_name %s, \
-                    vnf_member_index %s, \
-                    ns_id %s ",
-                    alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id)
-                try:
-                    alarm = ScalingAlarm.select().where(ScalingAlarm.alarm_id == alarm_id).get()
-                    lcm_client = LcmClient()
-                    log.info("Sending scaling action message for ns: %s", alarm_id)
-                    lcm_client.scale(alarm.scaling_record.nsr_id, alarm.scaling_record.name, alarm.vnf_member_index,
-                                     alarm.action)
-                except ScalingAlarm.DoesNotExist:
-                    log.info("There is no action configured for alarm %s.", alarm_id)
+            if key in ALLOWED_KAFKA_KEYS:
+
+                if key == 'instantiated':
+                    await self._handle_instantiated(msg)
+
+                if key == 'scaled':
+                    await self._handle_scaled(msg)
+
+                if key == 'terminated':
+                    await self._handle_terminated(msg)
+
+                if key == 'notify_alarm':
+                    await self._handle_alarm_notification(msg)
+            else:
+                log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key)
+        except peewee.PeeweeException:
+            log.exception("Database error consuming message: ")
+            raise
         except Exception:
             log.exception("Error consuming message: ")
 
-    def _get_vnfr(self, nsr_id: str, member_index: int):
-        vnfr = self.common_db.get_one(table="vnfrs",
-                                      filter={"nsr-id-ref": nsr_id, "member-vnf-index-ref": str(member_index)})
-        return vnfr
-
-    def _get_vnfrs(self, nsr_id: str):
-        return [self._get_vnfr(nsr_id, member['member-vnf-index']) for member in
-                self._get_nsr(nsr_id)['nsd']['constituent-vnfd']]
-
-    def _get_vnfd(self, vnfd_id: str):
-        vnfr = self.common_db.get_one(table="vnfds",
-                                      filter={"_id": vnfd_id})
-        return vnfr
-
-    def _get_nsr(self, nsr_id: str):
-        nsr = self.common_db.get_one(table="nsrs",
-                                     filter={"id": nsr_id})
-        return nsr
-
-    def _configure_scaling_groups(self, nsr_id: str):
-        # TODO(diazb): Check for alarm creation on exception and clean resources if needed.
-        with database.db.atomic():
-            vnfrs = self._get_vnfrs(nsr_id)
-            log.info("Checking %s vnfrs...", len(vnfrs))
-            for vnfr in vnfrs:
-                vnfd = self._get_vnfd(vnfr['vnfd-id'])
-                log.info("Looking for vnfd %s", vnfr['vnfd-id'])
-                scaling_groups = vnfd['scaling-group-descriptor']
-                vnf_monitoring_params = vnfd['monitoring-param']
-                for scaling_group in scaling_groups:
-                    log.info("Creating scaling record in DB...")
-                    scaling_record = ScalingRecord.create(
-                        nsr_id=nsr_id,
-                        name=scaling_group['name'],
-                        content=json.dumps(scaling_group)
-                    )
-                    log.info("Created scaling record in DB : nsr_id=%s, name=%s, content=%s",
-                             scaling_record.nsr_id,
-                             scaling_record.name,
-                             scaling_record.content)
-                    for scaling_policy in scaling_group['scaling-policy']:
-                        for vdur in vnfd['vdu']:
-                            vdu_monitoring_params = vdur['monitoring-param']
+    async def _handle_alarm_notification(self, content):
+        """Trigger the scaling action configured for a notified alarm.
+
+        The alarm is looked up by the UUID found in the notification. When it
+        exists and its scaling policy is outside the cooldown window, a scale
+        request is sent through ``self.lcm_client`` and the policy's
+        ``last_scale`` is updated and saved.
+
+        :param content: notification whose ``notify_details`` mapping holds
+            ``alarm_uuid``, ``metric_name``, ``operation``, ``threshold_value``,
+            ``vdu_name``, ``vnf_member_index`` and ``ns_id``.
+        :raises KeyError: if one of those keys is missing.
+        """
+        log.debug("_handle_alarm_notification: %s", content)
+        alarm_uuid = content['notify_details']['alarm_uuid']
+        metric_name = content['notify_details']['metric_name']
+        operation = content['notify_details']['operation']
+        threshold = content['notify_details']['threshold_value']
+        vdu_name = content['notify_details']['vdu_name']
+        vnf_member_index = content['notify_details']['vnf_member_index']
+        nsr_id = content['notify_details']['ns_id']
+        # Adjacent literals instead of backslash continuations: a continuation
+        # inside a string keeps the next line's indentation in the message.
+        log.info(
+            "Received alarm notification for alarm %s, metric %s, operation %s, "
+            "threshold %s, vdu_name %s, vnf_member_index %s, ns_id %s",
+            alarm_uuid, metric_name, operation, threshold, vdu_name, vnf_member_index, nsr_id)
+        # Only the lookup is expected to raise DoesNotExist, so guard just that.
+        try:
+            alarm = self.database_manager.get_alarm(alarm_uuid)
+        except ScalingAlarm.DoesNotExist:
+            log.info("There is no action configured for alarm %s.", alarm_uuid)
+            return
+        policy = alarm.scaling_criteria.scaling_policy
+        # Sample the clock once so the logged "now" is the value used for delta.
+        now = datetime.datetime.now()
+        delta = now - policy.last_scale
+        log.debug("last_scale: %s", policy.last_scale)
+        log.debug("now: %s", now)
+        log.debug("delta: %s", delta)
+        # cooldown_time is compared against seconds elapsed since the last scale.
+        if delta.total_seconds() < policy.cooldown_time:
+            log.info("Time between last scale and now is less than cooldown time. Skipping.")
+            return
+        log.info("Sending scaling action message for ns: %s", nsr_id)
+        await self.lcm_client.scale(nsr_id,
+                                    policy.scaling_group.name,
+                                    alarm.vnf_member_index,
+                                    alarm.action)
+        # Fresh timestamp: the cooldown starts after the scale request returned.
+        policy.last_scale = datetime.datetime.now()
+        policy.save()
+
+    async def _handle_instantiated(self, content):
+        """Configure scaling groups after a network service instantiation.
+
+        The operation record named by ``content['nslcmop_id']`` is fetched via
+        ``self.db_client``; scaling groups are configured only when its
+        ``operationState`` is ``COMPLETED`` or ``PARTIALLY_COMPLETED``.
+
+        :param content: message body containing the ``nslcmop_id`` key.
+        """
+        log.debug("_handle_instantiated: %s", content)
+        operation = self.db_client.get_nslcmop(content['nslcmop_id'])
+        state = operation['operationState']
+        if state not in ('COMPLETED', 'PARTIALLY_COMPLETED'):
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                state)
+            return
+        nsr_id = operation['nsInstanceId']
+        log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
+        await self._configure_scaling_groups(nsr_id)
+
+    async def _handle_scaled(self, content):
+        """Refresh scaling groups and drop orphaned alarms after a scale operation.
+
+        The operation record named by ``content['nslcmop_id']`` is fetched via
+        ``self.db_client``; work is done only when its ``operationState`` is
+        ``COMPLETED`` or ``PARTIALLY_COMPLETED``.
+
+        :param content: message body containing the ``nslcmop_id`` key.
+        """
+        log.debug("_handle_scaled: %s", content)
+        operation = self.db_client.get_nslcmop(content['nslcmop_id'])
+        state = operation['operationState']
+        if state not in ('COMPLETED', 'PARTIALLY_COMPLETED'):
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                state)
+            return
+        nsr_id = operation['nsInstanceId']
+        log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
+        await self._configure_scaling_groups(nsr_id)
+        log.info("Checking for orphaned alarms to be deleted for network service with nsr_id: %s", nsr_id)
+        await self._delete_orphaned_alarms(nsr_id)
+
+    async def _handle_terminated(self, content):
+        """Delete scaling groups and alarms after a network service termination.
+
+        Deletion happens only when ``content['operationState']`` is
+        ``COMPLETED`` or ``PARTIALLY_COMPLETED``; otherwise the message is
+        logged and skipped.
+
+        :param content: message body containing ``nsr_id`` and ``operationState``.
+        :raises KeyError: if either key is missing.
+        """
+        # Trace under this handler's own name, as the sibling handlers do.
+        log.debug("_handle_terminated: %s", content)
+        nsr_id = content['nsr_id']
+        if content['operationState'] == 'COMPLETED' or content['operationState'] == 'PARTIALLY_COMPLETED':
+            log.info("Deleting scaling groups and alarms for network service with nsr_id: %s", nsr_id)
+            await self._delete_scaling_groups(nsr_id)
+        else:
+            log.info(
+                "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+                "Current state is %s. Skipping...",
+                content['operationState'])
+
+    async def _configure_scaling_groups(self, nsr_id: str):
+        log.debug("_configure_scaling_groups: %s", nsr_id)
+        alarms_created = []
+        with database.db.atomic() as tx:
+            try:
+                vnfrs = self.db_client.get_vnfrs(nsr_id)
+                for vnfr in vnfrs:
+                    log.info("Processing vnfr: %s", vnfr)
+                    vnfd = self.db_client.get_vnfd(vnfr['vnfd-id'])
+                    log.info("Looking for vnfd %s", vnfr['vnfd-id'])
+                    if 'scaling-group-descriptor' not in vnfd:
+                        continue
+                    scaling_groups = vnfd['scaling-group-descriptor']
+                    vnf_monitoring_params = vnfd['monitoring-param']
+                    for scaling_group in scaling_groups:
+                        try:
+                            scaling_group_record = ScalingGroup.select().where(
+                                ScalingGroup.nsr_id == nsr_id,
+                                ScalingGroup.vnf_member_index == int(vnfr['member-vnf-index-ref']),
+                                ScalingGroup.name == scaling_group['name']
+                            ).get()
+                            log.info("Found existing scaling group record in DB...")
+                        except ScalingGroup.DoesNotExist:
+                            log.info("Creating scaling group record in DB...")
+                            scaling_group_record = ScalingGroup.create(
+                                nsr_id=nsr_id,
+                                vnf_member_index=vnfr['member-vnf-index-ref'],
+                                name=scaling_group['name'],
+                                content=json.dumps(scaling_group)
+                            )
+                            log.info(
+                                "Created scaling group record in DB : nsr_id=%s, vnf_member_index=%s, name=%s",
+                                scaling_group_record.nsr_id,
+                                scaling_group_record.vnf_member_index,
+                                scaling_group_record.name)
+                        for scaling_policy in scaling_group['scaling-policy']:
+                            if scaling_policy['scaling-type'] != 'automatic':
+                                continue
+                            try:
+                                scaling_policy_record = ScalingPolicy.select().join(ScalingGroup).where(
+                                    ScalingPolicy.name == scaling_policy['name'],
+                                    ScalingGroup.id == scaling_group_record.id
+                                ).get()
+                                log.info("Found existing scaling policy record in DB...")
+                            except ScalingPolicy.DoesNotExist:
+                                log.info("Creating scaling policy record in DB...")
+                                scaling_policy_record = ScalingPolicy.create(
+                                    nsr_id=nsr_id,
+                                    name=scaling_policy['name'],
+                                    cooldown_time=scaling_policy['cooldown-time'],
+                                    scaling_group=scaling_group_record
+                                )
+                                log.info("Created scaling policy record in DB : name=%s, scaling_group.name=%s",
+                                         scaling_policy_record.name,
+                                         scaling_policy_record.scaling_group.name)
+
                             for scaling_criteria in scaling_policy['scaling-criteria']:
+                                try:
+                                    scaling_criteria_record = ScalingCriteria.select().join(ScalingPolicy).where(
+                                        ScalingPolicy.id == scaling_policy_record.id,
+                                        ScalingCriteria.name == scaling_criteria['name']
+                                    ).get()
+                                    log.info("Found existing scaling criteria record in DB...")
+                                except ScalingCriteria.DoesNotExist:
+                                    log.info("Creating scaling criteria record in DB...")
+                                    scaling_criteria_record = ScalingCriteria.create(
+                                        nsr_id=nsr_id,
+                                        name=scaling_criteria['name'],
+                                        scaling_policy=scaling_policy_record
+                                    )
+                                    log.info(
+                                        "Created scaling criteria record in DB : name=%s, scaling_policy.name=%s",
+                                        scaling_criteria_record.name,
+                                        scaling_criteria_record.scaling_policy.name)
+
                                 vnf_monitoring_param = next(
-                                    filter(lambda param: param['id'] == scaling_criteria['vnf-monitoring-param-ref'],
-                                           vnf_monitoring_params))
-                                # TODO: Add support for non-nfvi metrics
-                                vdu_monitoring_param = next(
                                     filter(
-                                        lambda param: param['id'] == vnf_monitoring_param['vdu-monitoring-param-ref'],
-                                        vdu_monitoring_params))
-                                alarm_uuid = self.mon_client.create_alarm(
-                                    metric_name=vdu_monitoring_param['nfvi-metric'],
-                                    ns_id=nsr_id,
-                                    vdu_name=vdur['name'],
-                                    vnf_member_index=vnfr['member-vnf-index-ref'],
-                                    threshold=scaling_criteria['scale-in-threshold'],
-                                    operation=scaling_criteria['scale-in-relational-operation'],
-                                    statistic=vnf_monitoring_param['aggregation-type']
-                                )
-                                ScalingAlarm.create(
-                                    alarm_id=alarm_uuid,
-                                    action='scale_in',
-                                    vnf_member_index=int(vnfr['member-vnf-index-ref']),
-                                    vdu_name=vdur['name'],
-                                    scaling_record=scaling_record
-                                )
-                                alarm_uuid = self.mon_client.create_alarm(
-                                    metric_name=vdu_monitoring_param['nfvi-metric'],
-                                    ns_id=nsr_id,
-                                    vdu_name=vdur['name'],
-                                    vnf_member_index=vnfr['member-vnf-index-ref'],
-                                    threshold=scaling_criteria['scale-out-threshold'],
-                                    operation=scaling_criteria['scale-out-relational-operation'],
-                                    statistic=vnf_monitoring_param['aggregation-type']
-                                )
-                                ScalingAlarm.create(
-                                    alarm_id=alarm_uuid,
-                                    action='scale_out',
-                                    vnf_member_index=int(vnfr['member-vnf-index-ref']),
-                                    vdu_name=vdur['name'],
-                                    scaling_record=scaling_record
+                                        lambda param: param['id'] == scaling_criteria[
+                                            'vnf-monitoring-param-ref'
+                                        ],
+                                        vnf_monitoring_params)
                                 )
+                                if 'vdu-monitoring-param' in vnf_monitoring_param:
+                                    vdurs = list(
+                                        filter(
+                                            lambda vdur: vdur['vdu-id-ref'] == vnf_monitoring_param
+                                            ['vdu-monitoring-param']
+                                            ['vdu-ref'],
+                                            vnfr['vdur']
+                                        )
+                                    )
+                                elif 'vdu-metric' in vnf_monitoring_param:
+                                    vdurs = list(
+                                        filter(
+                                            lambda vdur: vdur['vdu-id-ref'] == vnf_monitoring_param
+                                            ['vdu-metric']
+                                            ['vdu-ref'],
+                                            vnfr['vdur']
+                                        )
+                                    )
+                                elif 'vnf-metric' in vnf_monitoring_param:
+                                    vdu = VnfdUtils.get_mgmt_vdu(vnfd)
+                                    vdurs = list(
+                                        filter(
+                                            lambda vdur: vdur['vdu-id-ref'] == vdu['id'],
+                                            vnfr['vdur']
+                                        )
+                                    )
+                                else:
+                                    log.warning(
+                                        "Scaling criteria is referring to a vnf-monitoring-param that does not "
+                                        "contain a reference to a vdu or vnf metric.")
+                                    continue
+                                for vdur in vdurs:
+                                    log.info("Creating alarm for vdur %s ", vdur)
+                                    try:
+                                        (ScalingAlarm.select()
+                                         .join(ScalingCriteria)
+                                         .join(ScalingPolicy)
+                                         .join(ScalingGroup)
+                                         .where(
+                                            ScalingAlarm.vdu_name == vdur['name'],
+                                            ScalingCriteria.name == scaling_criteria['name'],
+                                            ScalingPolicy.name == scaling_policy['name'],
+                                            ScalingGroup.nsr_id == nsr_id
+                                        ).get())
+                                        log.debug("vdu %s already has an alarm configured", vdur['name'])
+                                        continue
+                                    except ScalingAlarm.DoesNotExist:
+                                        pass
+                                    alarm_uuid = await self.mon_client.create_alarm(
+                                        metric_name=vnf_monitoring_param['id'],
+                                        ns_id=nsr_id,
+                                        vdu_name=vdur['name'],
+                                        vnf_member_index=vnfr['member-vnf-index-ref'],
+                                        threshold=scaling_criteria['scale-in-threshold'],
+                                        operation=scaling_criteria['scale-in-relational-operation'],
+                                        statistic=vnf_monitoring_param['aggregation-type']
+                                    )
+                                    alarm = ScalingAlarm.create(
+                                        alarm_uuid=alarm_uuid,
+                                        action='scale_in',
+                                        vnf_member_index=int(vnfr['member-vnf-index-ref']),
+                                        vdu_name=vdur['name'],
+                                        scaling_criteria=scaling_criteria_record
+                                    )
+                                    alarms_created.append(alarm)
+                                    alarm_uuid = await self.mon_client.create_alarm(
+                                        metric_name=vnf_monitoring_param['id'],
+                                        ns_id=nsr_id,
+                                        vdu_name=vdur['name'],
+                                        vnf_member_index=vnfr['member-vnf-index-ref'],
+                                        threshold=scaling_criteria['scale-out-threshold'],
+                                        operation=scaling_criteria['scale-out-relational-operation'],
+                                        statistic=vnf_monitoring_param['aggregation-type']
+                                    )
+                                    alarm = ScalingAlarm.create(
+                                        alarm_uuid=alarm_uuid,
+                                        action='scale_out',
+                                        vnf_member_index=int(vnfr['member-vnf-index-ref']),
+                                        vdu_name=vdur['name'],
+                                        scaling_criteria=scaling_criteria_record
+                                    )
+                                    alarms_created.append(alarm)
+
+            except Exception as e:
+                log.exception("Error configuring scaling groups:")
+                tx.rollback()
+                if len(alarms_created) > 0:
+                    log.info("Cleaning alarm resources in MON")
+                    for alarm in alarms_created:
+                        await self.mon_client.delete_alarm(alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+                                                           alarm.vnf_member_index,
+                                                           alarm.vdu_name,
+                                                           alarm.alarm_uuid)
+                raise e
+
+    async def _delete_scaling_groups(self, nsr_id: str):
+        """Delete every scaling group stored for ``nsr_id`` with its dependants.
+
+        For each group the alarms are removed through ``self.mon_client`` and
+        from the database, followed by the criteria, the policies and the
+        group itself. Any other failure is logged, rolled back and re-raised.
+
+        :param nsr_id: value matched against ``ScalingGroup.nsr_id``.
+        """
+        with database.db.atomic() as tx:
+            try:
+                for group in ScalingGroup.select().where(ScalingGroup.nsr_id == nsr_id):
+                    for policy in group.scaling_policies:
+                        for criteria in policy.scaling_criterias:
+                            for alarm in criteria.scaling_alarms:
+                                try:
+                                    await self.mon_client.delete_alarm(
+                                        alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+                                        alarm.vnf_member_index,
+                                        alarm.vdu_name,
+                                        alarm.alarm_uuid)
+                                except ValueError:
+                                    # A ValueError from MON is only logged; the
+                                    # local record is removed regardless.
+                                    log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+                                alarm.delete_instance()
+                            criteria.delete_instance()
+                        policy.delete_instance()
+                    group.delete_instance()
+
+            except Exception:
+                log.exception("Error deleting scaling groups and alarms:")
+                tx.rollback()
+                raise
+
+    async def _delete_orphaned_alarms(self, nsr_id):
+        """Delete alarms of ``nsr_id`` whose vdur can no longer be found.
+
+        Every alarm under the scaling groups of ``nsr_id`` is checked with
+        ``self.db_client.get_vdur``; when that raises ``VdurNotFound`` the alarm
+        is removed through ``self.mon_client`` and from the database. Any other
+        failure is logged, rolled back and re-raised.
+
+        :param nsr_id: value matched against ``ScalingGroup.nsr_id``.
+        """
+        with database.db.atomic() as tx:
+            try:
+                for group in ScalingGroup.select().where(ScalingGroup.nsr_id == nsr_id):
+                    for policy in group.scaling_policies:
+                        for criteria in policy.scaling_criterias:
+                            for alarm in criteria.scaling_alarms:
+                                try:
+                                    self.db_client.get_vdur(nsr_id, alarm.vnf_member_index, alarm.vdu_name)
+                                except VdurNotFound:
+                                    log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
+                                    try:
+                                        await self.mon_client.delete_alarm(
+                                            alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+                                            alarm.vnf_member_index,
+                                            alarm.vdu_name,
+                                            alarm.alarm_uuid)
+                                    except ValueError:
+                                        # A ValueError from MON is only logged;
+                                        # the local record is removed regardless.
+                                        log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+                                    alarm.delete_instance()
+
+            except Exception:
+                log.exception("Error deleting orphaned alarms:")
+                tx.rollback()
+                raise