X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_policy_module%2Fcore%2Fagent.py;h=ba35391eff9d3b7fe09c7224894908a5b65c5b03;hb=f35f914f45080e150cc7c6a2928de47a9ae5c848;hp=410413f68a47e066d9bce0eb1b49740849e3aca9;hpb=d0fea7ea94e262be18a0f473eebd7e7239d892b2;p=osm%2FPOL.git diff --git a/osm_policy_module/core/agent.py b/osm_policy_module/core/agent.py index 410413f..ba35391 100644 --- a/osm_policy_module/core/agent.py +++ b/osm_policy_module/core/agent.py @@ -21,6 +21,7 @@ # For those usages not covered by the Apache License, Version 2.0 please # contact: bdiaz@whitestack.com or glavado@whitestack.com ## +import datetime import json import logging import threading @@ -28,25 +29,25 @@ from json import JSONDecodeError import yaml from kafka import KafkaConsumer -from osm_common import dbmongo +from osm_policy_module.common.db_client import DbClient from osm_policy_module.common.lcm_client import LcmClient from osm_policy_module.common.mon_client import MonClient from osm_policy_module.core import database from osm_policy_module.core.config import Config -from osm_policy_module.core.database import ScalingRecord, ScalingAlarm +from osm_policy_module.core.database import ScalingGroup, ScalingAlarm, ScalingPolicy, ScalingCriteria log = logging.getLogger(__name__) +ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'notify_alarm'] + class PolicyModuleAgent: def __init__(self): cfg = Config.instance() - self.common_db = dbmongo.DbMongo() - self.common_db.db_connect({'host': cfg.OSMPOL_DATABASE_HOST, - 'port': int(cfg.OSMPOL_DATABASE_PORT), - 'name': 'osm'}) + self.db_client = DbClient() self.mon_client = MonClient() + self.lcm_client = LcmClient() self.kafka_server = '{}:{}'.format(cfg.OSMPOL_MESSAGE_HOST, cfg.OSMPOL_MESSAGE_PORT) @@ -62,141 +63,215 @@ class PolicyModuleAgent: t.start() def _process_msg(self, topic, key, msg): + log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg) try: - # Check for ns instantiation - if key == 'instantiated': - try: - content = json.loads(msg) - except JSONDecodeError: - content = yaml.safe_load(msg) - log.info("Message arrived with topic: %s, key: %s, msg: %s", topic, key, content) - nslcmop_id = content['nslcmop_id'] - nslcmop = self.common_db.get_one(table="nslcmops", - filter={"_id": nslcmop_id}) - if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED': - nsr_id = nslcmop['nsInstanceId'] - log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id) - self._configure_scaling_groups(nsr_id) - else: - log.info( - "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. " - "Current state is %s. Skipping...", - nslcmop['operationState']) - - if key == 'notify_alarm': + if key in ALLOWED_KAFKA_KEYS: try: content = json.loads(msg) except JSONDecodeError: content = yaml.safe_load(msg) - log.info("Message arrived with topic: %s, key: %s, msg: %s", topic, key, content) - alarm_id = content['notify_details']['alarm_uuid'] - metric_name = content['notify_details']['metric_name'] - operation = content['notify_details']['operation'] - threshold = content['notify_details']['threshold_value'] - vdu_name = content['notify_details']['vdu_name'] - vnf_member_index = content['notify_details']['vnf_member_index'] - ns_id = content['notify_details']['ns_id'] - log.info( - "Received alarm notification for alarm %s, \ - metric %s, \ - operation %s, \ - threshold %s, \ - vdu_name %s, \ - vnf_member_index %s, \ - ns_id %s ", - alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id) - try: - alarm = ScalingAlarm.select().where(ScalingAlarm.alarm_id == alarm_id).get() - lcm_client = LcmClient() - log.info("Sending scaling action message for ns: %s", alarm_id) - lcm_client.scale(alarm.scaling_record.nsr_id, alarm.scaling_record.name, alarm.vnf_member_index, - alarm.action) - except ScalingAlarm.DoesNotExist: - log.info("There is no action configured for alarm %s.", alarm_id) - except Exception: - log.exception("Error consuming message: ") - def _get_vnfr(self, nsr_id: str, member_index: int): - vnfr = self.common_db.get_one(table="vnfrs", - filter={"nsr-id-ref": nsr_id, "member-vnf-index-ref": str(member_index)}) - return vnfr + if key == 'instantiated' or key == 'scaled': + self._handle_instantiated_or_scaled(content) - def _get_vnfrs(self, nsr_id: str): - return [self._get_vnfr(nsr_id, member['member-vnf-index']) for member in - self._get_nsr(nsr_id)['nsd']['constituent-vnfd']] + if key == 'notify_alarm': + self._handle_alarm_notification(content) + else: + log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key) + except Exception: + log.exception("Error consuming message: ") - def _get_vnfd(self, vnfd_id: str): - vnfr = self.common_db.get_one(table="vnfds", - filter={"_id": vnfd_id}) - return vnfr + def _handle_alarm_notification(self, content): + log.debug("_handle_alarm_notification: %s", content) + alarm_id = content['notify_details']['alarm_uuid'] + metric_name = content['notify_details']['metric_name'] + operation = content['notify_details']['operation'] + threshold = content['notify_details']['threshold_value'] + vdu_name = content['notify_details']['vdu_name'] + vnf_member_index = content['notify_details']['vnf_member_index'] + ns_id = content['notify_details']['ns_id'] + log.info( + "Received alarm notification for alarm %s, \ + metric %s, \ + operation %s, \ + threshold %s, \ + vdu_name %s, \ + vnf_member_index %s, \ + ns_id %s ", + alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id) + try: + alarm = ScalingAlarm.select().where(ScalingAlarm.alarm_id == alarm_id).get() + delta = datetime.datetime.now() - alarm.scaling_criteria.scaling_policy.last_scale + log.debug("last_scale: %s", alarm.scaling_criteria.scaling_policy.last_scale) + log.debug("now: %s", datetime.datetime.now()) + log.debug("delta: %s", delta) + if delta.total_seconds() < alarm.scaling_criteria.scaling_policy.cooldown_time: + log.info("Time between last scale and now is less than cooldown time. Skipping.") + return + log.info("Sending scaling action message for ns: %s", alarm_id) + self.lcm_client.scale(alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id, + alarm.scaling_criteria.scaling_policy.scaling_group.name, + alarm.vnf_member_index, + alarm.action) + alarm.scaling_criteria.scaling_policy.last_scale = datetime.datetime.now() + alarm.scaling_criteria.scaling_policy.save() + except ScalingAlarm.DoesNotExist: + log.info("There is no action configured for alarm %s.", alarm_id) - def _get_nsr(self, nsr_id: str): - nsr = self.common_db.get_one(table="nsrs", - filter={"id": nsr_id}) - return nsr + def _handle_instantiated_or_scaled(self, content): + log.debug("_handle_instantiated_or_scaled: %s", content) + nslcmop_id = content['nslcmop_id'] + nslcmop = self.db_client.get_nslcmop(nslcmop_id) + if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED': + nsr_id = nslcmop['nsInstanceId'] + log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id) + self._configure_scaling_groups(nsr_id) + else: + log.info( + "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. " + "Current state is %s. Skipping...", + nslcmop['operationState']) def _configure_scaling_groups(self, nsr_id: str): - # TODO(diazb): Check for alarm creation on exception and clean resources if needed. + log.debug("_configure_scaling_groups: %s", nsr_id) + # TODO: Add support for non-nfvi metrics + alarms_created = [] with database.db.atomic(): - vnfrs = self._get_vnfrs(nsr_id) - log.info("Checking %s vnfrs...", len(vnfrs)) - for vnfr in vnfrs: - vnfd = self._get_vnfd(vnfr['vnfd-id']) - log.info("Looking for vnfd %s", vnfr['vnfd-id']) - scaling_groups = vnfd['scaling-group-descriptor'] - vnf_monitoring_params = vnfd['monitoring-param'] - for scaling_group in scaling_groups: - log.info("Creating scaling record in DB...") - scaling_record = ScalingRecord.create( - nsr_id=nsr_id, - name=scaling_group['name'], - content=json.dumps(scaling_group) - ) - log.info("Created scaling record in DB : nsr_id=%s, name=%s, content=%s", - scaling_record.nsr_id, - scaling_record.name, - scaling_record.content) - for scaling_policy in scaling_group['scaling-policy']: - for vdur in vnfd['vdu']: - vdu_monitoring_params = vdur['monitoring-param'] - for scaling_criteria in scaling_policy['scaling-criteria']: - vnf_monitoring_param = next( - filter(lambda param: param['id'] == scaling_criteria['vnf-monitoring-param-ref'], - vnf_monitoring_params)) - # TODO: Add support for non-nfvi metrics - vdu_monitoring_param = next( - filter( - lambda param: param['id'] == vnf_monitoring_param['vdu-monitoring-param-ref'], - vdu_monitoring_params)) - alarm_uuid = self.mon_client.create_alarm( - metric_name=vdu_monitoring_param['nfvi-metric'], - ns_id=nsr_id, - vdu_name=vdur['name'], - vnf_member_index=vnfr['member-vnf-index-ref'], - threshold=scaling_criteria['scale-in-threshold'], - operation=scaling_criteria['scale-in-relational-operation'], - statistic=vnf_monitoring_param['aggregation-type'] - ) - ScalingAlarm.create( - alarm_id=alarm_uuid, - action='scale_in', - vnf_member_index=int(vnfr['member-vnf-index-ref']), - vdu_name=vdur['name'], - scaling_record=scaling_record - ) - alarm_uuid = self.mon_client.create_alarm( - metric_name=vdu_monitoring_param['nfvi-metric'], - ns_id=nsr_id, - vdu_name=vdur['name'], + try: + with database.db.atomic(): + vnfrs = self.db_client.get_vnfrs(nsr_id) + log.info("Found %s vnfrs", len(vnfrs)) + for vnfr in vnfrs: + vnfd = self.db_client.get_vnfd(vnfr['vnfd-id']) + log.info("Looking for vnfd %s", vnfr['vnfd-id']) + scaling_groups = vnfd['scaling-group-descriptor'] + vnf_monitoring_params = vnfd['monitoring-param'] + for scaling_group in scaling_groups: + try: + scaling_group_record = ScalingGroup.select().where( + ScalingGroup.nsr_id == nsr_id, + ScalingGroup.vnf_member_index == int(vnfr['member-vnf-index-ref']), + ScalingGroup.name == scaling_group['name'] + ).get() + log.info("Found existing scaling group record in DB...") + except ScalingGroup.DoesNotExist: + log.info("Creating scaling group record in DB...") + scaling_group_record = ScalingGroup.create( + nsr_id=nsr_id, vnf_member_index=vnfr['member-vnf-index-ref'], - threshold=scaling_criteria['scale-out-threshold'], - operation=scaling_criteria['scale-out-relational-operation'], - statistic=vnf_monitoring_param['aggregation-type'] - ) - ScalingAlarm.create( - alarm_id=alarm_uuid, - action='scale_out', - vnf_member_index=int(vnfr['member-vnf-index-ref']), - vdu_name=vdur['name'], - scaling_record=scaling_record + name=scaling_group['name'], + content=json.dumps(scaling_group) ) + log.info( + "Created scaling group record in DB : nsr_id=%s, vnf_member_index=%s, name=%s", + scaling_group_record.nsr_id, + scaling_group_record.vnf_member_index, + scaling_group_record.name) + for scaling_policy in scaling_group['scaling-policy']: + if scaling_policy['scaling-type'] != 'automatic': + continue + try: + scaling_policy_record = ScalingPolicy.select().join(ScalingGroup).where( + ScalingPolicy.name == scaling_policy['name'], + ScalingGroup.id == scaling_group_record.id + ).get() + log.info("Found existing scaling policy record in DB...") + except ScalingPolicy.DoesNotExist: + log.info("Creating scaling policy record in DB...") + scaling_policy_record = ScalingPolicy.create( + nsr_id=nsr_id, + name=scaling_policy['name'], + cooldown_time=scaling_policy['cooldown-time'], + scaling_group=scaling_group_record + ) + log.info("Created scaling policy record in DB : name=%s, scaling_group.name=%s", + scaling_policy_record.name, + scaling_policy_record.scaling_group.name) + + for scaling_criteria in scaling_policy['scaling-criteria']: + try: + scaling_criteria_record = ScalingCriteria.select().join(ScalingPolicy).where( + ScalingPolicy.id == scaling_policy_record.id, + ScalingCriteria.name == scaling_criteria['name'] + ).get() + log.info("Found existing scaling criteria record in DB...") + except ScalingCriteria.DoesNotExist: + log.info("Creating scaling criteria record in DB...") + scaling_criteria_record = ScalingCriteria.create( + nsr_id=nsr_id, + name=scaling_criteria['name'], + scaling_policy=scaling_policy_record + ) + log.info( + "Created scaling criteria record in DB : name=%s, scaling_policy.name=%s", + scaling_criteria_record.name, + scaling_criteria_record.scaling_policy.name) + + for vdu_ref in scaling_group['vdu']: + vnf_monitoring_param = next( + filter(lambda param: param['id'] == scaling_criteria[ + 'vnf-monitoring-param-ref'], vnf_monitoring_params)) + if not vdu_ref['vdu-id-ref'] == vnf_monitoring_param['vdu-ref']: + continue + vdu = next( + filter(lambda vdu: vdu['id'] == vdu_ref['vdu-id-ref'], vnfd['vdu']) + ) + vdu_monitoring_params = vdu['monitoring-param'] + vdu_monitoring_param = next( + filter( + lambda param: param['id'] == vnf_monitoring_param[ + 'vdu-monitoring-param-ref'], + vdu_monitoring_params)) + vdurs = list( + filter(lambda vdur: vdur['vdu-id-ref'] == vnf_monitoring_param['vdu-ref'], + vnfr['vdur'])) + for vdur in vdurs: + try: + ScalingAlarm.select().join(ScalingCriteria).where( + ScalingAlarm.vdu_name == vdur['name'], + ScalingCriteria.name == scaling_criteria['name'] + ).get() + log.debug("vdu %s already has an alarm configured", vdur['name']) + continue + except ScalingAlarm.DoesNotExist: + pass + alarm_uuid = self.mon_client.create_alarm( + metric_name=vdu_monitoring_param['nfvi-metric'], + ns_id=nsr_id, + vdu_name=vdur['name'], + vnf_member_index=vnfr['member-vnf-index-ref'], + threshold=scaling_criteria['scale-in-threshold'], + operation=scaling_criteria['scale-in-relational-operation'], + statistic=vnf_monitoring_param['aggregation-type'] + ) + ScalingAlarm.create( + alarm_id=alarm_uuid, + action='scale_in', + vnf_member_index=int(vnfr['member-vnf-index-ref']), + vdu_name=vdur['name'], + scaling_criteria=scaling_criteria_record + ) + alarm_uuid = self.mon_client.create_alarm( + metric_name=vdu_monitoring_param['nfvi-metric'], + ns_id=nsr_id, + vdu_name=vdur['name'], + vnf_member_index=vnfr['member-vnf-index-ref'], + threshold=scaling_criteria['scale-out-threshold'], + operation=scaling_criteria['scale-out-relational-operation'], + statistic=vnf_monitoring_param['aggregation-type'] + ) + ScalingAlarm.create( + alarm_id=alarm_uuid, + action='scale_out', + vnf_member_index=int(vnfr['member-vnf-index-ref']), + vdu_name=vdur['name'], + scaling_criteria=scaling_criteria_record + ) + + except Exception as e: + log.exception("Error configuring scaling groups:") + if len(alarms_created) > 0: + log.info("Cleaning alarm resources in MON") + for alarm in alarms_created: + self.mon_client.delete_alarm(*alarm) + raise e