import datetime
import json
import logging
-from json import JSONDecodeError
-import yaml
-from aiokafka import AIOKafkaConsumer
+import peewee
from osm_policy_module.common.common_db_client import CommonDbClient
from osm_policy_module.common.lcm_client import LcmClient
+from osm_policy_module.common.message_bus_client import MessageBusClient
from osm_policy_module.common.mon_client import MonClient
from osm_policy_module.core import database
from osm_policy_module.core.config import Config
from osm_policy_module.core.database import ScalingGroup, ScalingAlarm, ScalingPolicy, ScalingCriteria, DatabaseManager
+from osm_policy_module.core.exceptions import VdurNotFound
+from osm_policy_module.utils.vnfd import VnfdUtils
log = logging.getLogger(__name__)
-ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'notify_alarm']
+ALLOWED_KAFKA_KEYS = ['instantiated', 'scaled', 'terminated', 'notify_alarm']
class PolicyModuleAgent:
- def __init__(self, loop=None):
- cfg = Config.instance()
+ def __init__(self, config: Config, loop=None):
+ self.conf = config
if not loop:
loop = asyncio.get_event_loop()
self.loop = loop
- self.db_client = CommonDbClient()
- self.mon_client = MonClient(loop=self.loop)
- self.lcm_client = LcmClient(loop=self.loop)
- self.kafka_server = '{}:{}'.format(cfg.OSMPOL_MESSAGE_HOST,
- cfg.OSMPOL_MESSAGE_PORT)
- self.database_manager = DatabaseManager()
+ self.db_client = CommonDbClient(config)
+ self.mon_client = MonClient(config, loop=self.loop)
+ self.lcm_client = LcmClient(config, loop=self.loop)
+ self.database_manager = DatabaseManager(config)
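+        # Message bus client replaces the direct aiokafka consumer used previously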
+ self.msg_bus = MessageBusClient(config)
def run(self):
self.loop.run_until_complete(self.start())
async def start(self):
- consumer = AIOKafkaConsumer(
+        topics = [
"ns",
- "alarm_response",
- loop=self.loop,
- bootstrap_servers=self.kafka_server,
- group_id="pol-consumer",
- key_deserializer=bytes.decode,
- value_deserializer=bytes.decode,
- )
- await consumer.start()
- try:
- async for msg in consumer:
- await self._process_msg(msg.topic, msg.key, msg.value)
- finally:
- await consumer.stop()
+ "alarm_response"
+ ]
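+        # Consume messages on these topics and dispatch each one to _process_msg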
+ await self.msg_bus.aioread(topics, self._process_msg)
+ log.critical("Exiting...")
async def _process_msg(self, topic, key, msg):
log.debug("_process_msg topic=%s key=%s msg=%s", topic, key, msg)
+ log.info("Message arrived: %s", msg)
try:
if key in ALLOWED_KAFKA_KEYS:
- try:
- content = json.loads(msg)
- except JSONDecodeError:
- content = yaml.safe_load(msg)
- if key == 'instantiated' or key == 'scaled':
- await self._handle_instantiated_or_scaled(content)
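+                # Dispatch the message to the appropriate handler based on its key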
+ if key == 'instantiated':
+ await self._handle_instantiated(msg)
+
+ if key == 'scaled':
+ await self._handle_scaled(msg)
+
+ if key == 'terminated':
+ await self._handle_terminated(msg)
if key == 'notify_alarm':
- await self._handle_alarm_notification(content)
+ await self._handle_alarm_notification(msg)
else:
log.debug("Key %s is not in ALLOWED_KAFKA_KEYS", key)
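+        # Database errors are re-raised; any other exception is only logged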
+ except peewee.PeeweeException:
+ log.exception("Database error consuming message: ")
+ raise
except Exception:
log.exception("Error consuming message: ")
async def _handle_alarm_notification(self, content):
log.debug("_handle_alarm_notification: %s", content)
- alarm_id = content['notify_details']['alarm_uuid']
+ alarm_uuid = content['notify_details']['alarm_uuid']
metric_name = content['notify_details']['metric_name']
operation = content['notify_details']['operation']
threshold = content['notify_details']['threshold_value']
vdu_name = content['notify_details']['vdu_name']
vnf_member_index = content['notify_details']['vnf_member_index']
- ns_id = content['notify_details']['ns_id']
+ nsr_id = content['notify_details']['ns_id']
log.info(
"Received alarm notification for alarm %s, \
metric %s, \
vdu_name %s, \
vnf_member_index %s, \
ns_id %s ",
- alarm_id, metric_name, operation, threshold, vdu_name, vnf_member_index, ns_id)
+ alarm_uuid, metric_name, operation, threshold, vdu_name, vnf_member_index, nsr_id)
try:
- alarm = self.database_manager.get_alarm(alarm_id)
+ alarm = self.database_manager.get_alarm(alarm_uuid)
delta = datetime.datetime.now() - alarm.scaling_criteria.scaling_policy.last_scale
log.debug("last_scale: %s", alarm.scaling_criteria.scaling_policy.last_scale)
log.debug("now: %s", datetime.datetime.now())
if delta.total_seconds() < alarm.scaling_criteria.scaling_policy.cooldown_time:
log.info("Time between last scale and now is less than cooldown time. Skipping.")
return
- log.info("Sending scaling action message for ns: %s", alarm_id)
- await self.lcm_client.scale(ns_id,
+ log.info("Sending scaling action message for ns: %s", nsr_id)
+ await self.lcm_client.scale(nsr_id,
alarm.scaling_criteria.scaling_policy.scaling_group.name,
alarm.vnf_member_index,
alarm.action)
alarm.scaling_criteria.scaling_policy.last_scale = datetime.datetime.now()
alarm.scaling_criteria.scaling_policy.save()
except ScalingAlarm.DoesNotExist:
- log.info("There is no action configured for alarm %s.", alarm_id)
+ log.info("There is no action configured for alarm %s.", alarm_uuid)
+
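+    # 'instantiated' events carry only an nslcmop_id; fetch the operation to check whether it completed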
+ async def _handle_instantiated(self, content):
+ log.debug("_handle_instantiated: %s", content)
+ nslcmop_id = content['nslcmop_id']
+ nslcmop = self.db_client.get_nslcmop(nslcmop_id)
+ if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED':
+ nsr_id = nslcmop['nsInstanceId']
+ log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
+ await self._configure_scaling_groups(nsr_id)
+ else:
+ log.info(
+ "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+ "Current state is %s. Skipping...",
+ nslcmop['operationState'])
- async def _handle_instantiated_or_scaled(self, content):
- log.debug("_handle_instantiated_or_scaled: %s", content)
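+    # 'scaled' events reconfigure scaling groups and then remove alarms whose VDUs no longer exist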
+ async def _handle_scaled(self, content):
+ log.debug("_handle_scaled: %s", content)
nslcmop_id = content['nslcmop_id']
nslcmop = self.db_client.get_nslcmop(nslcmop_id)
if nslcmop['operationState'] == 'COMPLETED' or nslcmop['operationState'] == 'PARTIALLY_COMPLETED':
nsr_id = nslcmop['nsInstanceId']
log.info("Configuring scaling groups for network service with nsr_id: %s", nsr_id)
await self._configure_scaling_groups(nsr_id)
+ log.info("Checking for orphaned alarms to be deleted for network service with nsr_id: %s", nsr_id)
+ await self._delete_orphaned_alarms(nsr_id)
else:
log.info(
"Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
"Current state is %s. Skipping...",
nslcmop['operationState'])
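+    # 'terminated' events carry nsr_id and operationState directly, so no nslcmop lookup is needed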
+ async def _handle_terminated(self, content):
+        log.debug("_handle_terminated: %s", content)
+ nsr_id = content['nsr_id']
+ if content['operationState'] == 'COMPLETED' or content['operationState'] == 'PARTIALLY_COMPLETED':
+ log.info("Deleting scaling groups and alarms for network service with nsr_id: %s", nsr_id)
+ await self._delete_scaling_groups(nsr_id)
+ else:
+ log.info(
+ "Network service is not in COMPLETED or PARTIALLY_COMPLETED state. "
+ "Current state is %s. Skipping...",
+ content['operationState'])
+
async def _configure_scaling_groups(self, nsr_id: str):
log.debug("_configure_scaling_groups: %s", nsr_id)
- # TODO: Add support for non-nfvi metrics
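+        # Track alarms created in MON so they can be cleaned up if configuration fails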
alarms_created = []
with database.db.atomic() as tx:
try:
)
)
elif 'vnf-metric' in vnf_monitoring_param:
- log.warning("vnf-metric is not currently supported.")
- continue
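+                            # vnf-metric: attach the alarm to the VNF's management VDU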
+ vdu = VnfdUtils.get_mgmt_vdu(vnfd)
+ vdurs = list(
+ filter(
+ lambda vdur: vdur['vdu-id-ref'] == vdu['id'],
+ vnfr['vdur']
+ )
+ )
else:
log.warning(
"Scaling criteria is referring to a vnf-monitoring-param that does not "
operation=scaling_criteria['scale-in-relational-operation'],
statistic=vnf_monitoring_param['aggregation-type']
)
- ScalingAlarm.create(
- alarm_id=alarm_uuid,
+ alarm = ScalingAlarm.create(
+ alarm_uuid=alarm_uuid,
action='scale_in',
vnf_member_index=int(vnfr['member-vnf-index-ref']),
vdu_name=vdur['name'],
scaling_criteria=scaling_criteria_record
)
+ alarms_created.append(alarm)
alarm_uuid = await self.mon_client.create_alarm(
metric_name=vnf_monitoring_param['id'],
ns_id=nsr_id,
operation=scaling_criteria['scale-out-relational-operation'],
statistic=vnf_monitoring_param['aggregation-type']
)
- ScalingAlarm.create(
- alarm_id=alarm_uuid,
+ alarm = ScalingAlarm.create(
+ alarm_uuid=alarm_uuid,
action='scale_out',
vnf_member_index=int(vnfr['member-vnf-index-ref']),
vdu_name=vdur['name'],
scaling_criteria=scaling_criteria_record
)
+ alarms_created.append(alarm)
except Exception as e:
log.exception("Error configuring scaling groups:")
if len(alarms_created) > 0:
log.info("Cleaning alarm resources in MON")
for alarm in alarms_created:
- await self.mon_client.delete_alarm(*alarm)
+ await self.mon_client.delete_alarm(alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+ alarm.vnf_member_index,
+ alarm.vdu_name,
+ alarm.alarm_uuid)
+ raise e
+
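+    # Delete every scaling record for the NS, removing the corresponding alarms in MON first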
+ async def _delete_scaling_groups(self, nsr_id: str):
+ with database.db.atomic() as tx:
+ try:
+ for scaling_group in ScalingGroup.select().where(ScalingGroup.nsr_id == nsr_id):
+ for scaling_policy in scaling_group.scaling_policies:
+ for scaling_criteria in scaling_policy.scaling_criterias:
+ for alarm in scaling_criteria.scaling_alarms:
+ try:
+ await self.mon_client.delete_alarm(
+ alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+ alarm.vnf_member_index,
+ alarm.vdu_name,
+ alarm.alarm_uuid)
+ except ValueError:
+ log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+ alarm.delete_instance()
+ scaling_criteria.delete_instance()
+ scaling_policy.delete_instance()
+ scaling_group.delete_instance()
+
+ except Exception as e:
+ log.exception("Error deleting scaling groups and alarms:")
+ tx.rollback()
+ raise e
+
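+    # Remove alarms that reference VDUs no longer present in the VNF records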
+ async def _delete_orphaned_alarms(self, nsr_id):
+ with database.db.atomic() as tx:
+ try:
+ for scaling_group in ScalingGroup.select().where(ScalingGroup.nsr_id == nsr_id):
+ for scaling_policy in scaling_group.scaling_policies:
+ for scaling_criteria in scaling_policy.scaling_criterias:
+ for alarm in scaling_criteria.scaling_alarms:
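+                                # The alarm is kept only if its VDU can still be found in the VNFR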
+ try:
+ self.db_client.get_vdur(nsr_id, alarm.vnf_member_index, alarm.vdu_name)
+ except VdurNotFound:
+ log.info("Deleting orphaned alarm %s", alarm.alarm_uuid)
+ try:
+ await self.mon_client.delete_alarm(
+ alarm.scaling_criteria.scaling_policy.scaling_group.nsr_id,
+ alarm.vnf_member_index,
+ alarm.vdu_name,
+ alarm.alarm_uuid)
+ except ValueError:
+ log.exception("Error deleting alarm in MON %s", alarm.alarm_uuid)
+ alarm.delete_instance()
+
+ except Exception as e:
+ log.exception("Error deleting orphaned alarms:")
+ tx.rollback()
raise e