Fix for bug 1450 high memory consumption
[osm/MON.git] / osm_mon / evaluator / evaluator.py
index b040198..6ca0dc5 100644 (file)
 # For those usages not covered by the Apache License, Version 2.0 please
 # contact: bdiaz@whitestack.com or glavado@whitestack.com
 ##
-import json
+import asyncio
 import logging
-import multiprocessing
 import time
+import concurrent.futures
 
-from osm_common.dbbase import DbException
-
-from osm_mon.collector.collector import VIM_COLLECTORS
-from osm_mon.collector.collectors.juju import VCACollector
-from osm_mon.core.common_db import CommonDbClient
-from osm_mon.core.database import DatabaseManager, Alarm
-from osm_mon.core.message_bus.producer import Producer
+from osm_mon.core.config import Config
+from osm_mon.core.message_bus_client import MessageBusClient
+from osm_mon.core.models import Alarm
 from osm_mon.core.response import ResponseBuilder
-from osm_mon.core.settings import Config
+from osm_mon.evaluator.service import EvaluatorService, AlarmStatus
 
 log = logging.getLogger(__name__)
 
 
 class Evaluator:
-    def __init__(self):
-        self.common_db = CommonDbClient()
-        self.plugins = []
-        self.database_manager = DatabaseManager()
-        self.database_manager.create_tables()
-        self.queue = multiprocessing.Queue()
-
-    def _evaluate_vim_metric(self,
-                             nsr_id: str,
-                             vnf_member_index: int,
-                             vdur_name: str,
-                             nfvi_metric_name: str,
-                             vim_account_id: str,
-                             alarm: Alarm):
-        vim_type = self.database_manager.get_vim_type(vim_account_id)
-        if vim_type in VIM_COLLECTORS:
-            collector = VIM_COLLECTORS[vim_type](vim_account_id)
-            metric = collector.collect_one(nsr_id, vnf_member_index, vdur_name, nfvi_metric_name)
-            if alarm.operation.upper() == 'GT':
-                if metric.value > alarm.threshold:
-                    self.queue.put(alarm)
-            elif alarm.operation.upper() == 'LT':
-                if metric.value < alarm.threshold:
-                    self.queue.put(alarm)
-
-        else:
-            log.debug("vimtype %s is not supported.", vim_type)
-
-    def _evaluate_vca_metric(self,
-                             nsr_id: str,
-                             vnf_member_index: int,
-                             vdur_name: str,
-                             vnf_metric_name: str,
-                             alarm: Alarm):
-        collector = VCACollector()
-        metric = collector.collect_one(nsr_id, vnf_member_index, vdur_name, vnf_metric_name)
-        if alarm.operation.upper() == 'GT':
-            if metric.value > alarm.threshold:
-                self.queue.put(alarm)
-        elif alarm.operation.upper() == 'LT':
-            if metric.value < alarm.threshold:
-                self.queue.put(alarm)
+
+    def __init__(self, config: Config, loop=None):
+        """Set up the evaluator with its configuration, event loop and helpers.
+
+        :param config: configuration object; kept as ``self.conf`` and handed
+            to the EvaluatorService and MessageBusClient constructors.
+        :param loop: event loop to keep on the instance; when falsy, the loop
+            returned by ``asyncio.get_event_loop()`` is used instead.
+        """
+        self.conf = config
+        # Fall back to the current event loop when the caller supplies none.
+        self.loop = loop if loop else asyncio.get_event_loop()
+        self.service = EvaluatorService(config)
+        self.msg_bus = MessageBusClient(config)
 
     def evaluate_forever(self):
+        """Run evaluate() in an endless loop, pausing between successful rounds.
+
+        The pause length is the 'interval' entry of the 'evaluator' config
+        section, converted to int. This method never returns.
+        """
-        log.debug('collect_forever')
-        cfg = Config.instance()
+        log.debug('evaluate_forever')
         while True:
             try:
                 self.evaluate()
-                time.sleep(cfg.OSMMON_EVALUATOR_INTERVAL)
+                # NOTE(review): the sleep is inside the try block, so when evaluate()
+                # raises it is skipped and the loop retries immediately.
+                time.sleep(int(self.conf.get('evaluator', 'interval')))
             except Exception:
+                # Log with traceback and keep looping.
                 log.exception("Error evaluating alarms")
 
     def evaluate(self):
-        processes = []
-        for alarm in Alarm.select():
-            try:
-                vnfr = self.common_db.get_vnfr(alarm.nsr_id, alarm.vnf_member_index)
-            except DbException:
-                log.exception("Error getting vnfr: ")
-                continue
-            vnfd = self.common_db.get_vnfd(vnfr['vnfd-id'])
+        log.debug('evaluate')
+        alarms_tuples = self.service.evaluate_alarms()
+        # Starting evaluate executor pool with pool size process_pool_size. Default process_pool_size is 20
+        with concurrent.futures.ProcessPoolExecutor(self.conf.get('evaluator', 'process_pool_size')) as executor:
+            log.info('Started evaluate process pool with pool size %s' % (self.conf.get('evaluator',
+                                                                                        'process_pool_size')))
+            evaluate_futures = []
+            for alarm, status in alarms_tuples:
+                evaluate_futures.append(executor.submit(Evaluator._notify_alarm, self.conf, alarm, status))
+
             try:
-                vdur = next(filter(lambda vdur: vdur['name'] == alarm.vdur_name, vnfr['vdur']))
-            except StopIteration:
-                log.warning("No vdur found with name %s for alarm %s", alarm.vdur_name, alarm.id)
-                continue
-            vdu = next(filter(lambda vdu: vdu['id'] == vdur['vdu-id-ref'], vnfd['vdu']))
-            vnf_monitoring_param = next(
-                filter(lambda param: param['id'] == alarm.monitoring_param, vnfd['monitoring-param']))
-            nsr_id = vnfr['nsr-id-ref']
-            vnf_member_index = vnfr['member-vnf-index-ref']
-            vdur_name = vdur['name']
-            if 'vdu-monitoring-param' in vnf_monitoring_param:
-                vdu_monitoring_param = next(filter(
-                    lambda param: param['id'] == vnf_monitoring_param['vdu-monitoring-param'][
-                        'vdu-monitoring-param-ref'], vdu['monitoring-param']))
-                nfvi_metric = vdu_monitoring_param['nfvi-metric']
-
-                vim_account_id = self.common_db.get_vim_account_id(nsr_id, vnf_member_index)
-                p = multiprocessing.Process(target=self._evaluate_vim_metric,
-                                            args=(nsr_id,
-                                                  vnf_member_index,
-                                                  vdur_name,
-                                                  nfvi_metric,
-                                                  vim_account_id,
-                                                  alarm))
-                processes.append(p)
-                p.start()
-            if 'vdu-metric' in vnf_monitoring_param:
-                vnf_metric_name = vnf_monitoring_param['vdu-metric']['vdu-metric-name-ref']
-                p = multiprocessing.Process(target=self._evaluate_vca_metric,
-                                            args=(nsr_id,
-                                                  vnf_member_index,
-                                                  vdur_name,
-                                                  vnf_metric_name,
-                                                  alarm))
-                processes.append(p)
-                p.start()
-            if 'vnf-metric' in vnf_monitoring_param:
-                log.warning("vnf-metric is not currently supported.")
-                continue
-
-        for process in processes:
-            process.join()
-        triggered_alarms = []
-        while not self.queue.empty():
-            triggered_alarms.append(self.queue.get())
-        for alarm in triggered_alarms:
-            self.notify_alarm(alarm)
-            p = multiprocessing.Process(target=self.notify_alarm,
-                                        args=(alarm,))
-            p.start()
-
-    def notify_alarm(self, alarm: Alarm):
+                # Wait for future calls to complete till process_timeout. Default is 50 seconds
+                for evaluate_future in concurrent.futures.as_completed(evaluate_futures,
+                                                                       self.conf.get('evaluator', 'process_timeout')):
+                    result = evaluate_future.result(timeout=int(self.conf.get('evaluator',
+                                                                              'process_timeout')))
+                    log.debug('result = %s' % (result))
+            except concurrent.futures.TimeoutError as e:
+                # Some processes have not completed due to timeout error
+                log.info('Some processes have not finished due to TimeoutError exception')
+                log.debug('concurrent.futures.TimeoutError exception %s' % (e))
+
+            # Shutting down process pool executor
+            Evaluator._stop_process_pool(executor)
+
+    @staticmethod
+    def _stop_process_pool(executor):
+        """Terminate worker processes that are still alive, then shut the pool down.
+
+        Both steps are best-effort: any exception while terminating workers and
+        a RuntimeError from ``executor.shutdown()`` are logged and not re-raised.
+
+        :param executor: the process pool executor to stop.
+        """
+        log.debug("_stop_process_pool")
+        log.info('Shutting down process pool')
+        try:
+            log.debug('Stopping residual processes in the process pool')
+            # Only the process objects are needed, not the keys they are stored under.
+            for worker in executor._processes.values():
+                if worker.is_alive():
+                    worker.terminate()
+        except Exception as err:
+            log.info("Exception during process termination")
+            log.debug("Exception %s", err)
+
+        try:
+            # Shutting down executor
+            log.debug('Shutting down process pool executor')
+            executor.shutdown()
+        except RuntimeError as err:
+            log.info('RuntimeError in shutting down executer')
+            log.debug('RuntimeError %s', err)
+
+    @staticmethod
+    def _notify_alarm(conf: Config, alarm: Alarm, status: AlarmStatus):
+        """Build the notification for one alarm and write it to the message bus.
+
+        The message is written to topic 'alarm_response' with key 'notify_alarm',
+        using a MessageBusClient created from ``conf`` inside this call.
+
+        :param conf: configuration used to create the MessageBusClient.
+        :param alarm: alarm the notification is about.
+        :param status: status reported for the alarm.
+        """
+        log.debug("_notify_alarm")
+        resp_message = Evaluator._build_alarm_response(alarm, status)
+        msg_bus = MessageBusClient(conf)
+        loop = asyncio.get_event_loop()
+        loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
+        # Log only after the write has completed, so a failed write is not reported as sent.
+        log.info("Sent alarm notification: %s", resp_message)
+
+    @staticmethod
+    def _build_alarm_response(alarm: Alarm, status: AlarmStatus):
+        """Build the 'notify_alarm' response message for an alarm.
+
+        :param alarm: alarm whose uuid, metric, operation, threshold, severity
+            and tags are put into the message.
+        :param status: alarm status; its ``value`` attribute is sent as the status.
+        :return: whatever ``ResponseBuilder.generate_response`` returns.
+        """
+        log.debug("_build_alarm_response")
         response = ResponseBuilder()
+        # Copy the alarm tags into a plain dict for the response.
+        tags = {}
+        for name, value in alarm.tags.items():
+            tags[name] = value
+        # Timestamp: day-month-year, a space, then the '%X' time representation.
         now = time.strftime("%d-%m-%Y") + " " + time.strftime("%X")
-        # Generate and send response
-        resp_message = response.generate_response(
+        return response.generate_response(
             'notify_alarm',
-            alarm_id=alarm.id,
-            vdu_name=alarm.vdur_name,
-            vnf_member_index=alarm.vnf_member_index,
-            ns_id=alarm.nsr_id,
-            metric_name=alarm.monitoring_param,
+            alarm_id=alarm.uuid,
+            metric_name=alarm.metric,
             operation=alarm.operation,
             threshold_value=alarm.threshold,
             sev=alarm.severity,
-            status='alarm',
-            date=now)
-        producer = Producer()
-        producer.send(topic='alarm_response', key='notify_alarm', value=json.dumps(resp_message))
-        producer.flush()
-        log.info("Sent alarm notification: %s", resp_message)
+            status=status.value,
+            date=now,
+            tags=tags)