Improved multi-processing of mon-evaluator 83/10283/12
author palsus <subhankar.pal@aricent.com>
Tue, 9 Feb 2021 17:03:49 +0000 (17:03 +0000)
committer palsus <subhankar.pal@aricent.com>
Thu, 11 Feb 2021 06:15:08 +0000 (07:15 +0100)
Change-Id: I195b406eed3a008da1efb61a9642bdb63c6e4593
Signed-off-by: palsus <subhankar.pal@aricent.com>
osm_mon/core/mon.yaml
osm_mon/evaluator/evaluator.py
osm_mon/server/service.py
osm_mon/tests/unit/evaluator/test_evaluator.py

index 426675a..3c9e27f 100644 (file)
@@ -47,6 +47,8 @@ collector:
 evaluator:
   interval: 30
   backend: prometheus
+  process_pool_size: 20
+  process_timeout: 50
 
 dashboarder:
   interval: 30
index 8d4c276..d8589bb 100644 (file)
@@ -22,8 +22,8 @@
 ##
 import asyncio
 import logging
-import multiprocessing
 import time
+import concurrent.futures
 
 from osm_mon.core.config import Config
 from osm_mon.core.message_bus_client import MessageBusClient
@@ -56,22 +56,54 @@ class Evaluator:
     def evaluate(self):
         log.debug('evaluate')
         alarms_tuples = self.service.evaluate_alarms()
-        processes = []
-        for alarm, status in alarms_tuples:
-            p = multiprocessing.Process(target=self.notify_alarm,
-                                        args=(alarm, status))
-            p.start()
-            processes.append(p)
-        for process in processes:
-            process.join(timeout=10)
-
-    def notify_alarm(self, alarm: Alarm, status: AlarmStatus):
-        log.debug("notify_alarm")
-        resp_message = self._build_alarm_response(alarm, status)
+        # Starting evaluate executor pool with pool size process_pool_size. Default process_pool_size is 20
+        with concurrent.futures.ProcessPoolExecutor(self.conf.get('evaluator', 'process_pool_size')) as executor:
+            log.info('Started evaluate process pool with pool size %s' % (self.conf.get('evaluator',
+                                                                                        'process_pool_size')))
+            evaluate_futures = []
+            for alarm, status in alarms_tuples:
+                evaluate_futures.append(executor.submit(Evaluator._notify_alarm, self.conf, alarm, status))
+
+            try:
+                # Wait for future calls to complete till process_timeout. Default is 50 seconds
+                for evaluate_future in concurrent.futures.as_completed(evaluate_futures,
+                                                                       self.conf.get('evaluator', 'process_timeout')):
+                    result = evaluate_future.result(timeout=int(self.conf.get('evaluator',
+                                                                              'process_timeout')))
+                    log.debug('result = %s' % (result))
+            except concurrent.futures.TimeoutError as e:
+                # Some processes have not completed due to timeout error
+                log.info('Some processes have not finished due to TimeoutError exception')
+                log.debug('concurrent.futures.TimeoutError exception %s' % (e))
+                Evaluator._stop_process_pool(executor)
+
+    @staticmethod
+    def _stop_process_pool(executor):
+        log.debug("_stop_process_pool")
+        log.info('Stopping all processes in the process pool')
+        try:
+            for pid, process in executor._processes.items():
+                if process.is_alive():
+                    process.terminate()
+        except Exception as e:
+            log.info("Exception during process termination")
+            log.debug("Exception %s" % (e))
+        executor.shutdown()
+        return
+
+    @staticmethod
+    def _notify_alarm(conf: Config, alarm: Alarm, status: AlarmStatus):
+        log.debug("_notify_alarm")
+        resp_message = Evaluator._build_alarm_response(alarm, status)
+        msg_bus = MessageBusClient(conf)
+        loop = asyncio.get_event_loop()
         log.info("Sent alarm notification: %s", resp_message)
-        self.loop.run_until_complete(self.msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
+        loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
+        return
 
-    def _build_alarm_response(self, alarm: Alarm, status: AlarmStatus):
+    @staticmethod
+    def _build_alarm_response(alarm: Alarm, status: AlarmStatus):
+        log.debug("_build_alarm_response")
         response = ResponseBuilder()
         tags = {}
         for name, value in alarm.tags.items():
index 60cb3ec..b68b367 100755 (executable)
@@ -42,10 +42,14 @@ class ServerService:
                      statistic: str,
                      metric_name: str,
                      tags: dict) -> Alarm:
+        log.debug("create_alarm")
         alarm = Alarm(name, severity, threshold, operation, statistic, metric_name, tags)
         self.common_db.create_alarm(alarm)
+        log.info("Alarm %s created", alarm.name)
         return alarm
 
     def delete_alarm(self,
                      alarm_uuid: str) -> None:
+        log.debug("delete_alarm")
         self.common_db.delete_alarm(alarm_uuid)
+        log.info("Alarm %s is deleted", alarm_uuid)
index 43cec96..e7b9162 100644 (file)
@@ -36,10 +36,10 @@ class EvaluatorTest(TestCase):
         super().setUp()
         self.config = Config()
 
-    @mock.patch('multiprocessing.Process')
-    @mock.patch.object(Evaluator, "notify_alarm")
+    @mock.patch('concurrent.futures.ProcessPoolExecutor.submit')
+    @mock.patch.object(Evaluator, "_notify_alarm")
     @mock.patch.object(EvaluatorService, "evaluate_alarms")
-    def test_evaluate(self, evaluate_alarms, notify_alarm, process):
+    def test_evaluate(self, evaluate_alarms, _notify_alarm, futures):
         mock_alarm = mock.Mock()
         mock_alarm.operation = 'gt'
         mock_alarm.threshold = 50.0
@@ -48,4 +48,4 @@ class EvaluatorTest(TestCase):
         evaluator = Evaluator(self.config)
         evaluator.evaluate()
 
-        process.assert_called_with(target=notify_alarm, args=(mock_alarm, AlarmStatus.ALARM))
+        futures.assert_called_with(Evaluator._notify_alarm, self.config, mock_alarm, AlarmStatus.ALARM)