From 1c3b669e8a6030de17712758f9a81aa323d7a6d5 Mon Sep 17 00:00:00 2001 From: palsus Date: Tue, 9 Feb 2021 17:03:49 +0000 Subject: [PATCH] Improved multi-processing of mon-evaluator Change-Id: I195b406eed3a008da1efb61a9642bdb63c6e4593 Signed-off-by: palsus (cherry picked from commit c811d6839dac356b54a7cbb39f7687587e9947de) --- osm_mon/core/mon.yaml | 2 + osm_mon/evaluator/evaluator.py | 62 ++++++++++++++----- osm_mon/server/service.py | 4 ++ .../tests/unit/evaluator/test_evaluator.py | 8 +-- 4 files changed, 57 insertions(+), 19 deletions(-) diff --git a/osm_mon/core/mon.yaml b/osm_mon/core/mon.yaml index 2ce2fc1..b7e5efd 100644 --- a/osm_mon/core/mon.yaml +++ b/osm_mon/core/mon.yaml @@ -47,6 +47,8 @@ collector: evaluator: interval: 30 backend: prometheus + process_pool_size: 20 + process_timeout: 50 dashboarder: interval: 30 diff --git a/osm_mon/evaluator/evaluator.py b/osm_mon/evaluator/evaluator.py index 8d4c276..d8589bb 100644 --- a/osm_mon/evaluator/evaluator.py +++ b/osm_mon/evaluator/evaluator.py @@ -22,8 +22,8 @@ ## import asyncio import logging -import multiprocessing import time +import concurrent.futures from osm_mon.core.config import Config from osm_mon.core.message_bus_client import MessageBusClient @@ -56,22 +56,54 @@ class Evaluator: def evaluate(self): log.debug('evaluate') alarms_tuples = self.service.evaluate_alarms() - processes = [] - for alarm, status in alarms_tuples: - p = multiprocessing.Process(target=self.notify_alarm, - args=(alarm, status)) - p.start() - processes.append(p) - for process in processes: - process.join(timeout=10) - - def notify_alarm(self, alarm: Alarm, status: AlarmStatus): - log.debug("notify_alarm") - resp_message = self._build_alarm_response(alarm, status) + # Starting evaluate executor pool with pool size process_pool_size. 
Default process_pool_size is 20 + with concurrent.futures.ProcessPoolExecutor(self.conf.get('evaluator', 'process_pool_size')) as executor: + log.info('Started evaluate process pool with pool size %s' % (self.conf.get('evaluator', + 'process_pool_size'))) + evaluate_futures = [] + for alarm, status in alarms_tuples: + evaluate_futures.append(executor.submit(Evaluator._notify_alarm, self.conf, alarm, status)) + + try: + # Wait for future calls to complete till process_timeout. Default is 50 seconds + for evaluate_future in concurrent.futures.as_completed(evaluate_futures, + self.conf.get('evaluator', 'process_timeout')): + result = evaluate_future.result(timeout=int(self.conf.get('evaluator', + 'process_timeout'))) + log.debug('result = %s' % (result)) + except concurrent.futures.TimeoutError as e: + # Some processes have not completed due to timeout error + log.info('Some processes have not finished due to TimeoutError exception') + log.debug('concurrent.futures.TimeoutError exception %s' % (e)) + Evaluator._stop_process_pool(executor) + + @staticmethod + def _stop_process_pool(executor): + log.debug("_stop_process_pool") + log.info('Stopping all processes in the process pool') + try: + for pid, process in executor._processes.items(): + if process.is_alive(): + process.terminate() + except Exception as e: + log.info("Exception during process termination") + log.debug("Exception %s" % (e)) + executor.shutdown() + return + + @staticmethod + def _notify_alarm(conf: Config, alarm: Alarm, status: AlarmStatus): + log.debug("_notify_alarm") + resp_message = Evaluator._build_alarm_response(alarm, status) + msg_bus = MessageBusClient(conf) + loop = asyncio.get_event_loop() log.info("Sent alarm notification: %s", resp_message) - self.loop.run_until_complete(self.msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message)) + loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message)) + return - def _build_alarm_response(self, alarm: Alarm, 
status: AlarmStatus): + @staticmethod + def _build_alarm_response(alarm: Alarm, status: AlarmStatus): + log.debug("_build_alarm_response") response = ResponseBuilder() tags = {} for name, value in alarm.tags.items(): diff --git a/osm_mon/server/service.py b/osm_mon/server/service.py index 60cb3ec..b68b367 100755 --- a/osm_mon/server/service.py +++ b/osm_mon/server/service.py @@ -42,10 +42,14 @@ class ServerService: statistic: str, metric_name: str, tags: dict) -> Alarm: + log.debug("create_alarm") alarm = Alarm(name, severity, threshold, operation, statistic, metric_name, tags) self.common_db.create_alarm(alarm) + log.info("Alarm %s created", alarm.name) return alarm def delete_alarm(self, alarm_uuid: str) -> None: + log.debug("delete_alarm") self.common_db.delete_alarm(alarm_uuid) + log.info("Alarm %s is deleted", alarm_uuid) diff --git a/osm_mon/tests/unit/evaluator/test_evaluator.py b/osm_mon/tests/unit/evaluator/test_evaluator.py index 43cec96..e7b9162 100644 --- a/osm_mon/tests/unit/evaluator/test_evaluator.py +++ b/osm_mon/tests/unit/evaluator/test_evaluator.py @@ -36,10 +36,10 @@ class EvaluatorTest(TestCase): super().setUp() self.config = Config() - @mock.patch('multiprocessing.Process') - @mock.patch.object(Evaluator, "notify_alarm") + @mock.patch('concurrent.futures.ProcessPoolExecutor.submit') + @mock.patch.object(Evaluator, "_notify_alarm") @mock.patch.object(EvaluatorService, "evaluate_alarms") - def test_evaluate(self, evaluate_alarms, notify_alarm, process): + def test_evaluate(self, evaluate_alarms, _notify_alarm, futures): mock_alarm = mock.Mock() mock_alarm.operation = 'gt' mock_alarm.threshold = 50.0 @@ -48,4 +48,4 @@ class EvaluatorTest(TestCase): evaluator = Evaluator(self.config) evaluator.evaluate() - process.assert_called_with(target=notify_alarm, args=(mock_alarm, AlarmStatus.ALARM)) + futures.assert_called_with(Evaluator._notify_alarm, self.config, mock_alarm, AlarmStatus.ALARM) -- 2.25.1