Fix for bug 1450 high memory consumption
[osm/MON.git] / osm_mon / evaluator / evaluator.py
1 # -*- coding: utf-8 -*-
2
3 # Copyright 2018 Whitestack, LLC
4 # *************************************************************
5
6 # This file is part of OSM Monitoring module
7 # All Rights Reserved to Whitestack, LLC
8
9 # Licensed under the Apache License, Version 2.0 (the "License"); you may
10 # not use this file except in compliance with the License. You may obtain
11 # a copy of the License at
12
13 # http://www.apache.org/licenses/LICENSE-2.0
14
15 # Unless required by applicable law or agreed to in writing, software
16 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18 # License for the specific language governing permissions and limitations
19 # under the License.
20 # For those usages not covered by the Apache License, Version 2.0 please
21 # contact: bdiaz@whitestack.com or glavado@whitestack.com
22 ##
23 import asyncio
24 import logging
25 import time
26 import concurrent.futures
27
28 from osm_mon.core.config import Config
29 from osm_mon.core.message_bus_client import MessageBusClient
30 from osm_mon.core.models import Alarm
31 from osm_mon.core.response import ResponseBuilder
32 from osm_mon.evaluator.service import EvaluatorService, AlarmStatus
33
# Module-level logger, named after this module via __name__.
34 log = logging.getLogger(__name__)
35
36
class Evaluator:
    """Evaluate alarms periodically and publish a notification for each result.

    Alarm evaluation is delegated to ``EvaluatorService``. Every
    ``(alarm, status)`` pair it returns is turned into a ``notify_alarm``
    message and written to the ``alarm_response`` topic from a worker process.
    """

    def __init__(self, config: "Config", loop=None):
        """Store the configuration and build the service and bus clients.

        :param config: configuration object; it is read through
            ``config.get(section, option)`` by the other methods.
        :param loop: event loop to keep on the instance; when falsy, the
            current asyncio event loop is used.
        """
        self.conf = config
        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.service = EvaluatorService(config)
        self.msg_bus = MessageBusClient(config)

    def evaluate_forever(self):
        """Run :meth:`evaluate` in an endless loop, pausing between rounds.

        The pause (``evaluator.interval`` seconds) is taken after every round,
        including failed ones, so that a persistently failing evaluation does
        not turn into a busy loop that floods the log. Errors are logged and
        never propagate out of the loop.
        """
        log.debug('evaluate_forever')
        while True:
            try:
                self.evaluate()
            except Exception:
                log.exception("Error evaluating alarms")

            # Sleep in its own guarded step: it must run even when the
            # evaluation above failed, and a bad interval value must not kill
            # the loop either.
            try:
                time.sleep(int(self.conf.get('evaluator', 'interval')))
            except Exception:
                log.exception("Error evaluating alarms")

    def evaluate(self):
        """Evaluate all alarms once and send their notifications in parallel.

        A process pool of ``evaluator.process_pool_size`` workers is created
        for this round only. The method waits up to
        ``evaluator.process_timeout`` seconds for the notifications; whatever
        is still running after that is terminated by
        :meth:`_stop_process_pool`.

        An exception raised inside a worker is re-raised here by
        ``Future.result()`` and propagates to the caller; the pool is cleaned
        up first.
        """
        log.debug('evaluate')
        alarms_tuples = self.service.evaluate_alarms()

        # The configuration may hand back strings (other reads in this class
        # already wrap values in int()), so coerce consistently. ``None`` is
        # passed through and keeps the library defaults.
        pool_size = self.conf.get('evaluator', 'process_pool_size')
        if pool_size is not None:
            pool_size = int(pool_size)
        timeout = self.conf.get('evaluator', 'process_timeout')
        if timeout is not None:
            timeout = int(timeout)

        # Starting evaluate executor pool with pool size process_pool_size.
        # (The original comment states the default process_pool_size is 20.)
        with concurrent.futures.ProcessPoolExecutor(pool_size) as executor:
            log.info('Started evaluate process pool with pool size %s', pool_size)
            try:
                evaluate_futures = [
                    executor.submit(Evaluator._notify_alarm, self.conf, alarm, status)
                    for alarm, status in alarms_tuples
                ]

                # Wait for the calls to complete until process_timeout expires.
                # (The original comment states the default is 50 seconds.)
                # as_completed() only yields finished futures, so result()
                # returns immediately and needs no timeout of its own.
                for evaluate_future in concurrent.futures.as_completed(evaluate_futures, timeout):
                    result = evaluate_future.result()
                    log.debug('result = %s', result)
            except concurrent.futures.TimeoutError as e:
                # Some processes have not completed due to timeout error
                log.info('Some processes have not finished due to TimeoutError exception')
                log.debug('concurrent.futures.TimeoutError exception %s', e)
            finally:
                # Always terminate leftovers before the ``with`` block exits:
                # its implicit shutdown(wait=True) would otherwise block on a
                # worker that never finishes.
                Evaluator._stop_process_pool(executor)

    @staticmethod
    def _stop_process_pool(executor):
        """Terminate workers that are still alive, then shut the executor down.

        Both steps are best effort: failures are logged and swallowed so that
        cleanup never raises.

        :param executor: the process pool executor to stop.
        """
        log.debug("_stop_process_pool")
        log.info('Shutting down process pool')
        try:
            log.debug('Stopping residual processes in the process pool')
            # ``_processes`` is a private attribute of the executor; tolerate
            # it being absent or None, and iterate over a snapshot because the
            # executor may change the mapping while workers exit.
            processes = getattr(executor, '_processes', None) or {}
            for process in list(processes.values()):
                if process.is_alive():
                    process.terminate()
        except Exception as e:
            log.info("Exception during process termination")
            log.debug("Exception %s", e)

        try:
            # Shutting down executor
            log.debug('Shutting down process pool executor')
            executor.shutdown()
        except RuntimeError as e:
            log.info('RuntimeError in shutting down executer')
            log.debug('RuntimeError %s', e)

    @staticmethod
    def _notify_alarm(conf: "Config", alarm: "Alarm", status: "AlarmStatus"):
        """Build the notification for one alarm and write it to the message bus.

        Submitted to the process pool by :meth:`evaluate`, so it creates its
        own ``MessageBusClient`` instead of using the instance's one.

        :param conf: configuration used to build the message bus client.
        :param alarm: the alarm being reported.
        :param status: the evaluated status of the alarm.
        """
        log.debug("_notify_alarm")
        resp_message = Evaluator._build_alarm_response(alarm, status)
        msg_bus = MessageBusClient(conf)
        # NOTE(review): the current event loop is kept on purpose; the bus
        # client may bind to it internally - confirm before switching to a
        # fresh loop.
        loop = asyncio.get_event_loop()
        loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
        # Log only after the write has completed, so the message is truthful.
        log.info("Sent alarm notification: %s", resp_message)

    @staticmethod
    def _build_alarm_response(alarm: "Alarm", status: "AlarmStatus"):
        """Return the ``notify_alarm`` response payload for an alarm.

        :param alarm: source of uuid, metric, operation, threshold, severity
            and tags.
        :param status: alarm status; its ``value`` is placed in the payload.
        :return: whatever ``ResponseBuilder.generate_response`` produces.
        """
        log.debug("_build_alarm_response")
        response = ResponseBuilder()
        # Copy the tags so the payload does not alias the alarm's own mapping.
        tags = dict(alarm.tags.items())
        # Format date and time in a single call so both parts come from the
        # same instant (two calls could straddle a second or midnight).
        now = time.strftime("%d-%m-%Y %X")
        return response.generate_response(
            'notify_alarm',
            alarm_id=alarm.uuid,
            metric_name=alarm.metric,
            operation=alarm.operation,
            threshold_value=alarm.threshold,
            sev=alarm.severity,
            status=status.value,
            date=now,
            tags=tags)