blob: 6ca0dc576b01ac8f82e744958aac90be869a07c6 [file] [log] [blame]
# -*- coding: utf-8 -*-

# Copyright 2018 Whitestack, LLC
# *************************************************************

# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at

#         http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# For those usages not covered by the Apache License, Version 2.0 please
# contact: bdiaz@whitestack.com or glavado@whitestack.com
##
Benjamin Diaz5ac7c082019-02-06 11:58:00 -030023import asyncio
Benjamin Diaz51f44862018-11-15 10:27:12 -030024import logging
Benjamin Diaz51f44862018-11-15 10:27:12 -030025import time
palsusc811d682021-02-09 17:03:49 +000026import concurrent.futures
Benjamin Diaz51f44862018-11-15 10:27:12 -030027
Benjamin Diaz5ac7c082019-02-06 11:58:00 -030028from osm_mon.core.config import Config
Benjamin Diaz5ac7c082019-02-06 11:58:00 -030029from osm_mon.core.message_bus_client import MessageBusClient
almagia1b7145f2019-11-30 03:56:04 +010030from osm_mon.core.models import Alarm
Benjamin Diaz51f44862018-11-15 10:27:12 -030031from osm_mon.core.response import ResponseBuilder
Benjamin Diaza97bdb32019-04-10 15:22:22 -030032from osm_mon.evaluator.service import EvaluatorService, AlarmStatus
Benjamin Diaz51f44862018-11-15 10:27:12 -030033
# Module-level logger, named after this module; used by the Evaluator class.
log = logging.getLogger(__name__)
35
36
class Evaluator:
    """Evaluate alarms periodically and publish a notification for each result.

    ``evaluate`` asks ``EvaluatorService`` for ``(alarm, status)`` pairs and
    fans the notifications out to a ``ProcessPoolExecutor``; each worker
    builds the response payload and writes it to the message bus.
    """

    def __init__(self, config: Config, loop=None):
        """Keep the configuration and create the service and bus clients.

        Args:
            config: configuration object, read via ``config.get(section, key)``.
            loop: event loop to store on the instance. When falsy, the loop
                returned by ``asyncio.get_event_loop()`` is stored instead.
        """
        self.conf = config
        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.service = EvaluatorService(config)
        self.msg_bus = MessageBusClient(config)

    def evaluate_forever(self):
        """Run ``evaluate`` in an endless loop, pausing between rounds.

        The pause length is the ``evaluator.interval`` config value (seconds).
        Any ``Exception`` is logged and the loop carries on; this method
        never returns normally.
        """
        log.debug('evaluate_forever')
        while True:
            try:
                self.evaluate()
            except Exception:
                log.exception("Error evaluating alarms")
            # Sleep in its own guarded step so that a failing evaluation
            # round still waits for the interval instead of retrying in a
            # tight loop and flooding the log.
            try:
                time.sleep(int(self.conf.get('evaluator', 'interval')))
            except Exception:
                log.exception("Error evaluating alarms")

    def evaluate(self):
        """Evaluate all alarms once and notify every result in a process pool.

        Reads ``evaluator.process_pool_size`` (number of worker processes) and
        ``evaluator.process_timeout`` (seconds to wait for the notifications)
        from the configuration. A timeout while waiting is logged and not
        raised; an exception raised by a worker propagates to the caller
        after the pool has been stopped.
        """
        log.debug('evaluate')
        alarms_tuples = self.service.evaluate_alarms()

        # Config values may arrive as strings (presumably when overridden
        # from the environment - TODO confirm), so coerce them once here.
        # ProcessPoolExecutor and as_completed both require numbers.
        pool_size = self.conf.get('evaluator', 'process_pool_size')
        if pool_size is not None:
            pool_size = int(pool_size)
        timeout = int(self.conf.get('evaluator', 'process_timeout'))

        with concurrent.futures.ProcessPoolExecutor(pool_size) as executor:
            log.info('Started evaluate process pool with pool size %s', pool_size)
            try:
                evaluate_futures = [
                    executor.submit(Evaluator._notify_alarm, self.conf, alarm, status)
                    for alarm, status in alarms_tuples
                ]
                # Wait for the submitted calls to complete, up to `timeout`
                # seconds overall.
                for evaluate_future in concurrent.futures.as_completed(evaluate_futures, timeout):
                    result = evaluate_future.result(timeout=timeout)
                    log.debug('result = %s', result)
            except concurrent.futures.TimeoutError as e:
                # Some processes have not completed within the timeout.
                log.info('Some processes have not finished due to TimeoutError exception')
                log.debug('concurrent.futures.TimeoutError exception %s', e)
            finally:
                # Always stop the pool, even when a worker raised; otherwise
                # leaving the `with` block would wait on unfinished workers.
                Evaluator._stop_process_pool(executor)

    @staticmethod
    def _stop_process_pool(executor):
        """Terminate any live worker processes, then shut the executor down.

        Both steps are best-effort: failures are logged and not raised.

        Args:
            executor: the ``ProcessPoolExecutor`` to stop.
        """
        log.debug("_stop_process_pool")
        log.info('Shutting down process pool')
        try:
            log.debug('Stopping residual processes in the process pool')
            # NOTE: `_processes` is a private attribute of the executor; any
            # failure while reading it is caught by the handler below.
            for process in executor._processes.values():
                if process.is_alive():
                    process.terminate()
        except Exception as e:
            log.info("Exception during process termination")
            log.debug("Exception %s", e)

        try:
            log.debug('Shutting down process pool executor')
            executor.shutdown()
        except RuntimeError as e:
            log.info('RuntimeError in shutting down executer')
            log.debug('RuntimeError %s', e)

    @staticmethod
    def _notify_alarm(conf: Config, alarm: Alarm, status: AlarmStatus):
        """Build the notification for one alarm and write it to the bus.

        Submitted to the process pool by ``evaluate``. It creates its own
        ``MessageBusClient`` from ``conf`` and blocks until the write to topic
        ``alarm_response`` with key ``notify_alarm`` has completed.

        Args:
            conf: configuration used to create the message bus client.
            alarm: alarm the notification is about.
            status: evaluated status of the alarm.
        """
        log.debug("_notify_alarm")
        resp_message = Evaluator._build_alarm_response(alarm, status)
        msg_bus = MessageBusClient(conf)
        loop = asyncio.get_event_loop()
        loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
        # Log only after the write completed, so "Sent" is never reported for
        # a message that failed to go out.
        log.info("Sent alarm notification: %s", resp_message)

    @staticmethod
    def _build_alarm_response(alarm: Alarm, status: AlarmStatus):
        """Return the ``notify_alarm`` response payload for an alarm.

        Args:
            alarm: source of the uuid, metric, operation, threshold, severity
                and tags placed in the payload.
            status: evaluated status; its ``value`` is placed in the payload.

        Returns:
            Whatever ``ResponseBuilder.generate_response`` produces for the
            ``notify_alarm`` key.
        """
        log.debug("_build_alarm_response")
        response = ResponseBuilder()
        # Copy the tags so the payload does not share the alarm's container.
        tags = {name: value for name, value in alarm.tags.items()}
        # A single strftime call keeps date and time from the same instant;
        # two separate calls could straddle a second or day boundary.
        now = time.strftime("%d-%m-%Y %X")
        return response.generate_response(
            'notify_alarm',
            alarm_id=alarm.uuid,
            metric_name=alarm.metric,
            operation=alarm.operation,
            threshold_value=alarm.threshold,
            sev=alarm.severity,
            status=status.value,
            date=now,
            tags=tags)