| Benjamin Diaz | 51f4486 | 2018-11-15 10:27:12 -0300 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | |
| 3 | # Copyright 2018 Whitestack, LLC |
| 4 | # ************************************************************* |
| 5 | |
| 6 | # This file is part of OSM Monitoring module |
| 7 | # All Rights Reserved to Whitestack, LLC |
| 8 | |
| 9 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 10 | # not use this file except in compliance with the License. You may obtain |
| 11 | # a copy of the License at |
| 12 | |
| 13 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 14 | |
| 15 | # Unless required by applicable law or agreed to in writing, software |
| 16 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 17 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 18 | # License for the specific language governing permissions and limitations |
| 19 | # under the License. |
| 20 | # For those usages not covered by the Apache License, Version 2.0 please |
| 21 | # contact: bdiaz@whitestack.com or glavado@whitestack.com |
| 22 | ## |
| Benjamin Diaz | 5ac7c08 | 2019-02-06 11:58:00 -0300 | [diff] [blame] | 23 | import asyncio |
| Benjamin Diaz | 51f4486 | 2018-11-15 10:27:12 -0300 | [diff] [blame] | 24 | import logging |
| Benjamin Diaz | 51f4486 | 2018-11-15 10:27:12 -0300 | [diff] [blame] | 25 | import time |
| palsus | c811d68 | 2021-02-09 17:03:49 +0000 | [diff] [blame] | 26 | import concurrent.futures |
| Benjamin Diaz | 51f4486 | 2018-11-15 10:27:12 -0300 | [diff] [blame] | 27 | |
| Benjamin Diaz | 5ac7c08 | 2019-02-06 11:58:00 -0300 | [diff] [blame] | 28 | from osm_mon.core.config import Config |
| Benjamin Diaz | 5ac7c08 | 2019-02-06 11:58:00 -0300 | [diff] [blame] | 29 | from osm_mon.core.message_bus_client import MessageBusClient |
| almagia | 1b7145f | 2019-11-30 03:56:04 +0100 | [diff] [blame] | 30 | from osm_mon.core.models import Alarm |
| Benjamin Diaz | 51f4486 | 2018-11-15 10:27:12 -0300 | [diff] [blame] | 31 | from osm_mon.core.response import ResponseBuilder |
| Benjamin Diaz | a97bdb3 | 2019-04-10 15:22:22 -0300 | [diff] [blame] | 32 | from osm_mon.evaluator.service import EvaluatorService, AlarmStatus |
| Benjamin Diaz | 51f4486 | 2018-11-15 10:27:12 -0300 | [diff] [blame] | 33 | |
log = logging.getLogger(__name__)


class Evaluator:
    """Evaluate alarms on a fixed interval and publish a notification per alarm.

    Each evaluation round asks ``EvaluatorService`` for ``(alarm, status)``
    pairs and fans the notifications out to a short-lived process pool.
    """

    def __init__(self, config: "Config", loop=None):
        """Store the configuration and build the collaborating services.

        :param config: configuration object; this class reads the
            ``evaluator`` section (``interval``, ``process_pool_size`` and
            ``process_timeout``).
        :param loop: asyncio event loop to keep on the instance; when omitted
            the current event loop is used.
        """
        self.conf = config
        if loop is None:
            loop = asyncio.get_event_loop()
        self.loop = loop
        self.service = EvaluatorService(config)
        self.msg_bus = MessageBusClient(config)

    def evaluate_forever(self):
        """Run :meth:`evaluate` in an endless loop, pausing between rounds.

        Errors are logged and never stop the loop. The pause is taken after
        every round, including failed ones, so a persistently failing
        evaluation cannot turn into a busy loop.
        """
        log.debug('evaluate_forever')
        while True:
            try:
                self.evaluate()
            except Exception:
                log.exception("Error evaluating alarms")
            # Kept in its own try block: a bad 'interval' setting is still only
            # logged, and a failed evaluation above does not skip the wait.
            try:
                time.sleep(int(self.conf.get('evaluator', 'interval')))
            except Exception:
                log.exception("Error evaluating alarms")

    def evaluate(self):
        """Evaluate all alarms once and notify each result from a process pool.

        Waits up to ``process_timeout`` seconds for the notifications to
        complete; workers still running after that are terminated. An
        exception raised by a worker propagates to the caller, but the pool is
        always stopped first.
        """
        log.debug('evaluate')
        alarms_tuples = self.service.evaluate_alarms()
        # Normalise both settings once so string values (e.g. coming from
        # environment overrides) behave the same as integers.
        # NOTE(review): earlier comments state defaults of 20 workers and
        # 50 seconds - confirm against the shipped configuration.
        pool_size = Evaluator._as_optional_int(self.conf.get('evaluator', 'process_pool_size'))
        timeout = Evaluator._as_optional_int(self.conf.get('evaluator', 'process_timeout'))

        with concurrent.futures.ProcessPoolExecutor(pool_size) as executor:
            log.info('Started evaluate process pool with pool size %s', pool_size)
            try:
                evaluate_futures = [
                    executor.submit(Evaluator._notify_alarm, self.conf, alarm, status)
                    for alarm, status in alarms_tuples
                ]
                # as_completed only yields finished futures, so result() does
                # not block here; it re-raises any exception from the worker.
                for evaluate_future in concurrent.futures.as_completed(evaluate_futures, timeout):
                    log.debug('result = %s', evaluate_future.result())
            except concurrent.futures.TimeoutError as e:
                # Some processes have not completed within the timeout
                log.info('Some processes have not finished due to TimeoutError exception')
                log.debug('concurrent.futures.TimeoutError exception %s', e)
            finally:
                # Always terminate residual workers before the ``with`` block
                # exits; its implicit blocking shutdown could otherwise hang
                # on a stuck worker.
                Evaluator._stop_process_pool(executor)

    @staticmethod
    def _as_optional_int(value):
        """Return ``value`` converted to ``int``, passing ``None`` through."""
        return None if value is None else int(value)

    @staticmethod
    def _stop_process_pool(executor):
        """Terminate the pool's live worker processes and shut the executor down.

        Best effort: failures while terminating workers, and ``RuntimeError``
        from the shutdown itself, are logged and not raised.

        :param executor: the ``ProcessPoolExecutor`` to stop.
        """
        log.debug("_stop_process_pool")
        log.info('Shutting down process pool')
        try:
            log.debug('Stopping residual processes in the process pool')
            # _processes is a private attribute of the executor; it may be
            # None once the executor has shut down and can be mutated by the
            # executor's own bookkeeping, so iterate over a snapshot.
            for process in list((executor._processes or {}).values()):
                if process.is_alive():
                    process.terminate()
        except Exception as e:
            log.info("Exception during process termination")
            log.debug("Exception %s", e)

        try:
            log.debug('Shutting down process pool executor')
            executor.shutdown()
        except RuntimeError as e:
            log.info('RuntimeError in shutting down executer')
            log.debug('RuntimeError %s', e)

    @staticmethod
    def _notify_alarm(conf: "Config", alarm: "Alarm", status: "AlarmStatus"):
        """Build the notification for ``alarm`` and write it to the message bus.

        Submitted to the process pool by :meth:`evaluate`, so it builds its own
        ``MessageBusClient`` from ``conf`` instead of sharing one.

        :param conf: configuration used to build the message bus client.
        :param alarm: the alarm being reported.
        :param status: the evaluated status of ``alarm``.
        """
        log.debug("_notify_alarm")
        resp_message = Evaluator._build_alarm_response(alarm, status)
        msg_bus = MessageBusClient(conf)
        loop = asyncio.get_event_loop()
        loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
        # Logged only after the write returned, so "Sent" is truthful.
        log.info("Sent alarm notification: %s", resp_message)

    @staticmethod
    def _build_alarm_response(alarm: "Alarm", status: "AlarmStatus"):
        """Return the ``notify_alarm`` response message for ``alarm``.

        :param alarm: alarm providing uuid, metric, operation, threshold,
            severity and tags.
        :param status: evaluated status; its ``value`` is put in the message.
        :return: whatever ``ResponseBuilder.generate_response`` produces.
        """
        log.debug("_build_alarm_response")
        response = ResponseBuilder()
        tags = dict(alarm.tags.items())
        # One strftime call so the date and time parts come from the same instant.
        now = time.strftime("%d-%m-%Y %X")
        return response.generate_response(
            'notify_alarm',
            alarm_id=alarm.uuid,
            metric_name=alarm.metric,
            operation=alarm.operation,
            threshold_value=alarm.threshold,
            sev=alarm.severity,
            status=status.value,
            date=now,
            tags=tags)