# blob: 6ca0dc576b01ac8f82e744958aac90be869a07c6 [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2018 Whitestack, LLC
# *************************************************************
# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# For those usages not covered by the Apache License, Version 2.0 please
# contact: bdiaz@whitestack.com or glavado@whitestack.com
##
import asyncio
import logging
import time
import concurrent.futures
from osm_mon.core.config import Config
from osm_mon.core.message_bus_client import MessageBusClient
from osm_mon.core.models import Alarm
from osm_mon.core.response import ResponseBuilder
from osm_mon.evaluator.service import EvaluatorService, AlarmStatus
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class Evaluator:
    """Periodically evaluate alarms and publish a notification for each one.

    Each evaluation cycle asks ``EvaluatorService`` for ``(alarm, status)``
    tuples and fans the notifications out to a process pool; every worker
    builds the response payload and writes it to the message bus.

    Configuration keys read from the ``evaluator`` section:
    ``interval``, ``process_pool_size`` and ``process_timeout``.
    """

    # Delay (seconds) used between cycles when the configured interval
    # cannot be read, so a broken setting does not turn into a busy loop.
    _FALLBACK_INTERVAL_SECONDS = 1

    def __init__(self, config: Config, loop=None):
        """Store the configuration and create the service and bus clients.

        :param config: configuration object; must provide ``get(section, key)``.
        :param loop: optional asyncio event loop; when falsy the current
            event loop is used.
        """
        self.conf = config
        self.loop = loop or asyncio.get_event_loop()
        self.service = EvaluatorService(config)
        self.msg_bus = MessageBusClient(config)

    def evaluate_forever(self):
        """Run :meth:`evaluate` in an endless loop, sleeping between cycles.

        Exceptions raised by an evaluation cycle are logged and do not stop
        the loop. The sleep is performed after every cycle, including failed
        ones, so a persistent error cannot cause a tight retry loop.
        """
        log.debug('evaluate_forever')
        while True:
            try:
                self.evaluate()
            except Exception:
                log.exception("Error evaluating alarms")
            try:
                interval = int(self.conf.get('evaluator', 'interval'))
            except Exception:
                log.exception("Error reading evaluator interval")
                interval = self._FALLBACK_INTERVAL_SECONDS
            # time.sleep() rejects negative values; clamp instead of raising.
            time.sleep(max(interval, 0))

    def evaluate(self):
        """Evaluate all alarms once and notify their status via a process pool.

        A failure of a single notification is logged and does not prevent
        the remaining results from being collected. Worker processes that
        are still alive when collection ends (normally, on timeout or on an
        error) are terminated before the executor is shut down.
        """
        log.debug('evaluate')
        alarms_tuples = self.service.evaluate_alarms()
        # Configuration values may arrive as strings; convert once so the
        # executor and the timeouts always receive integers.
        pool_size = int(self.conf.get('evaluator', 'process_pool_size'))
        process_timeout = int(self.conf.get('evaluator', 'process_timeout'))
        # Starting evaluate executor pool with pool size process_pool_size. Default process_pool_size is 20
        with concurrent.futures.ProcessPoolExecutor(pool_size) as executor:
            log.info('Started evaluate process pool with pool size %s', pool_size)
            try:
                evaluate_futures = [
                    executor.submit(Evaluator._notify_alarm, self.conf, alarm, status)
                    for alarm, status in alarms_tuples
                ]
                try:
                    # Wait for future calls to complete till process_timeout. Default is 50 seconds
                    for evaluate_future in concurrent.futures.as_completed(evaluate_futures,
                                                                           process_timeout):
                        try:
                            result = evaluate_future.result(timeout=process_timeout)
                        except Exception:
                            # One failed notification must not abort the others.
                            log.exception('Error notifying alarm')
                            continue
                        log.debug('result = %s', result)
                except concurrent.futures.TimeoutError as e:
                    # Some processes have not completed due to timeout error
                    log.info('Some processes have not finished due to TimeoutError exception')
                    log.debug('concurrent.futures.TimeoutError exception %s', e)
            finally:
                # Always stop the pool here: leaving it to the ``with`` exit
                # alone would block until every pending worker finishes.
                Evaluator._stop_process_pool(executor)

    @staticmethod
    def _stop_process_pool(executor):
        """Terminate live worker processes and shut the executor down.

        Best effort: errors while terminating processes, and ``RuntimeError``
        from the shutdown itself, are logged and not propagated.

        :param executor: the ``ProcessPoolExecutor`` to stop.
        """
        log.debug("_stop_process_pool")
        log.info('Shutting down process pool')
        try:
            log.debug('Stopping residual processes in the process pool')
            # ``_processes`` is a private executor attribute; CPython resets
            # it to None once the executor has been shut down.
            processes = getattr(executor, '_processes', None) or {}
            # Iterate over a snapshot so concurrent changes cannot break the loop.
            for process in list(processes.values()):
                if process.is_alive():
                    process.terminate()
        except Exception as e:
            log.info("Exception during process termination")
            log.debug("Exception %s", e)
        try:
            # Shutting down executor
            log.debug('Shutting down process pool executor')
            executor.shutdown()
        except RuntimeError as e:
            log.info('RuntimeError in shutting down executer')
            log.debug('RuntimeError %s', e)

    @staticmethod
    def _notify_alarm(conf: Config, alarm: Alarm, status: AlarmStatus):
        """Build the alarm response and write it to the message bus.

        Submitted to the process pool by :meth:`evaluate`, so it creates its
        own ``MessageBusClient`` from ``conf``.

        :param conf: configuration used to build the message bus client.
        :param alarm: alarm being notified.
        :param status: evaluated status of the alarm.
        """
        log.debug("_notify_alarm")
        resp_message = Evaluator._build_alarm_response(alarm, status)
        msg_bus = MessageBusClient(conf)
        loop = asyncio.get_event_loop()
        loop.run_until_complete(msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
        # Log only after the write has completed, so the message is truthful.
        log.info("Sent alarm notification: %s", resp_message)

    @staticmethod
    def _build_alarm_response(alarm: Alarm, status: AlarmStatus):
        """Return the ``notify_alarm`` response payload for ``alarm``.

        :param alarm: alarm providing uuid, metric, operation, threshold,
            severity and tags.
        :param status: evaluated status; its ``value`` is reported.
        :return: whatever ``ResponseBuilder.generate_response`` produces.
        """
        log.debug("_build_alarm_response")
        response = ResponseBuilder()
        tags = dict(alarm.tags.items())
        # A single strftime call keeps date and time consistent with each
        # other (two calls could straddle a second or midnight boundary).
        now = time.strftime("%d-%m-%Y %X")
        return response.generate_response(
            'notify_alarm',
            alarm_id=alarm.uuid,
            metric_name=alarm.metric,
            operation=alarm.operation,
            threshold_value=alarm.threshold,
            sev=alarm.severity,
            status=status.value,
            date=now,
            tags=tags)