osm_mon/evaluator/service.py

   1 # -*- coding: utf-8 -*-
   2
   3 # Copyright 2018 Whitestack, LLC
   4 # *************************************************************
   5
   6 # This file is part of OSM Monitoring module
   7 # All Rights Reserved to Whitestack, LLC
   8
   9 # Licensed under the Apache License, Version 2.0 (the "License"); you may
  10 # not use this file except in compliance with the License. You may obtain
  11 # a copy of the License at
  12
  13 #         http://www.apache.org/licenses/LICENSE-2.0
  14
  15 # Unless required by applicable law or agreed to in writing, software
  16 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  17 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  18 # License for the specific language governing permissions and limitations
  19 # under the License.
  20 # For those usages not covered by the Apache License, Version 2.0 please
  21 # contact: bdiaz@whitestack.com or glavado@whitestack.com
  22 ##
  23 import logging
  24 import multiprocessing
  25 from enum import Enum
  26 from typing import Tuple, List
  27
  28 from osm_mon.core.common_db import CommonDbClient
  29 from osm_mon.core.config import Config
  30 from osm_mon.core.models import Alarm
  31 from osm_mon.evaluator.backends.prometheus import PrometheusBackend
  32
  33 log = logging.getLogger(__name__)
  34
  35 BACKENDS = {"prometheus": PrometheusBackend}
  36
  37
  38 class AlarmStatus(Enum):
  39     ALARM = "alarm"
  40     OK = "ok"
  41     INSUFFICIENT = "insufficient-data"
  42     DISABLED = "disabled"
  43
  44
  45 class EvaluatorService:
  46     def __init__(self, config: Config):
  47         self.conf = config
  48         self.common_db = CommonDbClient(self.conf)
  49         self.queue = multiprocessing.Queue()
  50
  51     def _get_metric_data(self, metric_name: str, tags: dict):
  52         return BACKENDS[self.conf.get("evaluator", "backend")](
  53             self.conf
  54         ).get_metric_data(metric_name, tags)
  55
  56     def _evaluate_metric(self, alarm: Alarm):
  57         """Method to evaluate a metric value comparing it against an alarm threshold.
  58
  59         Args:
  60             alarm (Alarm): the alarm with the threshold to compare the metric against
  61         """
  62
  63         log.debug("_evaluate_metric")
  64         metric_data = self._get_metric_data(alarm.metric, alarm.tags)
  65         if metric_data is None:
  66             log.warning("No metric result for alarm %s", alarm.uuid)
  67             self.queue.put((alarm, AlarmStatus.INSUFFICIENT))
  68         else:
  69             for metric in metric_data:
  70                 metric_value = metric["value"]
  71                 metric_labels = metric["labels"]
  72                 alarm.extra_labels.update(metric_labels)
  73                 if alarm.alarm_status.upper() != AlarmStatus.DISABLED.value.upper():
  74                     if (
  75                         (
  76                             alarm.operation.upper() == "GT"
  77                             and metric_value > alarm.threshold
  78                         )
  79                         or (
  80                             alarm.operation.upper() == "LT"
  81                             and metric_value < alarm.threshold
  82                         )
  83                         or (
  84                             alarm.operation.upper() == "GE"
  85                             and metric_value >= alarm.threshold
  86                         )
  87                         or (
  88                             alarm.operation.upper() == "LE"
  89                             and metric_value <= alarm.threshold
  90                         )
  91                         or (
  92                             alarm.operation.upper() == "EQ"
  93                             and metric_value == alarm.threshold
  94                         )
  95                         or (
  96                             alarm.operation.upper() == "NE"
  97                             and metric_value != alarm.threshold
  98                         )
  99                     ):
 100                         self.queue.put((alarm, AlarmStatus.ALARM))
 101                     elif alarm.operation.upper() in (
 102                         "GT",
 103                         "LT",
 104                         "GE",
 105                         "LE",
 106                         "EQ",
 107                         "NE",
 108                     ):
 109                         self.queue.put((alarm, AlarmStatus.OK))
 110
 111     def update_alarm_status(self, alarm_state, uuid):
 112         alarm_data = self.common_db.get_alarm_by_uuid(uuid)
 113         if alarm_data.get("alarm_status").upper() != AlarmStatus.DISABLED.value.upper():
 114             self.common_db.update_alarm_status(alarm_state, uuid)
 115         return
 116
 117     def update_alarm_extra_labels(self, alarm_labels, uuid):
 118         self.common_db.update_alarm_extra_labels(alarm_labels, uuid)
 119         return
 120
 121     def evaluate_alarms(self) -> List[Tuple[Alarm, AlarmStatus]]:
 122         log.debug("evaluate_alarms")
 123         processes = []
 124         for alarm in self.common_db.get_alarms():
 125             p = multiprocessing.Process(target=self._evaluate_metric, args=(alarm,))
 126             processes.append(p)
 127             p.start()
 128
 129         for process in processes:
 130             process.join(timeout=10)
 131         alarms_tuples = []
 132         while not self.queue.empty():
 133             alarms_tuples.append(self.queue.get())
 134         return alarms_tuples