Coverage for osm_mon/evaluator/service.py: 88%
58 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-05-06 19:04 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-05-06 19:04 +0000
# -*- coding: utf-8 -*-

# Copyright 2018 Whitestack, LLC
# *************************************************************

# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# For those usages not covered by the Apache License, Version 2.0 please
# contact: bdiaz@whitestack.com or glavado@whitestack.com
##
import logging
import multiprocessing
import operator
from enum import Enum
from typing import Tuple, List

from osm_mon.core.common_db import CommonDbClient
from osm_mon.core.config import Config
from osm_mon.core.models import Alarm
from osm_mon.evaluator.backends.prometheus import PrometheusBackend
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Backend classes keyed by the value of the "backend" option in the
# "evaluator" configuration section.
BACKENDS = {"prometheus": PrometheusBackend}
class AlarmStatus(Enum):
    """Statuses an alarm can be in, as used by the evaluator in this module."""

    # Queued when the metric/threshold comparison for the alarm holds.
    ALARM = "alarm"
    # Queued when the comparison does not hold.
    OK = "ok"
    # Queued when the backend returned no metric data (None) for the alarm.
    INSUFFICIENT = "insufficient-data"
    # Alarms whose stored status equals this value (case-insensitively) are
    # neither evaluated nor updated.
    DISABLED = "disabled"
class EvaluatorService:
    """Evaluate stored alarms against metric data from the configured backend.

    Every alarm returned by the common DB is evaluated in its own process.
    The resulting ``(alarm, status)`` tuples are handed back through a
    ``multiprocessing.Queue`` owned by the service instance.
    """

    # Comparison applied as ``compare(metric_value, alarm.threshold)`` for each
    # supported alarm operation; operations are matched case-insensitively.
    _COMPARATORS = {
        "GT": operator.gt,
        "LT": operator.lt,
        "GE": operator.ge,
        "LE": operator.le,
        "EQ": operator.eq,
        "NE": operator.ne,
    }

    def __init__(self, config: Config):
        """Store the configuration and create the DB client and result queue.

        Args:
            config (Config): configuration passed on to the DB client and to
                the metric backend
        """
        self.conf = config
        self.common_db = CommonDbClient(self.conf)
        self.queue = multiprocessing.Queue()

    def _get_metric_data(self, metric_name: str, tags: dict):
        """Query the backend selected in the configuration for a metric.

        Args:
            metric_name (str): name of the metric to query
            tags (dict): tags forwarded to the backend with the metric name

        Returns:
            Whatever the backend's ``get_metric_data`` returns.

        Raises:
            KeyError: if the configured backend name is not in ``BACKENDS``.
        """
        backend_class = BACKENDS[self.conf.get("evaluator", "backend")]
        # A fresh backend instance is built for every query.
        return backend_class(self.conf).get_metric_data(metric_name, tags)

    def _evaluate_metric(self, alarm: Alarm):
        """Method to evaluate a metric value comparing it against an alarm threshold.

        The outcome is not returned; it is put on ``self.queue``:

        * ``(alarm, AlarmStatus.INSUFFICIENT)`` once, when the backend returns
          ``None``;
        * otherwise, for every entry of the metric data, either
          ``(alarm, AlarmStatus.ALARM)`` or ``(alarm, AlarmStatus.OK)``,
          depending on whether the alarm operation holds for the entry value
          and the alarm threshold.

        Nothing is queued for an entry when the alarm is disabled or its
        operation is not one of GT, LT, GE, LE, EQ, NE. The labels of every
        entry are merged into ``alarm.extra_labels`` in all cases.

        Args:
            alarm (Alarm): the alarm with the threshold to compare the metric against
        """
        log.debug("_evaluate_metric")
        metric_data = self._get_metric_data(alarm.metric, alarm.tags)
        if metric_data is None:
            log.warning("No metric result for alarm %s", alarm.uuid)
            self.queue.put((alarm, AlarmStatus.INSUFFICIENT))
            return

        disabled_status = AlarmStatus.DISABLED.value.upper()
        for metric in metric_data:
            metric_value = metric["value"]
            # Labels are merged even for disabled alarms.
            alarm.extra_labels.update(metric["labels"])
            if alarm.alarm_status.upper() == disabled_status:
                continue
            compare = self._COMPARATORS.get(alarm.operation.upper())
            if compare is None:
                # Unsupported operation: no status is reported for this entry.
                continue
            if compare(metric_value, alarm.threshold):
                self.queue.put((alarm, AlarmStatus.ALARM))
            else:
                self.queue.put((alarm, AlarmStatus.OK))

    def update_alarm_status(self, alarm_state, uuid):
        """Persist a new alarm state unless the stored alarm is disabled.

        Args:
            alarm_state: state forwarded to the common DB client
            uuid: identifier used to look up and update the alarm
        """
        alarm_data = self.common_db.get_alarm_by_uuid(uuid)
        # The status is re-read from the DB so a disabled alarm is never
        # overwritten by an evaluation result.
        if alarm_data.get("alarm_status").upper() != AlarmStatus.DISABLED.value.upper():
            self.common_db.update_alarm_status(alarm_state, uuid)

    def update_alarm_extra_labels(self, alarm_labels, uuid):
        """Persist the extra labels of an alarm through the common DB client.

        Args:
            alarm_labels: labels forwarded to the common DB client
            uuid: identifier of the alarm to update
        """
        self.common_db.update_alarm_extra_labels(alarm_labels, uuid)

    def evaluate_alarms(self) -> List[Tuple[Alarm, AlarmStatus]]:
        """Evaluate every alarm from the common DB, one process per alarm.

        Returns:
            List[Tuple[Alarm, AlarmStatus]]: the tuples found on the result
            queue once all processes have been joined (or timed out).
        """
        log.debug("evaluate_alarms")
        processes = []
        for alarm in self.common_db.get_alarms():
            p = multiprocessing.Process(target=self._evaluate_metric, args=(alarm,))
            processes.append(p)
            p.start()

        # NOTE(review): a process still running after the 10 s join timeout is
        # neither terminated nor joined again; whatever it queues later stays
        # on self.queue until a subsequent call drains it.
        for process in processes:
            process.join(timeout=10)

        # NOTE(review): the multiprocessing docs describe Queue.empty() as not
        # reliable; the drain is kept as-is to preserve behaviour.
        alarms_tuples = []
        while not self.queue.empty():
            alarms_tuples.append(self.queue.get())
        return alarms_tuples