Coverage for osm_mon/evaluator/service.py: 88%

58 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-05-06 19:04 +0000

1# -*- coding: utf-8 -*- 

2 

3# Copyright 2018 Whitestack, LLC 

4# ************************************************************* 

5 

6# This file is part of OSM Monitoring module 

7# All Rights Reserved to Whitestack, LLC 

8 

9# Licensed under the Apache License, Version 2.0 (the "License"); you may 

10# not use this file except in compliance with the License. You may obtain 

11# a copy of the License at 

12 

13# http://www.apache.org/licenses/LICENSE-2.0 

14 

15# Unless required by applicable law or agreed to in writing, software 

16# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

17# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

18# License for the specific language governing permissions and limitations 

19# under the License. 

20# For those usages not covered by the Apache License, Version 2.0 please 

21# contact: bdiaz@whitestack.com or glavado@whitestack.com 

22## 

23import logging 

24import multiprocessing 

25from enum import Enum 

26from typing import Tuple, List 

27 

28from osm_mon.core.common_db import CommonDbClient 

29from osm_mon.core.config import Config 

30from osm_mon.core.models import Alarm 

31from osm_mon.evaluator.backends.prometheus import PrometheusBackend 

32 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Maps the backend name read from the configuration to the class that
# implements metric retrieval for that backend.
BACKENDS = dict(prometheus=PrometheusBackend)

36 

37 

class AlarmStatus(Enum):
    """Statuses an alarm can be in; each value is the status' string form."""

    # The alarm condition evaluated to true for a metric sample.
    ALARM = "alarm"
    # The alarm condition evaluated to false for a metric sample.
    OK = "ok"
    # The backend returned no data for the alarm's metric.
    INSUFFICIENT = "insufficient-data"
    # Alarms in this status are not evaluated and not updated.
    DISABLED = "disabled"

43 

44 

class EvaluatorService:
    """Evaluate alarms against metric data fetched from the configured backend.

    Every alarm is evaluated in its own child process. The children report
    their results as ``(alarm, AlarmStatus)`` tuples through a shared
    multiprocessing queue that ``evaluate_alarms`` collects.
    """

    # Seconds evaluate_alarms waits for each evaluation process to finish.
    _JOIN_TIMEOUT = 10
    # Longest single wait on a process before the result queue is read again.
    _POLL_INTERVAL = 0.1

    # Threshold comparison for every supported alarm operation (upper case).
    _COMPARATORS = {
        "GT": lambda value, threshold: value > threshold,
        "LT": lambda value, threshold: value < threshold,
        "GE": lambda value, threshold: value >= threshold,
        "LE": lambda value, threshold: value <= threshold,
        "EQ": lambda value, threshold: value == threshold,
        "NE": lambda value, threshold: value != threshold,
    }

    def __init__(self, config: Config):
        """Store the configuration and create the DB client and result queue.

        Args:
            config (Config): configuration handed to the common DB client and
                to the metric backend.
        """
        self.conf = config
        self.common_db = CommonDbClient(self.conf)
        self.queue = multiprocessing.Queue()

    def _get_metric_data(self, metric_name: str, tags: dict):
        """Fetch the data of a metric from the configured backend.

        The backend class is looked up in ``BACKENDS`` using the "backend"
        option of the "evaluator" configuration section, and a new backend
        instance is created on every call.

        Args:
            metric_name (str): name of the metric to query.
            tags (dict): tags passed to the backend along with the metric name.

        Returns:
            Whatever the backend's ``get_metric_data`` returns.

        Raises:
            KeyError: if the configured backend name is not in ``BACKENDS``.
        """
        backend_class = BACKENDS[self.conf.get("evaluator", "backend")]
        return backend_class(self.conf).get_metric_data(metric_name, tags)

    def _evaluate_metric(self, alarm: Alarm):
        """Method to evaluate a metric value comparing it against an alarm threshold.

        One ``(alarm, AlarmStatus)`` tuple is put on the result queue per
        metric sample, or a single ``INSUFFICIENT`` tuple when the backend
        returns ``None``. Nothing is queued for disabled alarms or for
        operations that are not supported.

        Args:
            alarm (Alarm): the alarm with the threshold to compare the metric against
        """
        log.debug("_evaluate_metric")
        metric_data = self._get_metric_data(alarm.metric, alarm.tags)
        if metric_data is None:
            log.warning("No metric result for alarm %s", alarm.uuid)
            self.queue.put((alarm, AlarmStatus.INSUFFICIENT))
            return

        disabled = AlarmStatus.DISABLED.value.upper()
        for metric in metric_data:
            metric_value = metric["value"]
            metric_labels = metric["labels"]
            # Labels are merged even for disabled alarms.
            # NOTE(review): the same alarm object is queued for every sample,
            # so labels merged for a later sample may be visible in tuples
            # queued earlier - confirm whether a per-sample copy is wanted.
            alarm.extra_labels.update(metric_labels)
            if alarm.alarm_status.upper() == disabled:
                continue

            operation = alarm.operation.upper()
            compare = self._COMPARATORS.get(operation)
            if compare is None:
                # No status is reported for an operation we cannot evaluate.
                log.warning(
                    "Unsupported operation %s for alarm %s", operation, alarm.uuid
                )
                continue

            if compare(metric_value, alarm.threshold):
                status = AlarmStatus.ALARM
            else:
                status = AlarmStatus.OK
            self.queue.put((alarm, status))

    def update_alarm_status(self, alarm_state, uuid):
        """Persist a new status for an alarm unless the alarm is disabled.

        Args:
            alarm_state: the status to store; passed through to the DB client.
            uuid: identifier of the alarm to update.
        """
        alarm_data = self.common_db.get_alarm_by_uuid(uuid)
        if alarm_data is None:
            # Presumably the alarm was removed after it had been evaluated;
            # there is nothing left to update.
            log.warning("Alarm %s not found, status not updated", uuid)
            return

        # An alarm without a stored status is treated as not disabled.
        current_status = alarm_data.get("alarm_status") or ""
        if current_status.upper() != AlarmStatus.DISABLED.value.upper():
            self.common_db.update_alarm_status(alarm_state, uuid)

    def update_alarm_extra_labels(self, alarm_labels, uuid):
        """Persist the extra labels of an alarm.

        Args:
            alarm_labels: the labels to store; passed through to the DB client.
            uuid: identifier of the alarm to update.
        """
        self.common_db.update_alarm_extra_labels(alarm_labels, uuid)

    def _drain_queue(self, results: list) -> None:
        """Move every result currently readable from the queue into ``results``."""
        from queue import Empty

        while True:
            try:
                results.append(self.queue.get_nowait())
            except Empty:
                return

    def evaluate_alarms(self) -> List[Tuple[Alarm, AlarmStatus]]:
        """Evaluate every alarm known to the common DB in a separate process.

        Each process is waited for up to ``_JOIN_TIMEOUT`` seconds. The result
        queue is read while waiting: a child that has queued data does not
        exit until that data has been flushed to the underlying pipe, so
        joining before reading can stall once the pipe buffer is full.

        Returns:
            List[Tuple[Alarm, AlarmStatus]]: the results reported by the
            evaluation processes.
        """
        import time

        log.debug("evaluate_alarms")
        processes = []
        for alarm in self.common_db.get_alarms():
            p = multiprocessing.Process(target=self._evaluate_metric, args=(alarm,))
            processes.append(p)
            p.start()

        alarms_tuples = []
        for process in processes:
            deadline = time.monotonic() + self._JOIN_TIMEOUT
            while True:
                self._drain_queue(alarms_tuples)
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                process.join(timeout=min(self._POLL_INTERVAL, remaining))
                if not process.is_alive():
                    break
            if process.is_alive():
                log.warning(
                    "Evaluation process %s still running after %s seconds",
                    process.pid,
                    self._JOIN_TIMEOUT,
                )

        # Collect whatever was flushed by processes that finished last.
        self._drain_queue(alarms_tuples)
        return alarms_tuples