blob: 90c0c176ad48294f9a0aa3a669422ee527f6e2dc [file] [log] [blame]
Benjamin Diaz51f44862018-11-15 10:27:12 -03001# -*- coding: utf-8 -*-
2
3# Copyright 2018 Whitestack, LLC
4# *************************************************************
5
6# This file is part of OSM Monitoring module
7# All Rights Reserved to Whitestack, LLC
8
9# Licensed under the Apache License, Version 2.0 (the "License"); you may
10# not use this file except in compliance with the License. You may obtain
11# a copy of the License at
12
13# http://www.apache.org/licenses/LICENSE-2.0
14
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18# License for the specific language governing permissions and limitations
19# under the License.
20# For those usages not covered by the Apache License, Version 2.0 please
21# contact: bdiaz@whitestack.com or glavado@whitestack.com
22##
Benjamin Diaz5ac7c082019-02-06 11:58:00 -030023import asyncio
Benjamin Diaz51f44862018-11-15 10:27:12 -030024import logging
25import multiprocessing
26import time
Benjamin Diaz2bdf4022019-03-06 15:53:56 -030027from enum import Enum
Benjamin Diaz51f44862018-11-15 10:27:12 -030028
Benjamin Diaz83038622019-01-28 19:03:39 -030029import peewee
Benjamin Diaz058d51d2018-11-20 14:01:43 -030030import requests
Benjamin Diaz51f44862018-11-15 10:27:12 -030031from osm_common.dbbase import DbException
32
Benjamin Diaz058d51d2018-11-20 14:01:43 -030033from osm_mon.collector.backends.prometheus import OSM_METRIC_PREFIX
Benjamin Diaz51f44862018-11-15 10:27:12 -030034from osm_mon.core.common_db import CommonDbClient
Benjamin Diaz5ac7c082019-02-06 11:58:00 -030035from osm_mon.core.config import Config
Benjamin Diaz51f44862018-11-15 10:27:12 -030036from osm_mon.core.database import DatabaseManager, Alarm
Benjamin Diaz5ac7c082019-02-06 11:58:00 -030037from osm_mon.core.message_bus_client import MessageBusClient
Benjamin Diaz51f44862018-11-15 10:27:12 -030038from osm_mon.core.response import ResponseBuilder
Benjamin Diaz51f44862018-11-15 10:27:12 -030039
40log = logging.getLogger(__name__)
41
42
class AlarmStatus(Enum):
    """Possible outcomes of evaluating an alarm; the value is the string sent in notifications."""

    # The metric value crossed the alarm threshold.
    ALARM = "alarm"
    # The metric value is on the non-alarming side of the threshold.
    OK = "ok"
    # No metric sample was available to compare against the threshold.
    INSUFFICIENT = "insufficient-data"
47
48
class Evaluator:
    """Evaluate the stored alarms against Prometheus metrics and notify the results.

    Every evaluation pass reads all ``Alarm`` rows, resolves the metric each
    alarm refers to through the common database (vnfr / vnfd records), queries
    Prometheus for the current value in a child process, and finally publishes
    one ``notify_alarm`` message per evaluated alarm on the message bus.
    """

    def __init__(self, config: Config, loop=None):
        """Set up database, message bus and result queue.

        :param config: configuration object; read for the ``prometheus``,
            ``global`` and ``evaluator`` sections.
        :param loop: asyncio event loop used to write to the message bus;
            defaults to ``asyncio.get_event_loop()``.
        """
        self.conf = config
        self.loop = loop or asyncio.get_event_loop()
        self.common_db = CommonDbClient(self.conf)
        self.plugins = []
        self.database_manager = DatabaseManager(self.conf)
        self.database_manager.create_tables()
        # Child processes push (alarm, AlarmStatus) tuples here; evaluate() drains it.
        self.queue = multiprocessing.Queue()
        self.msg_bus = MessageBusClient(config)

    def _evaluate_metric(self,
                         nsr_id: str,
                         vnf_member_index: int,
                         vdur_name: str,
                         metric_name: str,
                         alarm: Alarm):
        """Query Prometheus for one metric and queue the resulting alarm status.

        Puts ``(alarm, AlarmStatus)`` on ``self.queue`` when a status can be
        determined: ALARM / OK after comparing the value with
        ``alarm.threshold``, or INSUFFICIENT when Prometheus returns no sample.
        Nothing is queued when Prometheus cannot be reached, answers with an
        error, or the alarm operation is neither ``GT`` nor ``LT``.

        :param nsr_id: value for the ``ns_id`` label.
        :param vnf_member_index: value for the ``vnf_member_index`` label.
        :param vdur_name: value for the ``vdu_name`` label (may be empty).
        :param metric_name: metric name, without the OSM prefix.
        :param alarm: alarm being evaluated.
        """
        log.debug("_evaluate_metric")
        # TODO: Refactor to fit backend plugin model
        query_section = "query={0}{{ns_id=\"{1}\",vdu_name=\"{2}\",vnf_member_index=\"{3}\"}}".format(
            OSM_METRIC_PREFIX + metric_name, nsr_id, vdur_name, vnf_member_index)
        request_url = self.conf.get('prometheus', 'url') + "/api/v1/query?" + query_section
        log.info("Querying Prometheus: %s", request_url)
        try:
            r = requests.get(request_url, timeout=int(self.conf.get('global', 'request_timeout')))
        except requests.RequestException:
            # Timeouts / connection errors: log through the module logger
            # instead of letting the child process die with a raw traceback.
            log.exception("Error contacting Prometheus for alarm %s", alarm.id)
            return

        if r.status_code != 200:
            log.warning("Error contacting Prometheus. Got status code %s: %s", r.status_code, r.text)
            return

        json_response = r.json()
        if json_response['status'] != 'success':
            log.warning("Prometheus response is not success. Got status %s", json_response['status'])
            return

        result = json_response['data']['result']
        if not result:
            log.warning("No metric result for alarm %s", alarm.id)
            self.queue.put((alarm, AlarmStatus.INSUFFICIENT))
            return

        # Only the first returned series is considered; its value is [timestamp, "value"].
        metric_value = float(result[0]['value'][1])
        log.info("Metric value: %s", metric_value)

        operation = alarm.operation.upper()
        if operation == 'GT':
            breached = metric_value > alarm.threshold
        elif operation == 'LT':
            breached = metric_value < alarm.threshold
        else:
            log.warning("Unsupported operation %s for alarm %s", alarm.operation, alarm.id)
            return
        self.queue.put((alarm, AlarmStatus.ALARM if breached else AlarmStatus.OK))

    def evaluate_forever(self):
        """Run ``evaluate`` in an endless loop, sleeping between passes.

        The sleep time is the ``evaluator.interval`` setting, read on every
        iteration. Database (peewee) errors are logged and re-raised; any other
        error raised by ``evaluate`` is logged and the loop goes on after the
        usual sleep, so a persistent failure cannot turn into a busy loop.
        An invalid interval setting propagates to the caller.
        """
        log.debug('evaluate_forever')
        while True:
            try:
                self.evaluate()
            except peewee.PeeweeException:
                log.exception("Database error evaluating alarms: ")
                raise
            except Exception:
                log.exception("Error evaluating alarms")
            time.sleep(int(self.conf.get('evaluator', 'interval')))

    def _spawn_metric_evaluation(self,
                                 nsr_id: str,
                                 vnf_member_index: int,
                                 vdur_name: str,
                                 metric_name: str,
                                 alarm: Alarm):
        """Start a child process running ``_evaluate_metric`` and return it."""
        p = multiprocessing.Process(target=self._evaluate_metric,
                                    args=(nsr_id,
                                          vnf_member_index,
                                          vdur_name,
                                          metric_name,
                                          alarm))
        p.start()
        return p

    def evaluate(self):
        """Run one evaluation pass over every stored alarm.

        For each alarm the related vnfr and vnfd are fetched, the monitoring
        param named by the alarm is located in the vnfd, and one child process
        per referenced metric (``vdu-monitoring-param``, ``vdu-metric``,
        ``vnf-metric``) is started. After waiting up to 10 seconds for each
        child, the queued results are drained and a notification process is
        started for each of them.

        An alarm whose records cannot be fetched or resolved is logged and
        skipped; it does not prevent the remaining alarms from being evaluated.
        """
        log.debug('evaluate')
        processes = []
        for alarm in Alarm.select():
            try:
                vnfr = self.common_db.get_vnfr(alarm.nsr_id, alarm.vnf_member_index)
            except DbException:
                log.exception("Error getting vnfr: ")
                continue
            try:
                vnfd = self.common_db.get_vnfd(vnfr['vnfd-id'])
            except DbException:
                log.exception("Error getting vnfd: ")
                continue

            vdur = next((item for item in vnfr['vdur'] if item['name'] == alarm.vdur_name), None)
            if vdur is None:
                log.warning("No vdur found with name %s for alarm %s", alarm.vdur_name, alarm.id)
                continue

            vdu = next((item for item in vnfd['vdu'] if item['id'] == vdur['vdu-id-ref']), None)
            if vdu is None:
                log.warning("No vdu found with id %s for alarm %s", vdur['vdu-id-ref'], alarm.id)
                continue

            vnf_monitoring_param = next(
                (param for param in vnfd['monitoring-param'] if param['id'] == alarm.monitoring_param), None)
            if vnf_monitoring_param is None:
                log.warning("No monitoring param found with id %s for alarm %s",
                            alarm.monitoring_param, alarm.id)
                continue

            nsr_id = vnfr['nsr-id-ref']
            vnf_member_index = vnfr['member-vnf-index-ref']
            vdur_name = vdur['name']

            if 'vdu-monitoring-param' in vnf_monitoring_param:
                param_ref = vnf_monitoring_param['vdu-monitoring-param']['vdu-monitoring-param-ref']
                vdu_monitoring_param = next(
                    (param for param in vdu['monitoring-param'] if param['id'] == param_ref), None)
                if vdu_monitoring_param is None:
                    log.warning("No vdu monitoring param found with id %s for alarm %s", param_ref, alarm.id)
                else:
                    nfvi_metric = vdu_monitoring_param['nfvi-metric']
                    processes.append(self._spawn_metric_evaluation(
                        nsr_id, vnf_member_index, vdur_name, nfvi_metric, alarm))
            if 'vdu-metric' in vnf_monitoring_param:
                vnf_metric_name = vnf_monitoring_param['vdu-metric']['vdu-metric-name-ref']
                processes.append(self._spawn_metric_evaluation(
                    nsr_id, vnf_member_index, vdur_name, vnf_metric_name, alarm))
            if 'vnf-metric' in vnf_monitoring_param:
                vnf_metric_name = vnf_monitoring_param['vnf-metric']['vnf-metric-name-ref']
                # VNF-level metrics are queried with an empty vdu_name label.
                processes.append(self._spawn_metric_evaluation(
                    nsr_id, vnf_member_index, '', vnf_metric_name, alarm))

        for process in processes:
            process.join(timeout=10)
        alarms_tuples = []
        while not self.queue.empty():
            alarms_tuples.append(self.queue.get())
        for alarm, status in alarms_tuples:
            p = multiprocessing.Process(target=self.notify_alarm,
                                        args=(alarm, status))
            p.start()

    def notify_alarm(self, alarm: Alarm, status: AlarmStatus):
        """Publish a ``notify_alarm`` message for *alarm* on the ``alarm_response`` topic.

        :param alarm: alarm that was evaluated.
        :param status: status determined for the alarm.
        """
        log.debug("notify_alarm")
        resp_message = self._build_alarm_response(alarm, status)
        self.loop.run_until_complete(self.msg_bus.aiowrite('alarm_response', 'notify_alarm', resp_message))
        # Logged only after the write completed, so the message is truthful.
        log.info("Sent alarm notification: %s", resp_message)

    def _build_alarm_response(self, alarm: Alarm, status: AlarmStatus):
        """Build the ``notify_alarm`` payload for *alarm* with the given *status*.

        :param alarm: alarm whose fields are copied into the response.
        :param status: evaluation result; its ``value`` string is sent.
        :return: whatever ``ResponseBuilder.generate_response`` returns.
        """
        response = ResponseBuilder()
        # Single strftime call so date and time come from the same instant.
        now = time.strftime("%d-%m-%Y %X")
        return response.generate_response(
            'notify_alarm',
            alarm_id=alarm.uuid,
            vdu_name=alarm.vdur_name,
            vnf_member_index=alarm.vnf_member_index,
            ns_id=alarm.nsr_id,
            metric_name=alarm.monitoring_param,
            operation=alarm.operation,
            threshold_value=alarm.threshold,
            sev=alarm.severity,
            status=status.value,
            date=now)