#!/usr/bin/env python

# Copyright 2021 Whitestack, LLC
# *************************************************************

# This file is part of OSM Monitoring module
# All Rights Reserved to Whitestack, LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# For those usages not covered by the Apache License, Version 2.0 please
# contact: fbravo@whitestack.com
##

bravofc973b572020-10-21 16:58:50 -030025import aiohttp
26import asyncio
bravofc973b572020-10-21 16:58:50 -030027from bson.json_util import dumps
28from bson import ObjectId
aguilardf91f9c32023-04-13 16:57:57 +000029import copy
30from datetime import datetime
31import json
32import os
33import pymongo
34import time
35import yaml
bravofc973b572020-10-21 16:58:50 -030036
# Env variables
# Every setting is mandatory: indexing os.environ raises KeyError for the first
# variable that is not defined. Names on the left pair up positionally with the
# environment keys on the right.
(
    mongodb_url,
    target_database,
    prometheus_config_file,
    prometheus_base_config_file,
    prometheus_alerts_file,
    prometheus_base_alerts_file,
    prometheus_url,
) = (
    os.environ[env_key]
    for env_key in (
        "MONGODB_URL",
        "TARGET_DATABASE",
        "PROMETHEUS_CONFIG_FILE",
        "PROMETHEUS_BASE_CONFIG_FILE",
        "PROMETHEUS_ALERTS_FILE",
        "PROMETHEUS_BASE_ALERTS_FILE",
        "PROMETHEUS_URL",
    )
)
46
bravofc973b572020-10-21 16:58:50 -030047
def get_jobs(client):
    """Return every document of the ``prometheus_jobs`` collection.

    The query result is serialised with ``bson.json_util.dumps`` and parsed
    back with ``json.loads``, so callers receive plain JSON-compatible
    structures rather than a live cursor.

    :param client: MongoDB client; the database is selected with the
        module-level ``target_database`` name.
    :return: the parsed list of job documents.
    """
    jobs_cursor = client[target_database].prometheus_jobs.find({})
    serialized_jobs = dumps(jobs_cursor)
    return json.loads(serialized_jobs)
50
bravofc973b572020-10-21 16:58:50 -030051
def get_alerts(client):
    """Return the ``alerts`` documents that carry a ``prometheus_config`` field.

    The query result is serialised with ``bson.json_util.dumps`` and parsed
    back with ``json.loads``, so callers receive plain JSON-compatible
    structures rather than a live cursor.

    :param client: MongoDB client; the database is selected with the
        module-level ``target_database`` name.
    :return: the parsed list of alert documents.
    """
    only_with_prometheus_config = {"prometheus_config": {"$exists": True}}
    alerts_cursor = client[target_database].alerts.find(only_with_prometheus_config)
    serialized_alerts = dumps(alerts_cursor)
    return json.loads(serialized_alerts)
54
55
def save_successful_jobs(client, jobs):
    """Set ``is_active`` to True on each of *jobs* in ``prometheus_jobs``.

    :param client: MongoDB client; the database is selected with the
        module-level ``target_database`` name.
    :param jobs: job documents whose ``_id`` is a mapping with an ``$oid`` key
        (the id is rebuilt as an ``ObjectId`` to address the stored document).
    """
    for job in jobs:
        collection = client[target_database].prometheus_jobs
        job_filter = {"_id": ObjectId(job["_id"]["$oid"])}
        mark_active = {"$set": {"is_active": True}}
        collection.update_one(job_filter, mark_active)
61
bravofc973b572020-10-21 16:58:50 -030062
def clean_up_job(prometheus_job):
    """Return a deep copy of *prometheus_job* without bookkeeping keys.

    The keys ``_id``, ``is_active``, ``vnfr_id`` and ``nsr_id`` are dropped
    when present; everything else is copied. The argument itself is left
    untouched.

    :param prometheus_job: job document (a dict).
    :return: the stripped copy.
    """
    stripped_job = copy.deepcopy(prometheus_job)
    # take out _id and internal keys
    for internal_key in ("_id", "is_active", "vnfr_id", "nsr_id"):
        stripped_job.pop(internal_key, None)
    return stripped_job
71
bravofc973b572020-10-21 16:58:50 -030072
def generate_prometheus_config(prometheus_jobs, config_file_path):
    """Build a Prometheus configuration from a base file plus stored jobs.

    The YAML document at *config_file_path* is loaded and each job of
    *prometheus_jobs* is passed through ``clean_up_job`` and appended to its
    ``scrape_configs`` list, unless the base file already has an entry with
    the same ``job_name``.

    :param prometheus_jobs: iterable of job documents (dicts).
    :param config_file_path: path of the base configuration file.
    :return: the merged configuration as a dict.
    """
    with open(config_file_path, encoding="utf-8", mode="r") as config_file:
        config_file_yaml = yaml.safe_load(config_file)
    if config_file_yaml is None:
        # safe_load returns None for an empty document.
        config_file_yaml = {}

    # A bare "scrape_configs:" key loads as None; handle it like a missing key
    # so the list operations below cannot fail on None.
    scrape_configs = config_file_yaml.get("scrape_configs")
    if scrape_configs is None:
        scrape_configs = []
        config_file_yaml["scrape_configs"] = scrape_configs

    # Names are collected before anything is appended: stored jobs are only
    # checked against the entries of the base file, not against each other.
    base_job_names = [sc.get("job_name") for sc in scrape_configs]

    for prometheus_job in prometheus_jobs:
        cleaned_up_job = clean_up_job(prometheus_job)
        if cleaned_up_job.get("job_name") not in base_job_names:
            scrape_configs.append(cleaned_up_job)

    return config_file_yaml
97
bravofc973b572020-10-21 16:58:50 -030098
def generate_prometheus_alerts(prometheus_alerts, config_file_path):
    """Build the Prometheus alert rules from a base file plus stored alerts.

    The YAML document at *config_file_path* is loaded and, when at least one
    alert carries a ``prometheus_config`` entry, a single extra rule group
    holding all of those entries is appended to its ``groups`` list. The group
    name embeds the current local time.

    :param prometheus_alerts: iterable of alert documents (dicts).
    :param config_file_path: path of the base alert rules file.
    :return: the merged rules document as a dict.
    """
    with open(config_file_path, encoding="utf-8", mode="r") as config_file:
        config_file_yaml = yaml.safe_load(config_file)
    if config_file_yaml is None:
        # safe_load returns None for an empty document.
        config_file_yaml = {}

    # A bare "groups:" key loads as None; handle it like a missing key so the
    # append below cannot fail on None.
    if config_file_yaml.get("groups") is None:
        config_file_yaml["groups"] = []

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    rules = [
        alert["prometheus_config"]
        for alert in prometheus_alerts
        if "prometheus_config" in alert
    ]

    # Only add the group when there is something in it.
    if rules:
        config_file_yaml["groups"].append(
            {
                "name": f"_osm_alert_rules_{timestamp}_",
                "rules": rules,
            }
        )

    return config_file_yaml
120
121
async def reload_prometheus_config(prom_url):
    """Ask Prometheus to reload its configuration.

    Sends a POST to ``<prom_url>/-/reload``. Any status code above 204 is
    treated as a failure and the response body is printed. On success the
    coroutine waits five seconds before returning.

    :param prom_url: base URL of the Prometheus server.
    :return: True when the reload request was accepted, False otherwise.
    """
    async with aiohttp.ClientSession() as session:
        async with session.post(prom_url + "/-/reload") as resp:
            if resp.status > 204:
                # text() is a coroutine: it has to be awaited to obtain the
                # body, otherwise the coroutine object itself gets printed.
                error_body = await resp.text()
                print(f"Error while updating prometheus config: {error_body}")
                return False
    # NOTE(review): presumably gives Prometheus time to apply the new
    # configuration before it is read back - confirm.
    await asyncio.sleep(5)
    return True
bravofc973b572020-10-21 16:58:50 -0300130
bravofc973b572020-10-21 16:58:50 -0300131
def check_configuration_equal(a_config, b_config):
    """Tell whether two configurations list the same scrape jobs.

    Two ``None`` values are equal, and so are two configurations that both
    lack ``scrape_configs``. Otherwise the ``job_name`` values of both
    ``scrape_configs`` lists are compared, order included; nothing else in
    the configurations is looked at.

    :param a_config: configuration dict or None.
    :param b_config: configuration dict or None.
    :return: True when both are considered equal, False otherwise.
    """
    if a_config is None or b_config is None:
        # Equal only when both are missing.
        return a_config is None and b_config is None

    a_has_jobs = "scrape_configs" in a_config
    b_has_jobs = "scrape_configs" in b_config
    if not (a_has_jobs and b_has_jobs):
        # Equal only when neither side defines any scrape config.
        return a_has_jobs == b_has_jobs

    names_in_a = [entry["job_name"] for entry in a_config["scrape_configs"]]
    names_in_b = [entry["job_name"] for entry in b_config["scrape_configs"]]
    return names_in_a == names_in_b
bravofc973b572020-10-21 16:58:50 -0300145
garciadeblasefa4c2b2022-09-07 22:35:53 +0200146
async def validate_configuration(prom_url, new_config):
    """Check that Prometheus is running with *new_config*.

    Fetches ``<prom_url>/api/v1/status/config``, parses the YAML found under
    ``data.yaml`` in the JSON answer and compares it with *new_config* through
    ``check_configuration_equal``. Any status code above 204 is treated as a
    failure and the response body is printed.

    :param prom_url: base URL of the Prometheus server.
    :param new_config: configuration dict that was just written.
    :return: True when the loaded configuration matches, False otherwise.
    """
    async with aiohttp.ClientSession() as session:
        # Gets the configuration from prometheus
        # and compares with the inserted one
        # If prometheus does not admit this configuration,
        # the old one will remain
        async with session.get(prom_url + "/api/v1/status/config") as resp:
            if resp.status > 204:
                # text() is a coroutine: it has to be awaited to obtain the
                # body, otherwise the coroutine object itself gets printed.
                error_body = await resp.text()
                print(f"Error while updating prometheus config: {error_body}")
                return False
            current_config = await resp.json()
            return check_configuration_equal(
                yaml.safe_load(current_config["data"]["yaml"]), new_config
            )
161
bravofc973b572020-10-21 16:58:50 -0300162
def _write_yaml_file(content, file_path):
    """Dump *content* as YAML, echo it to stdout and write it to *file_path*."""
    rendered = yaml.safe_dump(content)
    print(rendered)
    # Mode "w" already truncates the file; the context manager closes the
    # handle even when the write fails.
    with open(file_path, "w", encoding="utf-8") as output_file:
        output_file.write(rendered)


async def main_task(client):
    """Regenerate the Prometheus files and make Prometheus load them.

    Steps:
      1. Read the stored jobs and write the merged configuration to
         ``prometheus_config_file``.
      2. If the base alerts file exists, read the stored alerts and write the
         merged rules to ``prometheus_alerts_file``.
      3. Trigger a Prometheus reload and read the configuration back.
      4. When both calls succeed, flag the stored jobs as active.

    :param client: MongoDB client used to read jobs/alerts and update jobs.
    """
    stored_jobs = get_jobs(client)
    print(f"Jobs detected: {len(stored_jobs):d}")
    generated_prometheus_config = generate_prometheus_config(
        stored_jobs, prometheus_base_config_file
    )
    print(f"Writing new config file to {prometheus_config_file}")
    _write_yaml_file(generated_prometheus_config, prometheus_config_file)

    # Alert rules are only generated when a base alerts file is present.
    if os.path.isfile(prometheus_base_alerts_file):
        stored_alerts = get_alerts(client)
        print(f"Alerts read: {len(stored_alerts):d}")
        generated_prometheus_alerts = generate_prometheus_alerts(
            stored_alerts, prometheus_base_alerts_file
        )
        print(f"Writing new alerts file to {prometheus_alerts_file}")
        _write_yaml_file(generated_prometheus_alerts, prometheus_alerts_file)

    print("New config written, updating prometheus")
    update_resp = await reload_prometheus_config(prometheus_url)
    # The read-back happens even if the reload request failed.
    is_valid = await validate_configuration(prometheus_url, generated_prometheus_config)
    if update_resp and is_valid:
        print("Prometheus config update successful")
        save_successful_jobs(client, stored_jobs)
    else:
        print(
            "Error while updating prometheus config: "
            "current config doesn't match with updated values"
        )
200
bravofc973b572020-10-21 16:58:50 -0300201
async def main():
    """Run the initial refresh, then refresh again on every database change.

    The first refresh is attempted up to three times; if none succeeds the
    coroutine returns. Afterwards a change stream on the target database is
    followed forever: each insert or delete in the ``prometheus_jobs`` or
    ``alerts`` collections triggers ``main_task``. When the stream fails it is
    re-opened after a pause.
    """
    client = pymongo.MongoClient(mongodb_url)
    print("Created MongoClient to connect to MongoDB!")

    # Initial loop. First refresh of prometheus config file
    first_refresh_completed = False
    tries = 1
    while tries <= 3 and not first_refresh_completed:
        try:
            print("Generating prometheus config files")
            await main_task(client)
            first_refresh_completed = True
        except Exception as error:
            print(f"Error in configuration attempt! Number of tries: {tries}/3")
            print(error)
        # Non-blocking pause: this is a coroutine, so the event loop must not
        # be stalled with time.sleep.
        await asyncio.sleep(5)
        tries += 1
    if not first_refresh_completed:
        print("Not possible to refresh prometheus config file for first time")
        return

    watch_pipeline = [
        {
            "$match": {
                "operationType": {"$in": ["insert", "delete"]},
                "ns.coll": {"$in": ["prometheus_jobs", "alerts"]},
            }
        }
    ]

    # Main loop
    while True:
        try:
            # Needs mongodb in replica mode as this feature relies in OpLog
            # The context manager closes the stream when the loop is left,
            # including when an error is raised.
            with client[target_database].watch(watch_pipeline) as change_stream:
                # Single thread, no race conditions and ops are queued up in order
                print("Listening to changes in prometheus jobs and alerts collections")
                for _ in change_stream:
                    print("Changes detected, updating prometheus config")
                    await main_task(client)
                    print()
        except Exception as error:
            print(error)
            print(
                "Detected failure while listening to prometheus jobs collection, "
                "retrying..."
            )
        # Pause before re-opening the stream.
        await asyncio.sleep(5)
251
bravofc973b572020-10-21 16:58:50 -0300252
# Start the service only when the file is executed as a script, so that
# importing the module does not launch the endless watch loop.
if __name__ == "__main__":
    asyncio.run(main())