X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=docker%2FPrometheus%2Fsrc%2Fapp.py;fp=docker%2FPrometheus%2Fsrc%2Fapp.py;h=36b7b52e2f6217afd0c518dfb9357072272b2ead;hb=f91f9c32f0d3cd4011a288235d0e4e1ba44b85d5;hp=b06f448684eae8a6277f83d5f01d50873a75797c;hpb=0e49b4aa3a8853ea5b9702e0bc58134d8a4bcd88;p=osm%2Fdevops.git

diff --git a/docker/Prometheus/src/app.py b/docker/Prometheus/src/app.py
index b06f4486..36b7b52e 100755
--- a/docker/Prometheus/src/app.py
+++ b/docker/Prometheus/src/app.py
@@ -22,22 +22,26 @@
 # contact: fbravo@whitestack.com
 ##
 
-import os
-import pymongo
-import yaml
 import aiohttp
 import asyncio
+from bson.json_util import dumps
+from bson import ObjectId
 import copy
+from datetime import datetime
 import json
+import os
+import pymongo
 import time
-from bson.json_util import dumps
-from bson import ObjectId
+import yaml
 
 # Env variables
 mongodb_url = os.environ["MONGODB_URL"]
 target_database = os.environ["TARGET_DATABASE"]
 prometheus_config_file = os.environ["PROMETHEUS_CONFIG_FILE"]
 prometheus_base_config_file = os.environ["PROMETHEUS_BASE_CONFIG_FILE"]
+prometheus_alerts_file = os.environ["PROMETHEUS_ALERTS_FILE"]
+prometheus_base_alerts_file = os.environ["PROMETHEUS_BASE_ALERTS_FILE"]
+
 prometheus_url = os.environ["PROMETHEUS_URL"]
 
 
@@ -45,6 +49,10 @@ def get_jobs(client):
     return json.loads(dumps(client[target_database].prometheus_jobs.find({})))
 
 
+def get_alerts(client):
+    return json.loads(dumps(client[target_database].alerts.find({"prometheus_config": {"$exists": True}})))
+
+
 def save_successful_jobs(client, jobs):
     for job in jobs:
         client[target_database].prometheus_jobs.update_one(
@@ -88,6 +96,29 @@ def generate_prometheus_config(prometheus_jobs, config_file_path):
     return config_file_yaml
 
 
+def generate_prometheus_alerts(prometheus_alerts, config_file_path):
+    with open(config_file_path, encoding="utf-8", mode="r") as config_file:
+        config_file_yaml = yaml.safe_load(config_file)
+    if config_file_yaml is None:
+        config_file_yaml = {}
+    if "groups" not in config_file_yaml:
+        config_file_yaml["groups"] = []
+
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    group = {
+        "name": f"_osm_alert_rules_{timestamp}_",
+        "rules": [],
+    }
+    for alert in prometheus_alerts:
+        if "prometheus_config" in alert:
+            group["rules"].append(alert["prometheus_config"])
+
+    if group["rules"]:
+        config_file_yaml["groups"].append(group)
+
+    return config_file_yaml
+
+
 async def reload_prometheus_config(prom_url):
     async with aiohttp.ClientSession() as session:
         async with session.post(prom_url + "/-/reload") as resp:
@@ -131,7 +162,7 @@ async def validate_configuration(prom_url, new_config):
 
 async def main_task(client):
     stored_jobs = get_jobs(client)
-    print(f"Jobs detected : {len(stored_jobs):d}")
+    print(f"Jobs detected: {len(stored_jobs):d}")
     generated_prometheus_config = generate_prometheus_config(
         stored_jobs, prometheus_base_config_file
     )
@@ -141,6 +172,20 @@
     print(yaml.safe_dump(generated_prometheus_config))
     config_file.write(yaml.safe_dump(generated_prometheus_config))
     config_file.close()
+
+    if os.path.isfile(prometheus_base_alerts_file):
+        stored_alerts = get_alerts(client)
+        print(f"Alerts read: {len(stored_alerts):d}")
+        generated_prometheus_alerts = generate_prometheus_alerts(
+            stored_alerts, prometheus_base_alerts_file
+        )
+        print(f"Writing new alerts file to {prometheus_alerts_file}")
+        config_file = open(prometheus_alerts_file, "w")
+        config_file.truncate(0)
+        print(yaml.safe_dump(generated_prometheus_alerts))
+        config_file.write(yaml.safe_dump(generated_prometheus_alerts))
+        config_file.close()
+
     print("New config written, updating prometheus")
     update_resp = await reload_prometheus_config(prometheus_url)
     is_valid = await validate_configuration(prometheus_url, generated_prometheus_config)
@@ -161,9 +206,9 @@ async def main():
     # Initial loop. First refresh of prometheus config file
     first_refresh_completed = False
     tries = 1
-    while tries <= 3:
+    while tries <= 3 and first_refresh_completed == False:
         try:
-            print("Refreshing prometheus config file for first time")
+            print("Generating prometheus config files")
             await main_task(client)
             first_refresh_completed = True
         except Exception as error:
@@ -179,23 +224,21 @@
     while True:
         try:
             # Needs mongodb in replica mode as this feature relies in OpLog
-            change_stream = client[target_database].prometheus_jobs.watch(
+            change_stream = client[target_database].watch(
                 [
                     {
                         "$match": {
-                            # If you want to modify a particular job,
-                            # delete and insert it again
-                            "operationType": {"$in": ["insert", "delete"]}
+                            "operationType": {"$in": ["insert", "delete"]},
+                            "ns.coll": { "$in": ["prometheus_jobs", "alerts"]},
                         }
                     }
                 ]
             )
 
             # Single thread, no race conditions and ops are queued up in order
-            print("Listening to changes in prometheus jobs collection")
+            print("Listening to changes in prometheus jobs and alerts collections")
             for change in change_stream:
-                print("Change detected, updating prometheus config")
-                print(f"{change}")
+                print("Changes detected, updating prometheus config")
                 await main_task(client)
                 print()
         except Exception as error:
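
Illustration (not part of the commit): a minimal sketch of an alert document that the change stream above would pick up. The collection name ("alerts"), the "prometheus_config" field, and the MONGODB_URL/TARGET_DATABASE environment variables come from the code in this diff; the "uuid" field and the rule contents are hypothetical examples of a standard Prometheus alerting rule.

# Sketch only: insert an alert document so app.py regenerates the alerts file.
import os

import pymongo

client = pymongo.MongoClient(os.environ["MONGODB_URL"])
db = client[os.environ["TARGET_DATABASE"]]

db.alerts.insert_one(
    {
        "uuid": "example-alert",  # hypothetical identifier, not required by app.py
        # Copied verbatim by generate_prometheus_alerts() into a
        # "_osm_alert_rules_<timestamp>_" rule group.
        "prometheus_config": {
            "alert": "example_metric_out_of_range",
            "expr": "osm_example_metric > 100",
            "for": "1m",
            "labels": {"severity": "warning"},
            "annotations": {"summary": "Example metric out of range"},
        },
    }
)
# The insert appears on the change stream (ns.coll == "alerts"), main_task()
# rewrites the alerts file, and Prometheus is reloaded via POST /-/reload.

Deleting the document drives the same path, since the watch filter matches both insert and delete operations.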