# contact: fbravo@whitestack.com
##
-import os
-import pymongo
-import yaml
import aiohttp
import asyncio
+from bson.json_util import dumps
+from bson import ObjectId
import copy
+from datetime import datetime
import json
+import os
+import pymongo
import time
-from bson.json_util import dumps
-from bson import ObjectId
+import yaml
# Env variables
# Required configuration; a missing variable fails fast at import time
# with a KeyError rather than later at runtime.
mongodb_url = os.environ["MONGODB_URL"]
target_database = os.environ["TARGET_DATABASE"]
# Live files consumed by prometheus, and the base files they are
# regenerated from on every refresh.
prometheus_config_file = os.environ["PROMETHEUS_CONFIG_FILE"]
prometheus_base_config_file = os.environ["PROMETHEUS_BASE_CONFIG_FILE"]
prometheus_alerts_file = os.environ["PROMETHEUS_ALERTS_FILE"]
prometheus_base_alerts_file = os.environ["PROMETHEUS_BASE_ALERTS_FILE"]

prometheus_url = os.environ["PROMETHEUS_URL"]
return json.loads(dumps(client[target_database].prometheus_jobs.find({})))
def get_alerts(client):
    """Return all stored alerts that carry a rendered prometheus rule.

    Only documents with a ``prometheus_config`` field can contribute a
    rule to the alerts file, so the query filters on its existence.
    The bson ``dumps``/``json.loads`` round-trip converts Mongo-specific
    types into plain JSON-compatible values.
    """
    alerts_collection = client[target_database].alerts
    cursor = alerts_collection.find({"prometheus_config": {"$exists": True}})
    return json.loads(dumps(cursor))
def save_successful_jobs(client, jobs):
    """Flag every job in *jobs* as active in the prometheus_jobs collection.

    Called only after prometheus accepted the generated configuration,
    so the given jobs are known to be live.
    """
    jobs_collection = client[target_database].prometheus_jobs
    for job in jobs:
        object_id = ObjectId(job["_id"]["$oid"])
        jobs_collection.update_one({"_id": object_id}, {"$set": {"is_active": True}})
def generate_prometheus_config(prometheus_jobs, config_file_path):
    """Merge *prometheus_jobs* into the scrape configs of a base config file.

    Reads the YAML at *config_file_path* and appends every job (after
    normalization by ``clean_up_job``) whose ``job_name`` is not already
    present. Returns the merged YAML structure; the file itself is not
    modified.
    """
    with open(config_file_path, encoding="utf-8", mode="r") as config_file:
        config_file_yaml = yaml.safe_load(config_file)
    if config_file_yaml is None:
        config_file_yaml = {}
    if "scrape_configs" not in config_file_yaml:
        config_file_yaml["scrape_configs"] = []

    # Dedup on job_name: build the set once instead of rescanning the
    # scrape_configs list for every incoming job (was O(n*m)).
    existing_job_names = {
        sc.get("job_name") for sc in config_file_yaml["scrape_configs"]
    }
    for prometheus_job in prometheus_jobs:
        cleaned_up_job = clean_up_job(prometheus_job)
        job_name = cleaned_up_job.get("job_name")
        if job_name not in existing_job_names:
            config_file_yaml["scrape_configs"].append(cleaned_up_job)
            # Also guards against duplicate job_names among the incoming
            # jobs themselves — prometheus rejects a config with two
            # scrape configs sharing one job_name.
            existing_job_names.add(job_name)

    return config_file_yaml
+
+
def generate_prometheus_alerts(prometheus_alerts, config_file_path):
    """Merge stored alert rules into the rule groups of a base alerts file.

    Reads the YAML at *config_file_path* and, when at least one alert
    carries a ``prometheus_config`` rule, appends a single new group
    (named with the current timestamp) holding all of them. Returns the
    merged YAML structure; the file itself is not modified.
    """
    with open(config_file_path, encoding="utf-8", mode="r") as config_file:
        config_file_yaml = yaml.safe_load(config_file)
    if config_file_yaml is None:
        config_file_yaml = {}
    if "groups" not in config_file_yaml:
        config_file_yaml["groups"] = []

    rules = [
        alert["prometheus_config"]
        for alert in prometheus_alerts
        if "prometheus_config" in alert
    ]
    if rules:
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        config_file_yaml["groups"].append(
            {
                "name": f"_osm_alert_rules_{timestamp}_",
                "rules": rules,
            }
        )
    return config_file_yaml
async def main_task(client):
    """Regenerate prometheus config (and alerts, if configured) and reload.

    Reads jobs and alerts from mongo, merges them into the base files,
    writes the live files, asks prometheus to reload, and on success
    marks the stored jobs as active.
    """
    stored_jobs = get_jobs(client)
    print(f"Jobs detected: {len(stored_jobs):d}")
    generated_prometheus_config = generate_prometheus_config(
        stored_jobs, prometheus_base_config_file
    )
    print(f"Writing new config file to {prometheus_config_file}")
    # Serialize once; "w" mode already truncates, and the context manager
    # guarantees the handle is closed even if the write raises.
    config_yaml = yaml.safe_dump(generated_prometheus_config)
    print(config_yaml)
    with open(prometheus_config_file, "w") as config_file:
        config_file.write(config_yaml)

    # Alerts are optional: only processed when a base alerts file exists.
    if os.path.isfile(prometheus_base_alerts_file):
        stored_alerts = get_alerts(client)
        print(f"Alerts read: {len(stored_alerts):d}")
        generated_prometheus_alerts = generate_prometheus_alerts(
            stored_alerts, prometheus_base_alerts_file
        )
        print(f"Writing new alerts file to {prometheus_alerts_file}")
        alerts_yaml = yaml.safe_dump(generated_prometheus_alerts)
        print(alerts_yaml)
        with open(prometheus_alerts_file, "w") as alerts_file:
            alerts_file.write(alerts_yaml)

    print("New config written, updating prometheus")
    update_resp = await reload_prometheus_config(prometheus_url)
    is_valid = await validate_configuration(prometheus_url, generated_prometheus_config)
    if update_resp and is_valid:
        print("Prometheus config update successful")
        save_successful_jobs(client, stored_jobs)
async def main():
client = pymongo.MongoClient(mongodb_url)
- print("Connected to MongoDB!")
+ print("Created MongoClient to connect to MongoDB!")
- try:
- print("Refreshing prometheus config file for first time")
- await main_task(client)
- except Exception as error:
- print("Error in first configuration attempt!")
- print(error)
+ # Initial loop. First refresh of prometheus config file
+ first_refresh_completed = False
+ tries = 1
+ while tries <= 3 and first_refresh_completed == False:
+ try:
+ print("Generating prometheus config files")
+ await main_task(client)
+ first_refresh_completed = True
+ except Exception as error:
+ print(f"Error in configuration attempt! Number of tries: {tries}/3")
+ print(error)
+ time.sleep(5)
+ tries += 1
+ if not first_refresh_completed:
+ print("Not possible to refresh prometheus config file for first time")
+ return
+ # Main loop
while True:
try:
# Needs mongodb in replica mode as this feature relies in OpLog
- change_stream = client[target_database].prometheus_jobs.watch(
+ change_stream = client[target_database].watch(
[
{
"$match": {
- # If you want to modify a particular job,
- # delete and insert it again
- "operationType": {"$in": ["insert", "delete"]}
+ "operationType": {"$in": ["insert", "delete"]},
+ "ns.coll": { "$in": ["prometheus_jobs", "alerts"]},
}
}
]
)
# Single thread, no race conditions and ops are queued up in order
- print("Listening to changes in prometheus jobs collection")
+ print("Listening to changes in prometheus jobs and alerts collections")
for change in change_stream:
- print("Change detected, updating prometheus config")
+ print("Changes detected, updating prometheus config")
await main_task(client)
print()
except Exception as error: