From 9af7d42ffc9febc17a1f232b11e260832d0ec9e3 Mon Sep 17 00:00:00 2001
From: bravof
Date: Tue, 23 Nov 2021 17:21:58 -0300
Subject: [PATCH] bugfix: prometheus config loop to avoid crashes during jobs collection changes listening

Change-Id: Ieb4e4caec3fe7f89f6bf0b4fc400ff0db88f3c4c
Signed-off-by: bravof
---
 docker/Prometheus/src/app.py | 42 +++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/docker/Prometheus/src/app.py b/docker/Prometheus/src/app.py
index 78e70152..fc1e2bb1 100755
--- a/docker/Prometheus/src/app.py
+++ b/docker/Prometheus/src/app.py
@@ -29,6 +29,7 @@ import aiohttp
 import asyncio
 import copy
 import json
+import time
 
 from bson.json_util import dumps
 from bson import ObjectId
@@ -138,21 +139,32 @@ async def main():
     client = pymongo.MongoClient(mongodb_url)
     print('Connected to MongoDB!')
 
-    print('Refreshing prometheus config file for first time')
-    await main_task(client)
-
-    #Needs mongodb in replica mode as this feature relies in OpLog
-    change_stream = client[target_database].prometheus_jobs.watch([{
-        '$match': {
-            #If you want to modify a particular job, delete and insert it again
-            'operationType': { '$in': ['insert', 'delete'] }
-        }
-    }])
-
-    #Single thread, no race conditions and ops are queued up in order
-    for change in change_stream:
-        print("Change detected, updating prometheus config")
+    try:
+        print('Refreshing prometheus config file for first time')
         await main_task(client)
-        print()
+    except Exception as error:
+        print("Error in first configuration attempt!")
+        print(error)
+
+    while(True):
+        try:
+            #Needs mongodb in replica mode as this feature relies in OpLog
+            change_stream = client[target_database].prometheus_jobs.watch([{
+                '$match': {
+                    #If you want to modify a particular job, delete and insert it again
+                    'operationType': { '$in': ['insert', 'delete'] }
+                }
+            }])
+
+            #Single thread, no race conditions and ops are queued up in order
+            print("Listening to changes in prometheus jobs collection")
+            for change in change_stream:
+                print("Change detected, updating prometheus config")
+                await main_task(client)
+                print()
+        except Exception as error:
+            print(error)
+            print("Detected failure while listening to prometheus jobs collection, retrying...")
+            time.sleep(5)
 
 asyncio.run(main())
-- 
2.17.1