From: bravof Date: Tue, 23 Nov 2021 20:21:58 +0000 (-0300) Subject: bugfix: prometheus config loop to avoid crashes during jobs collection changes listening X-Git-Tag: v11.0.0rc2~2 X-Git-Url: https://osm.etsi.org/gitweb/?a=commitdiff_plain;h=refs%2Fchanges%2F18%2F11418%2F1;p=osm%2Fdevops.git bugfix: prometheus config loop to avoid crashes during jobs collection changes listening Change-Id: Ieb4e4caec3fe7f89f6bf0b4fc400ff0db88f3c4c Signed-off-by: bravof Signed-off-by: garciadeblas --- diff --git a/docker/Prometheus/src/app.py b/docker/Prometheus/src/app.py index 78e70152..fc1e2bb1 100755 --- a/docker/Prometheus/src/app.py +++ b/docker/Prometheus/src/app.py @@ -29,6 +29,7 @@ import aiohttp import asyncio import copy import json +import time from bson.json_util import dumps from bson import ObjectId @@ -138,21 +139,32 @@ async def main(): client = pymongo.MongoClient(mongodb_url) print('Connected to MongoDB!') - print('Refreshing prometheus config file for first time') - await main_task(client) - - #Needs mongodb in replica mode as this feature relies in OpLog - change_stream = client[target_database].prometheus_jobs.watch([{ - '$match': { - #If you want to modify a particular job, delete and insert it again - 'operationType': { '$in': ['insert', 'delete'] } - } - }]) - - #Single thread, no race conditions and ops are queued up in order - for change in change_stream: - print("Change detected, updating prometheus config") + try: + print('Refreshing prometheus config file for first time') await main_task(client) - print() + except Exception as error: + print("Error in first configuration attempt!") + print(error) + + while(True): + try: + #Needs mongodb in replica mode as this feature relies in OpLog + change_stream = client[target_database].prometheus_jobs.watch([{ + '$match': { + #If you want to modify a particular job, delete and insert it again + 'operationType': { '$in': ['insert', 'delete'] } + } + }]) + + #Single thread, no race conditions and ops are queued up in order + print("Listening to changes in prometheus jobs collection") + for change in change_stream: + print("Change detected, updating prometheus config") + await main_task(client) + print() + except Exception as error: + print(error) + print("Detected failure while listening to prometheus jobs collection, retrying...") + time.sleep(5) asyncio.run(main())