From c5f10c39b0081d27a1ac1a06bab967da0d633c25 Mon Sep 17 00:00:00 2001
From: bravof <fbravo@whitestack.com>
Date: Tue, 23 Nov 2021 17:21:58 -0300
Subject: [PATCH] bugfix: prometheus config loop to avoid crashes during jobs
 collection changes listening

Change-Id: Ieb4e4caec3fe7f89f6bf0b4fc400ff0db88f3c4c
Signed-off-by: bravof <fbravo@whitestack.com>
Signed-off-by: garciadeblas <gerardo.garciadeblas@telefonica.com>
---
 docker/Prometheus/src/app.py | 42 +++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/docker/Prometheus/src/app.py b/docker/Prometheus/src/app.py
index 78e70152..fc1e2bb1 100755
--- a/docker/Prometheus/src/app.py
+++ b/docker/Prometheus/src/app.py
@@ -29,6 +29,7 @@ import aiohttp
 import asyncio
 import copy
 import json
+import time
 from bson.json_util import dumps
 from bson import ObjectId
 
@@ -138,21 +139,32 @@ async def main():
   client = pymongo.MongoClient(mongodb_url)
   print('Connected to MongoDB!')
 
-  print('Refreshing prometheus config file for first time')
-  await main_task(client)
-
-  #Needs mongodb in replica mode as this feature relies in OpLog
-  change_stream = client[target_database].prometheus_jobs.watch([{
-    '$match': {
-      #If you want to modify a particular job, delete and insert it again
-      'operationType': { '$in': ['insert', 'delete'] }
-    }
-  }])
-
-  #Single thread, no race conditions and ops are queued up in order
-  for change in change_stream:
-    print("Change detected, updating prometheus config")
+  try:
+    print('Refreshing prometheus config file for first time')
     await main_task(client)
-    print()
+  except Exception as error:
+    print("Error in first configuration attempt!")
+    print(error)
+
+  while(True):
+    try:
+      #Needs mongodb in replica mode as this feature relies on the OpLog
+      change_stream = client[target_database].prometheus_jobs.watch([{
+        '$match': {
+          #If you want to modify a particular job, delete and insert it again
+          'operationType': { '$in': ['insert', 'delete'] }
+        }
+      }])
+
+      #Single thread, no race conditions and ops are queued up in order
+      print("Listening to changes in prometheus jobs collection")
+      for change in change_stream:
+        print("Change detected, updating prometheus config")
+        await main_task(client)
+        print()
+    except Exception as error:
+      print(error)
+    print("Detected failure while listening to prometheus jobs collection, retrying...")
+    time.sleep(5) 
 
 asyncio.run(main())
-- 
GitLab