From 89f8290b70918e151e6b6653c635ea6a05a22522 Mon Sep 17 00:00:00 2001 From: tierno Date: Fri, 3 Jul 2020 14:52:28 +0000 Subject: [PATCH] fixing prometheus metric exporter issues Use the same instance of ns at netslice. Avoid two N2VC connections Change-Id: I346c08c111e5ffc7dbc1768851dc069d2cda10d1 Signed-off-by: tierno --- osm_lcm/lcm.cfg | 8 ++ osm_lcm/lcm.py | 17 ++-- osm_lcm/lcm_helm_conn.py | 12 ++- osm_lcm/netslice.py | 6 +- osm_lcm/ns.py | 82 +++++++++++++++---- osm_lcm/prometheus.py | 119 +++++++++++++++++++++------- osm_lcm/tests/test_prometheus.py | 130 +++++++++++++++++++++++++++++++ tox.ini | 2 +- 8 files changed, 323 insertions(+), 53 deletions(-) create mode 100644 osm_lcm/tests/test_prometheus.py diff --git a/osm_lcm/lcm.cfg b/osm_lcm/lcm.cfg index 8f141f1d..4aadf22f 100644 --- a/osm_lcm/lcm.cfg +++ b/osm_lcm/lcm.cfg @@ -84,3 +84,11 @@ message: # loglevel: DEBUG # logfile: /var/log/osm/lcm-message.log group_id: lcm-server + +prometheus: + driver: prometheus + # local file to store the configuration + path: /etc/prometheus + uri: http://prometheus:9090/ + # loglevel: DEBUG + # logfile: /var/log/osm/lcm-message.log diff --git a/osm_lcm/lcm.py b/osm_lcm/lcm.py index 5d27277f..b9146044 100644 --- a/osm_lcm/lcm.py +++ b/osm_lcm/lcm.py @@ -29,9 +29,7 @@ import logging.handlers import getopt import sys -from osm_lcm import ns -from osm_lcm import vim_sdn -from osm_lcm import netslice +from osm_lcm import ns, prometheus, vim_sdn, netslice from osm_lcm.ng_ro import NgRoException, NgRoClient from osm_lcm.ROclient import ROClient, ROClientException @@ -192,8 +190,13 @@ class Lcm: # contains created tasks/futures to be able to cancel self.lcm_tasks = TaskRegistry(self.worker_id, self.db, self.logger) - self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) - self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) + if self.config.get("prometheus"): + self.prometheus = prometheus.Prometheus(self.config["prometheus"], self.worker_id, self.db, self.loop) + else: + self.prometheus = None + self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop, self.prometheus) + self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop, + self.ns) self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks, self.config, self.loop) @@ -506,6 +509,10 @@ class Lcm: # check RO version self.loop.run_until_complete(self.check_RO_version()) + # configure Prometheus + if self.prometheus: + self.loop.run_until_complete(self.prometheus.start()) + self.loop.run_until_complete(asyncio.gather( self.kafka_read(), self.kafka_ping() diff --git a/osm_lcm/lcm_helm_conn.py b/osm_lcm/lcm_helm_conn.py index 146da9d6..c18405bf 100644 --- a/osm_lcm/lcm_helm_conn.py +++ b/osm_lcm/lcm_helm_conn.py @@ -123,6 +123,7 @@ class LCMHelmConn(N2VCConnector): reuse_ee_id: str = None, progress_timeout: float = None, total_timeout: float = None, + config: dict = None, artifact_path: str = None, vca_type: str = None) -> (str, dict): """ @@ -137,8 +138,9 @@ class LCMHelmConn(N2VCConnector): :param str reuse_ee_id: ee id from an older execution. 
TODO - right now this params is not used :param float progress_timeout: :param float total_timeout: - :param str artifact_path path of package content - :param str vca_type Type of vca, not used as assumed of type helm + :param dict config: General variables to instantiate KDU + :param str artifact_path: path of package content + :param str vca_type: Type of vca, not used as assumed of type helm :returns str, dict: id of the new execution environment including namespace.helm_id and credentials object set to None as all credentials should be osm kubernetes .kubeconfig """ @@ -177,10 +179,16 @@ class LCMHelmConn(N2VCConnector): # Call helm conn install # Obtain system cluster id from database system_cluster_uuid = self._get_system_cluster_id() + # Add parameter osm if exist to global + if config and config.get("osm"): + if not config.get("global"): + config["global"] = {} + config["global"]["osm"] = config.get("osm") self.log.debug("install helm chart: {}".format(full_path)) helm_id = await self._k8sclusterhelm.install(system_cluster_uuid, kdu_model=full_path, namespace=self._KUBECTL_OSM_NAMESPACE, + params=config, db_dict=db_dict, timeout=progress_timeout) diff --git a/osm_lcm/netslice.py b/osm_lcm/netslice.py index 4d731afe..6b0a6caa 100644 --- a/osm_lcm/netslice.py +++ b/osm_lcm/netslice.py @@ -17,7 +17,7 @@ import asyncio import logging import logging.handlers import traceback -from osm_lcm import ROclient, ns +from osm_lcm import ROclient from osm_lcm.lcm_utils import LcmException, LcmBase, populate_dict, get_iterable, deep_get from osm_common.dbbase import DbException from time import time @@ -31,7 +31,7 @@ class NetsliceLcm(LcmBase): timeout_nsi_deploy = 2 * 3600 # default global timeout for deployment a nsi - def __init__(self, db, msg, fs, lcm_tasks, config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop, ns): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. Top level should contain 'database', 'storage', @@ -41,7 +41,7 @@ class NetsliceLcm(LcmBase): self.logger = logging.getLogger('lcm.netslice') self.loop = loop self.lcm_tasks = lcm_tasks - self.ns = ns.NsLcm(db, msg, fs, lcm_tasks, config, loop) + self.ns = ns self.ro_config = config["ro_config"] self.timeout = config["timeout"] diff --git a/osm_lcm/ns.py b/osm_lcm/ns.py index 5141f6d0..62f010f1 100644 --- a/osm_lcm/ns.py +++ b/osm_lcm/ns.py @@ -43,6 +43,7 @@ from http import HTTPStatus from time import time from uuid import uuid4 from functools import partial +from random import randint __author__ = "Alfonso Tierno " @@ -51,7 +52,8 @@ class N2VCJujuConnectorLCM(N2VCJujuConnector): async def create_execution_environment(self, namespace: str, db_dict: dict, reuse_ee_id: str = None, progress_timeout: float = None, total_timeout: float = None, - artifact_path: str = None, vca_type: str = None) -> (str, dict): + config: dict = None, artifact_path: str = None, + vca_type: str = None) -> (str, dict): # admit two new parameters, artifact_path and vca_type if vca_type == "k8s_proxy_charm": ee_id = await self.n2vc.install_k8s_proxy_charm( @@ -88,7 +90,7 @@ class NsLcm(LcmBase): SUBOPERATION_STATUS_SKIP = -3 task_name_deploy_vca = "Deploying VCA" - def __init__(self, db, msg, fs, lcm_tasks, config, loop): + def __init__(self, db, msg, fs, lcm_tasks, config, loop, prometheus=None): """ Init, Connect to database, filesystem storage, and messaging :param config: two level dictionary with configuration. 
Top level should contain 'database', 'storage', @@ -163,6 +165,8 @@ class NsLcm(LcmBase): "helm": self.conn_helm_ee } + self.prometheus = prometheus + # create RO client if self.ng_ro: self.RO = NgRoClient(self.loop, **self.ro_config) @@ -1369,11 +1373,13 @@ class NsLcm(LcmBase): raise LcmException("Configuration aborted because dependent charm/s timeout") async def instantiate_N2VC(self, logging_text, vca_index, nsi_id, db_nsr, db_vnfr, vdu_id, kdu_name, vdu_index, - config_descriptor, deploy_params, base_folder, nslcmop_id, stage, vca_type, vca_name): + config_descriptor, deploy_params, base_folder, nslcmop_id, stage, vca_type, vca_name, + ee_config_descriptor): nsr_id = db_nsr["_id"] db_update_entry = "_admin.deployed.VCA.{}.".format(vca_index) vca_deployed_list = db_nsr["_admin"]["deployed"]["VCA"] vca_deployed = db_nsr["_admin"]["deployed"]["VCA"][vca_index] + osm_config = {"osm": {"ns_id": db_nsr["_id"]}} db_dict = { 'collection': 'nsrs', 'filter': {'_id': nsr_id}, @@ -1388,6 +1394,7 @@ class NsLcm(LcmBase): vnfr_id = None if db_vnfr: vnfr_id = db_vnfr["_id"] + osm_config["osm"]["vnf_id"] = vnfr_id namespace = "{nsi}.{ns}".format( nsi=nsi_id if nsi_id else "", @@ -1401,10 +1408,12 @@ class NsLcm(LcmBase): namespace += ".{}-{}".format(vdu_id, vdu_index or 0) element_type = 'VDU' element_under_configuration = "{}-{}".format(vdu_id, vdu_index or 0) + osm_config["osm"]["vdu_id"] = vdu_id elif kdu_name: namespace += ".{}".format(kdu_name) element_type = 'KDU' element_under_configuration = kdu_name + osm_config["osm"]["kdu_name"] = kdu_name # Get artifact path artifact_path = "{}/{}/{}/{}".format( @@ -1436,6 +1445,7 @@ class NsLcm(LcmBase): namespace=namespace, reuse_ee_id=ee_id, db_dict=db_dict, + config=osm_config, artifact_path=artifact_path, vca_type=vca_type) @@ -1637,6 +1647,19 @@ class NsLcm(LcmBase): # TODO register in database that primitive is done + # STEP 7 Configure metrics + if vca_type == "helm": + prometheus_jobs = await self.add_prometheus_metrics( + ee_id=ee_id, + artifact_path=artifact_path, + ee_config_descriptor=ee_config_descriptor, + vnfr_id=vnfr_id, + nsr_id=nsr_id, + target_ip=rw_mgmt_ip, + ) + if prometheus_jobs: + self.update_db_2("nsrs", nsr_id, {db_update_entry + "prometheus_jobs": prometheus_jobs}) + step = "instantiated at VCA" self.logger.debug(logging_text + step) @@ -2596,7 +2619,8 @@ class NsLcm(LcmBase): nslcmop_id=nslcmop_id, stage=stage, vca_type=vca_type, - vca_name=vca_name + vca_name=vca_name, + ee_config_descriptor=ee_item ) ) self.lcm_tasks.register("ns", nsr_id, nslcmop_id, "instantiate_N2VC-{}".format(vca_index), task_n2vc) @@ -2896,6 +2920,9 @@ class NsLcm(LcmBase): db_update_entry = "_admin.deployed.VCA.{}.needed_terminate".format(vca_index) self.update_db_2("nsrs", db_nslcmop["nsInstanceId"], {db_update_entry: False}) + if vca_deployed.get("prometheus_jobs") and self.prometheus: + await self.prometheus.update(remove_jobs=vca_deployed["prometheus_jobs"]) + if destroy_ee: await self.vca_map[vca_type].delete_execution_environment(vca_deployed["ee_id"]) @@ -3122,7 +3149,7 @@ class NsLcm(LcmBase): self.logger.debug(logging_text + stage[0]) stage[1] = "Looking execution environment that needs terminate." 
             self.logger.debug(logging_text + stage[1])
-            self.logger.debug("nsr_deployed: {}".format(nsr_deployed))
+            # self.logger.debug("nsr_deployed: {}".format(nsr_deployed))
             for vca_index, vca in enumerate(get_iterable(nsr_deployed, "VCA")):
                 self.logger.debug("vca_index: {}, vca: {}".format(vca_index, vca))
                 config_descriptor = None
@@ -3143,17 +3170,14 @@ class NsLcm(LcmBase):
                         config_descriptor = kdud.get("kdu-configuration")
                 else:
                     config_descriptor = db_vnfds_from_member_index[vca["member-vnf-index"]].get("vnf-configuration")
-                # For helm we must destroy_ee
                 vca_type = vca.get("type")
                 exec_terminate_primitives = (not operation_params.get("skip_terminate_primitives") and
                                              vca.get("needed_terminate"))
-                self.logger.debug("vca type: {}".format(vca_type))
-                if not vca_type == "helm":
-                    task = asyncio.ensure_future(self.destroy_N2VC(logging_text, db_nslcmop, vca, config_descriptor,
-                                                                   vca_index, False, exec_terminate_primitives))
-                else:
-                    task = asyncio.ensure_future(self.destroy_N2VC(logging_text, db_nslcmop, vca, config_descriptor,
-                                                                   vca_index, True, exec_terminate_primitives))
+                # For helm we must destroy_ee (boolean, not the strings "True"/"False")
+                destroy_ee = True if vca_type == "helm" else False
+                task = asyncio.ensure_future(
+                    self.destroy_N2VC(logging_text, db_nslcmop, vca, config_descriptor, vca_index,
+                                      destroy_ee, exec_terminate_primitives))
                 tasks_dict_info[task] = "Terminating VCA {}".format(vca.get("ee_id"))
 
             # wait for pending tasks of terminate primitives
@@ -4178,3 +4202,35 @@ class NsLcm(LcmBase):
             self.logger.error(logging_text + "kafka_write notification Exception {}".format(e))
         self.logger.debug(logging_text + "Exit")
         self.lcm_tasks.remove("ns", nsr_id, nslcmop_id, "ns_scale")
+
+    async def add_prometheus_metrics(self, ee_id, artifact_path, ee_config_descriptor, vnfr_id, nsr_id, target_ip):
+        if not self.prometheus:
+            return
+        # look for a file called 'prometheus*.j2' and use it as the scrape-job template
+        artifact_content = self.fs.dir_ls(artifact_path)
+        job_file = next((f for f in artifact_content if f.startswith("prometheus") and f.endswith(".j2")), None)
+        if not job_file:
+            return
+        with self.fs.file_open((artifact_path, job_file), "r") as f:
+            job_data = f.read()
+
+        # TODO get_service
+        _, _, service = ee_id.partition(".")   # remove prefix "namespace."
+        host_name = "{}-{}".format(service, ee_config_descriptor["metric-service"])
+        host_port = "80"
+        vnfr_id = vnfr_id.replace("-", "")
+        variables = {
+            "JOB_NAME": vnfr_id,
+            "TARGET_IP": target_ip,
+            "EXPORTER_POD_IP": host_name,
+            "EXPORTER_POD_PORT": host_port,
+        }
+        job_list = self.prometheus.parse_job(job_data, variables)
+        # ensure job_name is using the vnfr_id.
Adding the metadata nsr_id + for job in job_list: + if not isinstance(job.get("job_name"), str) or vnfr_id not in job["job_name"]: + job["job_name"] = vnfr_id + "_" + str(randint(1, 10000)) + job["nsr_id"] = nsr_id + job_dict = {jl["job_name"]: jl for jl in job_list} + if await self.prometheus.update(job_dict): + return list(job_dict.keys()) diff --git a/osm_lcm/prometheus.py b/osm_lcm/prometheus.py index 397764f2..c1f49b08 100644 --- a/osm_lcm/prometheus.py +++ b/osm_lcm/prometheus.py @@ -24,7 +24,7 @@ import yaml import os from osm_lcm.lcm_utils import LcmException from osm_common.dbbase import DbException - +from jinja2 import Template, TemplateError, TemplateNotFound, TemplateSyntaxError __author__ = "Alfonso Tierno " @@ -37,14 +37,16 @@ initial_prometheus_data = { "created": 1593445184, "version": "1.0" # to allow future version updates }, - 'scrape_configs': [{'static_configs': [{'targets': ['mon:8000']}], 'job_name': 'mon_exporter'}], + 'scrape_configs': { # Dictionary at database. Converted to list before sending to prometheus + 'mon_exporter': {'static_configs': [{'targets': ['mon:8000']}], 'job_name': 'mon_exporter'}, + }, 'global': {'evaluation_interval': '15s', 'scrape_interval': '15s'}, 'rule_files': None, 'alerting': {'alertmanagers': [{'static_configs': [{'targets': None}]}]} } -class Prometheus(): +class Prometheus: """ Implements a class to update Prometheus """ @@ -55,7 +57,7 @@ class Prometheus(): self.worker_id = worker_id self.db = db self.loop = loop - self.logger = logger or logging.get_legger("lcm.prometheus") + self.logger = logger or logging.getLogger("lcm.prometheus") self.server = config["uri"] self.path = config["path"] if not self.path.endswith("/"): @@ -63,27 +65,47 @@ class Prometheus(): self.cfg_file = self.path + "prometheus.yml" self.cfg_file_backup = self.path + "prometheus.yml-backup" + @staticmethod + def parse_job(job_data: str, variables: dict) -> dict: + try: + template = Template(job_data) + job_parsed = template.render(variables or {}) + return yaml.safe_load(job_parsed) + except (TemplateError, TemplateNotFound, TemplateSyntaxError) as e: + # TODO yaml exceptions + raise LcmException("Error parsing Jinja2 to prometheus job. job_data={}, variables={}. Error={}".format( + job_data, variables, e)) + async def start(self): for retry in range(4): try: + # self.logger("Starting prometheus ") # read from database - prometheus_data = self.db.get_one("admin", {"_id": "prometheus"}, fail_on_empty=True) + prometheus_data = self.db.get_one("admin", {"_id": "prometheus"}, fail_on_empty=False) if not prometheus_data: self.logger.info("Init db.admin.prometheus content") self.db.create("admin", initial_prometheus_data) # send database config file to prometheus. Ignore loading errors, as prometheus may be starting # but at least an initial configuration file is set await self.update() + return except DbException as e: if retry == 3: raise LcmException("Max retries trying to init prometheus configuration: {}".format(e)) await asyncio.sleep(5, loop=self.loop) - async def update(self, add_jobs=None, remove_jobs=None): + async def update(self, add_jobs: dict = None, remove_jobs: list = None) -> bool: + """ + + :param add_jobs: dictionary with {job_id_1: job_content, job_id_2: job_content} + :param remove_jobs: list with jobs to remove [job_id_1, job_id_2] + :return: result. If false prometheus denies this configuration. 
Exception on error + """ for retry in range(4): result = True if retry: # first time do not wait await asyncio.sleep(self.PROMETHEUS_LOCKED_TIME / 2, loop=self.loop) + # lock database now = time() if not self.db.set_one( @@ -94,60 +116,99 @@ class Prometheus(): continue # read database prometheus_data = self.db.get_one("admin", {"_id": "prometheus"}) + update_dict = {"_admin.locked_at": 0, + "_admin.locked_by": None} # Make changes from prometheus_incremental - push_list = pull_list = None + push_dict = pull_dict = None if add_jobs or remove_jobs: - update_dict = {"_admin.locked_at": 0, - "_admin.locked_by": None, - "_admin.modified_at": now} + log_text_list = [] if add_jobs: - push_list = {"scrape_configs.static_configs": add_jobs} - prometheus_data["scrape_configs"]["static_configs"] += add_jobs + log_text_list.append("adding jobs: {}".format(list(add_jobs.keys()))) + prometheus_data["scrape_configs"].update(add_jobs) + push_dict = {"scrape_configs." + job_id: job_data for job_id, job_data in add_jobs.items()} elif remove_jobs: - pass # TODO - if not self.send_data(prometheus_data): - push_list = pull_list = None - result = False + log_text_list.append("removing jobs: {}".format(list(remove_jobs))) + for job_id in remove_jobs: + prometheus_data["scrape_configs"].pop(job_id, None) + pull_dict = {"scrape_configs." + job_id: None for job_id in remove_jobs} + self.logger.debug(". ".join(log_text_list)) + + if not await self.send_data(prometheus_data): + push_dict = pull_dict = None + result = False # unblock database + if push_dict: + update_dict.update(push_dict) + if push_dict or pull_dict: + update_dict["_admin.modified_at"] = now if not self.db.set_one( "admin", {"_id": "prometheus", "_admin.locked_at": now, "_admin.locked_by": self.worker_id}, - update_dict=update_dict, pull_list=pull_list, push_list=push_list, fail_on_empty=False): + update_dict=update_dict, unset=pull_dict, fail_on_empty=False): continue return result raise LcmException("Cannot update prometheus database. 
Reached max retries") async def send_data(self, new_config): restore_backup = False + del new_config["_id"] + del new_config["_admin"] + new_scrape_configs = [] + + # generate a list with the values of scrape_configs + for scrape_config in new_config["scrape_configs"].values(): + scrape_config = scrape_config.copy() + # remove nsr_id metadata from scrape_configs + scrape_config.pop("nsr_id", None) + new_scrape_configs.append(scrape_config) + new_config["scrape_configs"] = new_scrape_configs + try: if os.path.exists(self.cfg_file): os.rename(self.cfg_file, self.cfg_file_backup) restore_backup = True with open(self.cfg_file, "w+") as f: - yaml.dump(new_config, f) + yaml.safe_dump(new_config, f, indent=4, default_flow_style=False) + # self.logger.debug("new configuration: {}".format(yaml.safe_dump(new_config, indent=4, + # default_flow_style=False))) async with aiohttp.ClientSession() as session: - async with session.post(self.server + "/-/reload") as resp: + async with session.post(self.server + "-/reload") as resp: if resp.status > 204: - raise LcmException(resp.text) + raise LcmException(await resp.text()) await asyncio.sleep(5, loop=self.loop) - async with session.get(self.server + "/api/v1/status/config") as resp: + # If prometheus does not admit this configuration, remains with the old one + # Then, to check if the configuration has been accepted, get the configuration from prometheus + # and compares with the inserted one + async with session.get(self.server + "api/v1/status/config") as resp: if resp.status > 204: - raise LcmException(resp.text) - current_config = resp.json() + raise LcmException(await resp.text()) + current_config = await resp.json() if not self._check_configuration_equal(current_config, new_config): return False else: restore_backup = False return True except Exception as e: - self.logger.error("Error updating prometheus configuration {}".format(e)) + self.logger.error("Error updating prometheus configuration url={}: {}".format(self.server, e)) return False finally: if restore_backup: - os.rename(self.cfg_file_backup, self.cfg_file) + try: + os.rename(self.cfg_file_backup, self.cfg_file) + except Exception as e: + self.logger.critical("Exception while rolling back: {}".format(e)) - @staticmethod - def _check_configuration_equal(current_config, new_config): - # TODO compare and return True if equal - return True + def _check_configuration_equal(self, current_config, expected_config): + try: + # self.logger.debug("Comparing current_config='{}' with expected_config='{}'".format(current_config, + # expected_config)) + current_config_yaml = yaml.safe_load(current_config['data']['yaml']) + current_jobs = [j["job_name"] for j in current_config_yaml["scrape_configs"]] + expected_jobs = [j["job_name"] for j in expected_config["scrape_configs"]] + return current_jobs == expected_jobs + except Exception as e: + self.logger.error("Invalid obtained prometheus status. Error: '{}'. Obtained data: '{}'".format( + e, current_config)) + # if format is not understood, cannot be compared, assume it is ok + return True diff --git a/osm_lcm/tests/test_prometheus.py b/osm_lcm/tests/test_prometheus.py new file mode 100644 index 00000000..064ede83 --- /dev/null +++ b/osm_lcm/tests/test_prometheus.py @@ -0,0 +1,130 @@ +## +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# For those usages not covered by the Apache License, Version 2.0 please +# contact: alfonso.tiernosepulveda@telefonica.com +## + +import asynctest +from osm_lcm.prometheus import Prometheus, initial_prometheus_data +from asynctest.mock import Mock +from osm_common.dbmemory import DbMemory + +__author__ = 'Alfonso Tierno ' + + +class TestPrometheus(asynctest.TestCase): + + async def setUp(self): + config = {'uri': 'http:prometheus:9090', + 'path': '/etc/prometheus'} + self.db = Mock(DbMemory()) + self.p = Prometheus(config, worker_id='1', db=self.db, loop=self.loop) + + @asynctest.fail_on(active_handles=True) + async def test_start(self): + # test with database empty + self.db.get_one.return_value = False + self.p.update = asynctest.CoroutineMock() + await self.p.start() + self.db.create.assert_called_once_with('admin', initial_prometheus_data) + self.p.update.assert_called_once_with() + + # test with database not empty + self.db.create.reset_mock() + self.db.get_one.return_value = initial_prometheus_data + self.p.update.reset_mock() + await self.p.start() + self.db.create.assert_not_called() + self.p.update.assert_called_once_with() + + @asynctest.fail_on(active_handles=True) + async def test_update(self): + self.p.PROMETHEUS_LOCKED_TIME = 1 + number_call_set_one = 0 + + def _db_set_one(*args, **kwargs): + # simulated that database is not locked at first call + nonlocal number_call_set_one + + number_call_set_one += 1 + if number_call_set_one == 1: + return + else: + return {'update': 1} + + def _check_set_one_calls(set_one_calls): + # check the three calls to database set_one + self.assertEqual(len(set_one_calls), 3, 'Not called three times to db.set_one, two blocks, one unblock') + self.assertIn('admin', set_one_calls[0][0], 'db.set_one collection should be admin') + first_used_time = set_one_calls[0][1]['update_dict']['_admin.locked_at'] + second_used_time = set_one_calls[1][1]['update_dict']['_admin.locked_at'] + third_used_time = set_one_calls[2][1]['update_dict']['_admin.locked_at'] + self.assertTrue(first_used_time != 0 and second_used_time != 0, 'blocking locked_at time must not be 0') + self.assertGreater(second_used_time, first_used_time, + 'Every blocking try must contain a new locked_at time') + self.assertEqual(third_used_time, 0, 'For unblocking must be set locked_at=0') + + # check add_jobs + number_call_set_one = 0 + self.db.get_one.return_value = initial_prometheus_data + self.db.set_one.side_effect = _db_set_one + self.p.send_data = asynctest.CoroutineMock(return_value=True) + add_jobs = {'job1': {'job_name': 'job1', 'nsr_id': 'nsr_id'}} + await self.p.update(add_jobs=add_jobs) + set_one_calls = self.db.set_one.call_args_list + _check_set_one_calls(set_one_calls) + update_dict = set_one_calls[2][1]['update_dict'] + unset_dict = set_one_calls[2][1]['unset'] + expected_final_set = { + '_admin.locked_at': 0, + '_admin.locked_by': None, + '_admin.modified_at': set_one_calls[1][1]['update_dict']['_admin.locked_at'], + 'scrape_configs.job1': add_jobs['job1']} + self.assertEqual(update_dict, expected_final_set, 'invalid set and unlock values') + 
self.assertEqual(unset_dict, None, 'invalid unset and unlock values') + + # check remove_jobs + number_call_set_one = 0 + remove_jobs = ['job1'] + self.db.set_one.reset_mock() + await self.p.update(remove_jobs=remove_jobs) + set_one_calls = self.db.set_one.call_args_list + _check_set_one_calls(set_one_calls) + update_dict = set_one_calls[2][1]['update_dict'] + unset_dict = set_one_calls[2][1]['unset'] + expected_final_set = { + '_admin.locked_at': 0, + '_admin.locked_by': None, + '_admin.modified_at': set_one_calls[1][1]['update_dict']['_admin.locked_at'] + } + self.assertEqual(update_dict, expected_final_set, 'invalid set and unlock values') + self.assertEqual(unset_dict, {'scrape_configs.job1': None}, 'invalid unset and unlock values') + + def test_parse_job(self): + text_to_parse = """ + # yaml format with jinja2 + key1: "parsing var1='{{ var1 }}'" + key2: "parsing var2='{{ var2 }}'" + """ + vars = {'var1': 'VAR1', 'var2': 'VAR2', 'var3': 'VAR3'} + expected = { + 'key1': "parsing var1='VAR1'", + 'key2': "parsing var2='VAR2'" + } + result = self.p.parse_job(text_to_parse, vars) + self.assertEqual(result, expected, 'Error at jinja2 parse') + + +if __name__ == '__main__': + asynctest.main() diff --git a/tox.ini b/tox.ini index 1e0b679e..85456e51 100644 --- a/tox.ini +++ b/tox.ini @@ -50,7 +50,7 @@ commands = flake8 osm_lcm --max-line-length 120 \ [testenv:unittest] basepython = python3 deps = asynctest -commands = python3 -m unittest osm_lcm.tests.test_ns +commands = python3 -m unittest osm_lcm.tests.test_ns osm_lcm.tests.test_prometheus [testenv:build] basepython = python3 -- 2.25.1
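
For reference, the new `config` argument of `create_execution_environment` is how the `osm` block built in `instantiate_N2VC` reaches the helm chart values. The following is a minimal standalone sketch (not part of the patch) of the merge done in lcm_helm_conn.py; the ids are made up for illustration only.

    # Sketch of the values merge performed in lcm_helm_conn.py (ids below are made up)
    osm_config = {"osm": {"ns_id": "d21a9b48", "vnf_id": "7f3c11a0", "vdu_id": "mgmt-vdu"}}

    config = dict(osm_config)                 # what instantiate_N2VC passes as 'config'
    if config and config.get("osm"):
        if not config.get("global"):
            config["global"] = {}
        config["global"]["osm"] = config.get("osm")

    # the dict is then passed as 'params' to the helm install, so the chart can read
    # these identifiers from its values (for example .Values.global.osm.ns_id)
    assert config["global"]["osm"]["ns_id"] == "d21a9b48"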
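
add_prometheus_metrics expects the VNF package to ship a Jinja2 job template (any file matching 'prometheus*.j2' next to the charm artifacts). The patch does not include such a template, so the one below is purely illustrative; the sketch only shows how Prometheus.parse_job renders the four variables and how the resulting jobs end up keyed by job_name, which is the dict layout stored under db.admin.prometheus.scrape_configs.

    # Standalone sketch: render a hypothetical 'prometheus.j2' the same way
    # NsLcm.add_prometheus_metrics and Prometheus.parse_job do (needs jinja2 and pyyaml)
    import yaml
    from jinja2 import Template

    # hypothetical template content; a real VNF package ships its own file
    job_template = """
    - job_name: {{ JOB_NAME }}
      static_configs:
        - targets: ["{{ EXPORTER_POD_IP }}:{{ EXPORTER_POD_PORT }}"]
          labels:
            target_vnf: "{{ TARGET_IP }}"
    """

    variables = {
        "JOB_NAME": "4d9f8c2a31e04b5e9a7d",         # vnfr_id with the dashes removed
        "TARGET_IP": "10.0.0.10",                   # rw_mgmt_ip of the VNF
        "EXPORTER_POD_IP": "eeid-metrics-service",  # "<helm service>-<metric-service>"
        "EXPORTER_POD_PORT": "80",
    }

    # equivalent of: job_list = self.prometheus.parse_job(job_data, variables)
    job_list = yaml.safe_load(Template(job_template).render(variables))

    # add_prometheus_metrics then tags every job with the nsr_id and indexes by job_name,
    # which is the dict form kept in db.admin.prometheus.scrape_configs
    for job in job_list:
        job["nsr_id"] = "0a2f66b1"                  # made-up ns record id
    job_dict = {job["job_name"]: job for job in job_list}
    print(yaml.safe_dump(job_dict, default_flow_style=False))

If the rendered job_name does not already embed the vnfr_id, add_prometheus_metrics renames the job to "<vnfr_id>_<random>" before pushing, so a template does not have to be unique per VNF instance.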
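
Prometheus.send_data and _check_configuration_equal together implement a write, reload and verify cycle. The condensed sketch below (not the code of the patch) assumes a Prometheus started with --web.enable-lifecycle, so that POST /-/reload is available, mounting the written file from the shared 'path', and reachable at the configured 'uri'; it performs the same sequence in a single function.

    # Sketch of the reload/verify round trip (assumes aiohttp and pyyaml installed)
    import asyncio
    import aiohttp
    import yaml

    async def push_and_verify(db_doc: dict, cfg_file: str, server: str = "http://prometheus:9090/") -> bool:
        new_config = {k: v for k, v in db_doc.items() if k not in ("_id", "_admin")}
        # scrape_configs is stored in mongo as a dict keyed by job name; prometheus expects a
        # list, and the OSM-only nsr_id metadata must not reach the configuration file
        new_config["scrape_configs"] = [
            {k: v for k, v in job.items() if k != "nsr_id"}
            for job in db_doc["scrape_configs"].values()
        ]
        with open(cfg_file, "w") as f:
            yaml.safe_dump(new_config, f, default_flow_style=False)
        async with aiohttp.ClientSession() as session:
            async with session.post(server + "-/reload") as resp:   # ask prometheus to re-read the file
                resp.raise_for_status()
            await asyncio.sleep(5)
            # prometheus silently keeps the old config if the new one is invalid, so read the
            # active configuration back and compare the job names with what was pushed
            async with session.get(server + "api/v1/status/config") as resp:
                resp.raise_for_status()
                active = yaml.safe_load((await resp.json())["data"]["yaml"])
        pushed = [job["job_name"] for job in new_config["scrape_configs"]]
        loaded = [job["job_name"] for job in active["scrape_configs"]]
        return pushed == loaded

A caller would invoke it as asyncio.run(push_and_verify(doc, "/etc/prometheus/prometheus.yml")), with doc being the db.admin.prometheus document.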