Resolved Bug 1684 - MON fails to collect metrics from an OpenStack VIM with metrics...
[osm/MON.git] / osm_mon / collector / service.py
index 9dd1683..005f844 100644 (file)
 # This version uses a ProcessThreadPoolExecutor to limit the number of processes launched
 
 import logging
-import multiprocessing
 from typing import List
 import concurrent.futures
 import time
+import keystoneauth1.exceptions
 
 from osm_mon.collector.infra_collectors.onos import OnosInfraCollector
 from osm_mon.collector.infra_collectors.openstack import OpenstackInfraCollector
@@ -46,23 +46,17 @@ log = logging.getLogger(__name__)
 VIM_COLLECTORS = {
     "openstack": OpenstackCollector,
     "vmware": VMwareCollector,
-    "vio": VIOCollector
+    "vio": VIOCollector,
 }
 VIM_INFRA_COLLECTORS = {
     "openstack": OpenstackInfraCollector,
     "vmware": VMwareInfraCollector,
-    "vio": VIOInfraCollector
-}
-SDN_INFRA_COLLECTORS = {
-    "onosof": OnosInfraCollector,
-    "onos_vpls": OnosInfraCollector
+    "vio": VIOInfraCollector,
 }
+SDN_INFRA_COLLECTORS = {"onosof": OnosInfraCollector, "onos_vpls": OnosInfraCollector}
 
 
 class CollectorService:
-    # The processes getting metrics will store the results in this queue
-    queue = multiprocessing.Queue()
-
     def __init__(self, config: Config):
         self.conf = config
         self.common_db = CommonDbClient(self.conf)
@@ -73,78 +67,81 @@ class CollectorService:
     def _get_vim_type(conf: Config, vim_account_id: str) -> str:
         common_db = CommonDbClient(conf)
         vim_account = common_db.get_vim_account(vim_account_id)
-        vim_type = vim_account['vim_type']
-        if 'config' in vim_account and 'vim_type' in vim_account['config']:
-            vim_type = vim_account['config']['vim_type'].lower()
-            if vim_type == 'vio' and 'vrops_site' not in vim_account['config']:
-                vim_type = 'openstack'
+        vim_type = vim_account["vim_type"]
+        if "config" in vim_account and "vim_type" in vim_account["config"]:
+            vim_type = vim_account["config"]["vim_type"].lower()
+            if vim_type == "vio" and "vrops_site" not in vim_account["config"]:
+                vim_type = "openstack"
         return vim_type
 
     @staticmethod
     def _collect_vim_metrics(conf: Config, vnfr: dict, vim_account_id: str):
         # TODO(diazb) Add support for aws
+        metrics = []
         vim_type = CollectorService._get_vim_type(conf, vim_account_id)
         log.debug("vim type.....{}".format(vim_type))
         if vim_type in VIM_COLLECTORS:
             collector = VIM_COLLECTORS[vim_type](conf, vim_account_id)
             metrics = collector.collect(vnfr)
             log.debug("Collecting vim metrics.....{}".format(metrics))
-            for metric in metrics:
-                pass
-                CollectorService.queue.put(metric)
         else:
             log.debug("vimtype %s is not supported.", vim_type)
-        return
+        return metrics
 
     @staticmethod
     def _collect_vca_metrics(conf: Config, vnfr: dict):
+        metrics = []
         vca_collector = VCACollector(conf)
         metrics = vca_collector.collect(vnfr)
         log.debug("Collecting vca metrics.....{}".format(metrics))
-        for metric in metrics:
-            CollectorService.queue.put(metric)
-        return
+        return metrics
 
     @staticmethod
     def _collect_vim_infra_metrics(conf: Config, vim_account_id: str):
         log.info("Collecting vim infra metrics")
+        metrics = []
         vim_type = CollectorService._get_vim_type(conf, vim_account_id)
         if vim_type in VIM_INFRA_COLLECTORS:
             collector = VIM_INFRA_COLLECTORS[vim_type](conf, vim_account_id)
             metrics = collector.collect()
             log.debug("Collecting vim infra metrics.....{}".format(metrics))
-            for metric in metrics:
-                CollectorService.queue.put(metric)
         else:
             log.debug("vimtype %s is not supported.", vim_type)
-        return
+        return metrics
 
     @staticmethod
     def _collect_sdnc_infra_metrics(conf: Config, sdnc_id: str):
         log.info("Collecting sdnc metrics")
+        metrics = []
         common_db = CommonDbClient(conf)
-        sdn_type = common_db.get_sdnc(sdnc_id)['type']
+        sdn_type = common_db.get_sdnc(sdnc_id)["type"]
         if sdn_type in SDN_INFRA_COLLECTORS:
             collector = SDN_INFRA_COLLECTORS[sdn_type](conf, sdnc_id)
             metrics = collector.collect()
             log.debug("Collecting sdnc metrics.....{}".format(metrics))
-            for metric in metrics:
-                CollectorService.queue.put(metric)
         else:
             log.debug("sdn_type %s is not supported.", sdn_type)
-        return
+        return metrics
 
     @staticmethod
     def _stop_process_pool(executor):
-        log.info('Stopping all processes in the process pool')
+        log.info("Shutting down process pool")
         try:
+            log.debug("Stopping residual processes in the process pool")
             for pid, process in executor._processes.items():
                 if process.is_alive():
                     process.terminate()
         except Exception as e:
             log.info("Exception during process termination")
             log.debug("Exception %s" % (e))
-        executor.shutdown()
+
+        try:
+            # Shutting down executor
+            log.debug("Shutting down process pool executor")
+            executor.shutdown()
+        except RuntimeError as e:
+            log.info("RuntimeError in shutting down executor")
+            log.debug("RuntimeError %s" % (e))
         return
 
     def collect_metrics(self) -> List[Metric]:
@@ -153,40 +150,79 @@ class CollectorService:
 
         start_time = time.time()
         # Starting executor pool with pool size process_pool_size. Default process_pool_size is 20
-        with concurrent.futures.ProcessPoolExecutor(self.conf.get('collector', 'process_pool_size')) as executor:
-            log.debug('Started metric collector process pool with pool size %s' % (self.conf.get('collector',
-                                                                                                 'process_pool_size')))
+        with concurrent.futures.ProcessPoolExecutor(
+            self.conf.get("collector", "process_pool_size")
+        ) as executor:
+            log.info(
+                "Started metric collector process pool with pool size %s"
+                % (self.conf.get("collector", "process_pool_size"))
+            )
             futures = []
             for vnfr in vnfrs:
-                nsr_id = vnfr['nsr-id-ref']
-                vnf_member_index = vnfr['member-vnf-index-ref']
-                vim_account_id = self.common_db.get_vim_account_id(nsr_id, vnf_member_index)
-                futures.append(executor.submit(CollectorService._collect_vim_metrics, self.conf, vnfr, vim_account_id))
-                futures.append(executor.submit(CollectorService._collect_vca_metrics, self.conf, vnfr))
+                nsr_id = vnfr["nsr-id-ref"]
+                vnf_member_index = vnfr["member-vnf-index-ref"]
+                vim_account_id = self.common_db.get_vim_account_id(
+                    nsr_id, vnf_member_index
+                )
+                futures.append(
+                    executor.submit(
+                        CollectorService._collect_vim_metrics,
+                        self.conf,
+                        vnfr,
+                        vim_account_id,
+                    )
+                )
+                futures.append(
+                    executor.submit(
+                        CollectorService._collect_vca_metrics, self.conf, vnfr
+                    )
+                )
 
             vims = self.common_db.get_vim_accounts()
             for vim in vims:
-                futures.append(executor.submit(CollectorService._collect_vim_infra_metrics, self.conf, vim['_id']))
+                futures.append(
+                    executor.submit(
+                        CollectorService._collect_vim_infra_metrics,
+                        self.conf,
+                        vim["_id"],
+                    )
+                )
 
             sdncs = self.common_db.get_sdncs()
             for sdnc in sdncs:
-                futures.append(executor.submit(CollectorService._collect_sdnc_infra_metrics, self.conf, sdnc['_id']))
+                futures.append(
+                    executor.submit(
+                        CollectorService._collect_sdnc_infra_metrics,
+                        self.conf,
+                        sdnc["_id"],
+                    )
+                )
 
             try:
                 # Wait for future calls to complete till process_execution_timeout. Default is 50 seconds
-                for future in concurrent.futures.as_completed(futures, self.conf.get('collector',
-                                                                                     'process_execution_timeout')):
-                    result = future.result(timeout=int(self.conf.get('collector',
-                                                                     'process_execution_timeout')))
-                    log.debug('result = %s' % (result))
+                for future in concurrent.futures.as_completed(
+                    futures, self.conf.get("collector", "process_execution_timeout")
+                ):
+                    try:
+                        result = future.result(
+                            timeout=int(
+                                self.conf.get("collector", "process_execution_timeout")
+                            )
+                        )
+                        metrics.extend(result)
+                        log.debug("result = %s" % (result))
+                    except keystoneauth1.exceptions.connection.ConnectTimeout as e:
+                        log.info("Keystone connection timeout during metric collection")
+                        log.debug("Keystone connection timeout exception %s" % (e))
             except concurrent.futures.TimeoutError as e:
                 # Some processes have not completed due to timeout error
-                log.info(' Some processes have not finished due to TimeoutError exception')
-                log.debug('concurrent.futures.TimeoutError exception %s' % (e))
-                CollectorService._stop_process_pool(executor)
+                log.info(
+                    "Some processes have not finished due to TimeoutError exception"
+                )
+                log.debug("concurrent.futures.TimeoutError exception %s" % (e))
 
-            while not self.queue.empty():
-                metrics.append(self.queue.get())
+            # Shutting down process pool executor
+            CollectorService._stop_process_pool(executor)
 
         end_time = time.time()
         log.info("Collection completed in %s seconds", end_time - start_time)