Bug 1417 solved: Timeout contacting EE grpc server 85/10285/2
author lloretgalleg <illoret@indra.es>
Tue, 26 Jan 2021 12:12:03 +0000 (12:12 +0000)
committer bravof <fbravo@whitestack.com>
Thu, 18 Feb 2021 12:35:48 +0000 (13:35 +0100)
Change-Id: Ie1b67cc20726bf0358f3a96fb0549455a32e7957
Signed-off-by: lloretgalleg <illoret@indra.es>
osm_lcm/lcm.cfg
osm_lcm/lcm_helm_conn.py

index 64a7da5..96fb373 100644 (file)
@@ -50,6 +50,8 @@ VCA:
     # pubkey: pubkey
     # cacert: cacert
     # apiproxy: apiproxy
+    #eegrpcinittimeout: 600
+    #eegrpctimeout: 30
 
     # loglevel: DEBUG
     # logfile:  /var/log/osm/lcm-vca.log
index 831190b..27c330f 100644 (file)
@@ -40,7 +40,7 @@ from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecut
 from osm_lcm.lcm_utils import deep_get
 
 
-def retryer(max_wait_time=60, delay_time=10):
+def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
     def wrapper(func):
         retry_exceptions = (
             ConnectionRefusedError
@@ -48,6 +48,17 @@ def retryer(max_wait_time=60, delay_time=10):
 
         @functools.wraps(func)
         async def wrapped(*args, **kwargs):
+            # default values for wait time and delay_time
+            delay_time = 10
+            max_wait_time = 300
+
+            # obtain arguments from variable names
+            self = args[0]
+            if self.__dict__.get(max_wait_time_var):
+                max_wait_time = self.__dict__.get(max_wait_time_var)
+            if self.__dict__.get(delay_time_var):
+                delay_time = self.__dict__.get(delay_time_var)
+
             wait_time = max_wait_time
             while wait_time > 0:
                 try:
@@ -67,12 +78,12 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
     _EE_SERVICE_PORT = 50050
 
-    # Time beetween retries
-    _EE_RETRY_DELAY = 10
     # Initial max retry time
-    _MAX_INITIAL_RETRY_TIME = 300
-    # Other retry time
+    _MAX_INITIAL_RETRY_TIME = 600
+    # Max retry time for normal operations
     _MAX_RETRY_TIME = 30
+    # Time between retries, retry time after a connection error is raised
+    _EE_RETRY_DELAY = 10
 
     def __init__(self,
                  log: object = None,
@@ -102,13 +113,26 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         )
 
         self.log.debug("Initialize helm N2VC connector")
+        self.log.debug("initial vca_config: {}".format(vca_config))
 
         # TODO - Obtain data from configuration
         self._ee_service_port = self._EE_SERVICE_PORT
 
         self._retry_delay = self._EE_RETRY_DELAY
-        self._max_retry_time = self._MAX_RETRY_TIME
-        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
+
+        if self.vca_config and self.vca_config.get("eegrpcinittimeout"):
+            self._initial_retry_time = self.vca_config.get("eegrpcinittimeout")
+            self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
+        else:
+            self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
+            self.log.debug("Applied default retry time: {}".format(self._initial_retry_time))
+
+        if self.vca_config and self.vca_config.get("eegrpctimeout"):
+            self._max_retry_time = self.vca_config.get("eegrpctimeout")
+            self.log.debug("Retry time: {}".format(self._max_retry_time))
+        else:
+            self._max_retry_time = self._MAX_RETRY_TIME
+            self.log.debug("Applied default retry time: {}".format(self._max_retry_time))
 
         # initialize helm connector for helmv2 and helmv3
         self._k8sclusterhelm2 = K8sHelmConnector(
@@ -458,7 +482,7 @@ class LCMHelmConn(N2VCConnector, LcmBase):
     ) -> str:
         pass
 
-    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
+    @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _get_ssh_key(self, ip_addr):
         channel = Channel(ip_addr, self._ee_service_port)
         try:
@@ -469,11 +493,11 @@ class LCMHelmConn(N2VCConnector, LcmBase):
         finally:
             channel.close()
 
-    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
+    @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
     async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
         return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)
 
-    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
+    @retryer(max_wait_time_var="_max_retry_time", delay_time_var="_retry_delay")
     async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
         return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)