Fix bug 1236: Retry if leader unit is not obtained
There a racing condition when N2VC tries to get the leader unit, but there
leader-elected hook has not been triggered. In that case, day-1 action might fail.
This commit solved this issue by retrying a couple of times.
Change-Id: If79cd243aa9ebdf8ed0e6235481eeb9fd2640612
Signed-off-by: David Garcia <david.garcia@canonical.com>
diff --git a/n2vc/libjuju.py b/n2vc/libjuju.py
index a00fa58..28474a0 100644
--- a/n2vc/libjuju.py
+++ b/n2vc/libjuju.py
@@ -22,6 +22,7 @@
from juju.model import Model
from juju.machine import Machine
from juju.application import Application
+from juju.unit import Unit
from juju.client._definitions import (
FullStatus,
QueryApplicationOffersResults,
@@ -629,11 +630,21 @@
if application is None:
raise JujuApplicationNotFound("Cannot execute action")
- # Get unit
+ # Get leader unit
+ # Racing condition:
+ # Ocassionally, self._get_leader_unit() will return None
+ # because the leader elected hook has not been triggered yet.
+ # Therefore, we are doing some retries. If it happens again,
+ # re-open bug 1236
+ attempts = 3
+ time_between_retries = 10
unit = None
- for u in application.units:
- if await u.is_leader_from_status():
- unit = u
+ for _ in range(attempts):
+ unit = await self._get_leader_unit(application)
+ if unit is None:
+ await asyncio.sleep(time_between_retries)
+ else:
+ break
if unit is None:
raise JujuLeaderUnitNotFound(
"Cannot execute action: leader unit not found"
@@ -1166,3 +1177,11 @@
await controller.remove_cloud(name)
finally:
await self.disconnect_controller(controller)
+
+ async def _get_leader_unit(self, application: Application) -> Unit:
+ unit = None
+ for u in application.units:
+ if await u.is_leader_from_status():
+ unit = u
+ break
+ return unit
diff --git a/n2vc/tests/unit/test_libjuju.py b/n2vc/tests/unit/test_libjuju.py
index 8c16c9d..123da4a 100644
--- a/n2vc/tests/unit/test_libjuju.py
+++ b/n2vc/tests/unit/test_libjuju.py
@@ -30,6 +30,7 @@
JujuActionNotFound,
JujuApplicationExists,
JujuInvalidK8sConfiguration,
+ JujuLeaderUnitNotFound,
)
@@ -579,7 +580,35 @@
mock_disconnect_controller.assert_called()
mock_disconnect_model.assert_called()
- # TODO no leader unit found exception
+ @asynctest.mock.patch("asyncio.sleep")
+ @asynctest.mock.patch("n2vc.tests.unit.utils.FakeUnit.is_leader_from_status")
+ def test_no_leader(
+ self,
+ mock_is_leader_from_status,
+ mock_sleep,
+ mock_get_action_status,
+ mock_get_action_output,
+ mock_wait_for,
+ mock__get_application,
+ mock_disconnect_controller,
+ mock_disconnect_model,
+ mock_get_model,
+ mock_get_controller,
+ ):
+ mock_get_model.return_value = juju.model.Model()
+ mock__get_application.return_value = FakeApplication()
+ mock_is_leader_from_status.return_value = False
+ output = None
+ status = None
+ with self.assertRaises(JujuLeaderUnitNotFound):
+ output, status = self.loop.run_until_complete(
+ self.libjuju.execute_action("app", "model", "action",)
+ )
+ self.assertIsNone(output)
+ self.assertIsNone(status)
+
+ mock_disconnect_controller.assert_called()
+ mock_disconnect_model.assert_called()
def test_succesful_exec(
self,