From: David Garcia Date: Thu, 15 Oct 2020 11:16:45 +0000 (+0200) Subject: Fix bug 1236: Retry if leader unit is not obtained X-Git-Tag: release-v9.0-start~12 X-Git-Url: https://osm.etsi.org/gitweb/?p=osm%2FN2VC.git;a=commitdiff_plain;h=59f520da90fb12b9d9871889dfbc5d57aa14c591;hp=7077e2601a209a87f7fe397b35586e7701ca759a Fix bug 1236: Retry if leader unit is not obtained There a racing condition when N2VC tries to get the leader unit, but there leader-elected hook has not been triggered. In that case, day-1 action might fail. This commit solved this issue by retrying a couple of times. Change-Id: If79cd243aa9ebdf8ed0e6235481eeb9fd2640612 Signed-off-by: David Garcia --- diff --git a/n2vc/libjuju.py b/n2vc/libjuju.py index a00fa58..28474a0 100644 --- a/n2vc/libjuju.py +++ b/n2vc/libjuju.py @@ -22,6 +22,7 @@ from juju.errors import JujuAPIError from juju.model import Model from juju.machine import Machine from juju.application import Application +from juju.unit import Unit from juju.client._definitions import ( FullStatus, QueryApplicationOffersResults, @@ -629,11 +630,21 @@ class Libjuju: if application is None: raise JujuApplicationNotFound("Cannot execute action") - # Get unit + # Get leader unit + # Racing condition: + # Ocassionally, self._get_leader_unit() will return None + # because the leader elected hook has not been triggered yet. + # Therefore, we are doing some retries. If it happens again, + # re-open bug 1236 + attempts = 3 + time_between_retries = 10 unit = None - for u in application.units: - if await u.is_leader_from_status(): - unit = u + for _ in range(attempts): + unit = await self._get_leader_unit(application) + if unit is None: + await asyncio.sleep(time_between_retries) + else: + break if unit is None: raise JujuLeaderUnitNotFound( "Cannot execute action: leader unit not found" @@ -1166,3 +1177,11 @@ class Libjuju: await controller.remove_cloud(name) finally: await self.disconnect_controller(controller) + + async def _get_leader_unit(self, application: Application) -> Unit: + unit = None + for u in application.units: + if await u.is_leader_from_status(): + unit = u + break + return unit diff --git a/n2vc/tests/unit/test_libjuju.py b/n2vc/tests/unit/test_libjuju.py index 8c16c9d..123da4a 100644 --- a/n2vc/tests/unit/test_libjuju.py +++ b/n2vc/tests/unit/test_libjuju.py @@ -30,6 +30,7 @@ from n2vc.exceptions import ( JujuActionNotFound, JujuApplicationExists, JujuInvalidK8sConfiguration, + JujuLeaderUnitNotFound, ) @@ -579,7 +580,35 @@ class ExecuteActionTest(LibjujuTestCase): mock_disconnect_controller.assert_called() mock_disconnect_model.assert_called() - # TODO no leader unit found exception + @asynctest.mock.patch("asyncio.sleep") + @asynctest.mock.patch("n2vc.tests.unit.utils.FakeUnit.is_leader_from_status") + def test_no_leader( + self, + mock_is_leader_from_status, + mock_sleep, + mock_get_action_status, + mock_get_action_output, + mock_wait_for, + mock__get_application, + mock_disconnect_controller, + mock_disconnect_model, + mock_get_model, + mock_get_controller, + ): + mock_get_model.return_value = juju.model.Model() + mock__get_application.return_value = FakeApplication() + mock_is_leader_from_status.return_value = False + output = None + status = None + with self.assertRaises(JujuLeaderUnitNotFound): + output, status = self.loop.run_until_complete( + self.libjuju.execute_action("app", "model", "action",) + ) + self.assertIsNone(output) + self.assertIsNone(status) + + mock_disconnect_controller.assert_called() + mock_disconnect_model.assert_called() def test_succesful_exec( self,