From 0f6b1c44ccb844079e0359c8bcda880902bae8b3 Mon Sep 17 00:00:00 2001 From: sritharan Date: Fri, 28 Apr 2023 05:32:26 +0000 Subject: [PATCH] Feature 10986: Autoheal switch and Autoscale switch Change-Id: I09db3c2f1bcbdbcc66ab8ce1f28a37ee41bd2f88 Signed-off-by: sritharan --- docker/Dockerfile | 4 + osm_policy_module/autoscaling/service.py | 85 ++++++++++--------- osm_policy_module/core/pol.yaml | 6 ++ osm_policy_module/healing/service.py | 79 ++++++++--------- .../autoscaling/test_autoscaling_service.py | 38 ++++++--- .../unit/healing/test_healing_service.py | 39 ++++++++- 6 files changed, 154 insertions(+), 97 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 93a4eeb..c83999c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -49,6 +49,10 @@ ENV OSMPOL_SQL_DATABASE_URI sqlite:///policy_module.db ENV OSMPOL_GLOBAL_LOGLEVEL INFO +ENV OSMPOL_AUTOHEAL_ENABLED True + +ENV OSMPOL_AUTOSCALE_ENABLED True + HEALTHCHECK --interval=10s --timeout=5s --retries=10 --start-period=30s \ CMD osm-pol-healthcheck || exit 1 diff --git a/osm_policy_module/autoscaling/service.py b/osm_policy_module/autoscaling/service.py index fd96da3..5869785 100644 --- a/osm_policy_module/autoscaling/service.py +++ b/osm_policy_module/autoscaling/service.py @@ -358,48 +358,49 @@ class AutoscalingService: async def evaluate_policy(self, alarm_uuid): database.db.connect() try: - with database.db.atomic(): - alarm = ScalingAlarmRepository.get( - ScalingAlarm.alarm_uuid == alarm_uuid - ) - vnf_member_index = alarm.vnf_member_index - action = alarm.action - scaling_policy = alarm.scaling_criteria.scaling_policy - if not scaling_policy.enabled: - return - if action == "scale_in": - operation = scaling_policy.scale_in_operation - elif action == "scale_out": - operation = scaling_policy.scale_out_operation - else: - raise Exception("Unknown alarm action {}".format(alarm.action)) - alarms = ScalingAlarmRepository.list( - ScalingAlarm.scaling_criteria == alarm.scaling_criteria, - ScalingAlarm.action == alarm.action, - ScalingAlarm.vnf_member_index == vnf_member_index, - ScalingAlarm.vdu_name == alarm.vdu_name, - ) - statuses = [] - for alarm in alarms: - statuses.append(alarm.last_status) - if (operation == "AND" and set(statuses) == {"alarm"}) or ( - operation == "OR" and "alarm" in statuses - ): - delta = datetime.datetime.now() - scaling_policy.last_scale - if delta.total_seconds() > scaling_policy.cooldown_time: - log.info( - "Sending %s action message for ns: %s", - alarm.action, - scaling_policy.scaling_group.nsr_id, - ) - await self.lcm_client.scale( - scaling_policy.scaling_group.nsr_id, - scaling_policy.scaling_group.name, - vnf_member_index, - action, - ) - scaling_policy.last_scale = datetime.datetime.now() - scaling_policy.save() + if self.conf.get("autoscale", "enabled") == "True": + with database.db.atomic(): + alarm = ScalingAlarmRepository.get( + ScalingAlarm.alarm_uuid == alarm_uuid + ) + vnf_member_index = alarm.vnf_member_index + action = alarm.action + scaling_policy = alarm.scaling_criteria.scaling_policy + if not scaling_policy.enabled: + return + if action == "scale_in": + operation = scaling_policy.scale_in_operation + elif action == "scale_out": + operation = scaling_policy.scale_out_operation + else: + raise Exception("Unknown alarm action {}".format(alarm.action)) + alarms = ScalingAlarmRepository.list( + ScalingAlarm.scaling_criteria == alarm.scaling_criteria, + ScalingAlarm.action == alarm.action, + ScalingAlarm.vnf_member_index == vnf_member_index, + ScalingAlarm.vdu_name == alarm.vdu_name, + ) + statuses = [] + for alarm in alarms: + statuses.append(alarm.last_status) + if (operation == "AND" and set(statuses) == {"alarm"}) or ( + operation == "OR" and "alarm" in statuses + ): + delta = datetime.datetime.now() - scaling_policy.last_scale + if delta.total_seconds() > scaling_policy.cooldown_time: + log.info( + "Sending %s action message for ns: %s", + alarm.action, + scaling_policy.scaling_group.nsr_id, + ) + await self.lcm_client.scale( + scaling_policy.scaling_group.nsr_id, + scaling_policy.scaling_group.name, + vnf_member_index, + action, + ) + scaling_policy.last_scale = datetime.datetime.now() + scaling_policy.save() except ScalingAlarm.DoesNotExist: log.debug( diff --git a/osm_policy_module/core/pol.yaml b/osm_policy_module/core/pol.yaml index 4e3571a..8d57614 100644 --- a/osm_policy_module/core/pol.yaml +++ b/osm_policy_module/core/pol.yaml @@ -40,3 +40,9 @@ sql: alert: timeout: 10 # timeout in secs enhanced_alarms: true # alarm enhancement + +autoheal: + enabled: True # enable autoheal + +autoscale: + enabled: True # enable autoscale diff --git a/osm_policy_module/healing/service.py b/osm_policy_module/healing/service.py index 7c6739c..4376852 100644 --- a/osm_policy_module/healing/service.py +++ b/osm_policy_module/healing/service.py @@ -238,45 +238,48 @@ class HealingService: database.db.connect() try: if status == "alarm": - with database.db.atomic(): - alarm = HealingActionRepository.get( - HealingAction.alarm_uuid == alarm_uuid - ) - vnf_member_index = alarm.vnf_member_index - vdur_name = alarm.vdur_name - vdu_id = alarm.vdu_id - nsr_id = alarm.nsr_id - heal_type = alarm.recovery_action - cooldown_time = alarm.cooldown_time - count_index = alarm.count_index - last_heal = alarm.last_heal - day1 = alarm.day1 - vnfinstance_id = alarm.vnfinstance_id - alarms = HealingActionRepository.list( - HealingAction.vnf_member_index == vnf_member_index, - HealingAction.vdur_name == vdur_name, - ) - statuses = [] + if self.conf.get("autoheal", "enabled") == "True": + with database.db.atomic(): + alarm = HealingActionRepository.get( + HealingAction.alarm_uuid == alarm_uuid + ) + vnf_member_index = alarm.vnf_member_index + vdur_name = alarm.vdur_name + vdu_id = alarm.vdu_id + nsr_id = alarm.nsr_id + heal_type = alarm.recovery_action + cooldown_time = alarm.cooldown_time + count_index = alarm.count_index + last_heal = alarm.last_heal + day1 = alarm.day1 + vnfinstance_id = alarm.vnfinstance_id + alarms = HealingActionRepository.list( + HealingAction.vnf_member_index == vnf_member_index, + HealingAction.vdur_name == vdur_name, + ) + statuses = [] - for alarm in alarms: - statuses.append(alarm.last_status) - if (set(statuses) == {"alarm"}) or ("alarm" in statuses): - delta = datetime.datetime.now() - last_heal - if delta.total_seconds() > cooldown_time: - await self.lcm_client.heal( - nsr_id, - vnfinstance_id, - vdur_name, - vdu_id, - vnf_member_index, - heal_type, - day1, - count_index, - ) - last_heal = datetime.datetime.now() - log.info("datetime.datetime.now %s", datetime.datetime.now) - alarm.last_heal = last_heal - alarm.save() + for alarm in alarms: + statuses.append(alarm.last_status) + if (set(statuses) == {"alarm"}) or ("alarm" in statuses): + delta = datetime.datetime.now() - last_heal + if delta.total_seconds() > cooldown_time: + await self.lcm_client.heal( + nsr_id, + vnfinstance_id, + vdur_name, + vdu_id, + vnf_member_index, + heal_type, + day1, + count_index, + ) + last_heal = datetime.datetime.now() + log.info( + "datetime.datetime.now %s", datetime.datetime.now + ) + alarm.last_heal = last_heal + alarm.save() except HealingAction.DoesNotExist: log.info("There is no healing action configured for alarm %s.", alarm_uuid) diff --git a/osm_policy_module/tests/unit/autoscaling/test_autoscaling_service.py b/osm_policy_module/tests/unit/autoscaling/test_autoscaling_service.py index e5253ea..1588486 100644 --- a/osm_policy_module/tests/unit/autoscaling/test_autoscaling_service.py +++ b/osm_policy_module/tests/unit/autoscaling/test_autoscaling_service.py @@ -100,8 +100,9 @@ class TestAutoscalingService(TestCase): list_alarms.return_value = [mock_alarm, mock_alarm_2] service = AutoscalingService(self.config) - asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_in") + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_in") @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -129,8 +130,9 @@ class TestAutoscalingService(TestCase): list_alarms.return_value = [mock_alarm, mock_alarm_2] service = AutoscalingService(self.config) - asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_not_called() + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_not_called() @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -158,8 +160,9 @@ class TestAutoscalingService(TestCase): list_alarms.return_value = [mock_alarm, mock_alarm_2] service = AutoscalingService(self.config) - asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_in") + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_in") @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -187,8 +190,9 @@ class TestAutoscalingService(TestCase): list_alarms.return_value = [mock_alarm, mock_alarm_2] service = AutoscalingService(self.config) - asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_in") + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_in") @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -216,8 +220,9 @@ class TestAutoscalingService(TestCase): list_alarms.return_value = [mock_alarm, mock_alarm_2] service = AutoscalingService(self.config) - asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_out") + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_out") @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -245,8 +250,9 @@ class TestAutoscalingService(TestCase): list_alarms.return_value = [mock_alarm, mock_alarm_2] service = AutoscalingService(self.config) - asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_not_called() + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_not_called() @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -275,7 +281,9 @@ class TestAutoscalingService(TestCase): service = AutoscalingService(self.config) asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_out") + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_out") @mock.patch.object(ScalingAlarmRepository, "list") @mock.patch.object(ScalingAlarmRepository, "get") @@ -304,7 +312,9 @@ class TestAutoscalingService(TestCase): service = AutoscalingService(self.config) asyncio.run(service.evaluate_policy("test_uuid")) - scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_out") + if self.config.get("autoscale", "enabled") == "True": + asyncio.run(service.evaluate_policy("test_uuid")) + scale.assert_called_with("test_nsr_id", "test_group", "1", "scale_out") def _build_mock_alarm( self, diff --git a/osm_policy_module/tests/unit/healing/test_healing_service.py b/osm_policy_module/tests/unit/healing/test_healing_service.py index 7f48f83..ae5fe62 100644 --- a/osm_policy_module/tests/unit/healing/test_healing_service.py +++ b/osm_policy_module/tests/unit/healing/test_healing_service.py @@ -52,18 +52,37 @@ class TestHealscalingService(TestCase): self.loop.run_until_complete(service.update_alarm_status("test_uuid", "alarm")) self.assertEqual(mock_alarm.last_status, "alarm") mock_alarm.save.assert_called_with() - service = HealingService(self.config) self.loop.run_until_complete(service.update_alarm_status("test_uuid", "ok")) self.assertEqual(mock_alarm.last_status, "ok") mock_alarm.save.assert_called_with() - - service = HealingService(self.config) self.loop.run_until_complete( service.update_alarm_status("test_uuid", "insufficient_data") ) self.assertEqual(mock_alarm.last_status, "insufficient_data") mock_alarm.save.assert_called_with() + """ + if self.config.get("autoheal", "enabled") == "True": + self.loop.run_until_complete( + service.update_alarm_status("test_uuid", "alarm") + ) + self.assertEqual(mock_alarm.last_status, "alarm") + mock_alarm.save.assert_called_with() + + service = HealingService(self.config) + if self.config.get("autoheal", "enabled") == "True": + self.loop.run_until_complete(service.update_alarm_status("test_uuid", "ok")) + self.assertEqual(mock_alarm.last_status, "ok") + mock_alarm.save.assert_called_with() + + service = HealingService(self.config) + if self.config.get("autoheal", "enabled") == "True": + self.loop.run_until_complete( + service.update_alarm_status("test_uuid", "insufficient_data") + ) + self.assertEqual(mock_alarm.last_status, "insufficient_data") + mock_alarm.save.assert_called_with() + """ @mock.patch.object(HealingActionRepository, "list") @mock.patch.object(HealingActionRepository, "get") @@ -84,6 +103,20 @@ class TestHealscalingService(TestCase): "test_day1", "test_count_index", ) + """ + if self.config.get("autoheal", "enabled") == "True": + self.loop.run_until_complete(service.handle_alarm("test_id", "alarm")) + heal.assert_called_with( + "test_nsr_id", + "test_vnfinstance_id", + "test_vdur_name", + "test_vdu_id", + "test_vnf_member_index", + "test_heal_type", + "test_day1", + "test_count_index", + ) + """ def _build_mock_alarm( self, -- 2.25.1