From: garciadeblas Date: Tue, 4 Feb 2025 15:08:51 +0000 (+0100) Subject: Improve logging in cluster operations in case of issues in ODU workflow X-Git-Tag: v18.0.0~40 X-Git-Url: https://osm.etsi.org/gitweb/?a=commitdiff_plain;h=refs%2Fchanges%2F08%2F14908%2F1;p=osm%2FLCM.git Improve logging in cluster operations in case of issues in ODU workflow Change-Id: I09fc87cf33889fde4b4a94b8f530291888218813 Signed-off-by: garciadeblas --- diff --git a/osm_lcm/k8s.py b/osm_lcm/k8s.py index fd3667da..87b71600 100644 --- a/osm_lcm/k8s.py +++ b/osm_lcm/k8s.py @@ -332,11 +332,27 @@ class ClusterLcm(GitOpsLcm): db_vim = self.db.get_one("vim_accounts", {"name": db_cluster["vim_account"]}) workflow_content["vim_account"] = db_vim - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_cluster", op_id, op_params, workflow_content ) - self.logger.info("workflow_name is :{}".format(workflow_name)) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["state"] = "FAILED_CREATION" + db_cluster["resourceState"] = "ERROR" + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + # Clean items used in the workflow, no matter if the workflow succeeded + clean_status, clean_msg = await self.odu.clean_items_workflow( + "create_cluster", op_id, op_params, workflow_content + ) + self.logger.info( + f"clean_status is :{clean_status} and clean_msg is :{clean_msg}" + ) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -564,11 +580,27 @@ class ClusterLcm(GitOpsLcm): if db_cluster["created"] == "false": return await self.deregister(params, order_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_cluster", op_id, op_params, workflow_content ) - self.logger.info("workflow_name is :{}".format(workflow_name)) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["state"] = "FAILED_DELETION" + db_cluster["resourceState"] = "ERROR" + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + # Clean items used in the workflow, no matter if the workflow succeeded + clean_status, clean_msg = await self.odu.clean_items_workflow( + "delete_cluster", op_id, op_params, workflow_content + ) + self.logger.info( + f"clean_status is :{clean_status} and clean_msg is :{clean_msg}" + ) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -742,11 +774,19 @@ class ClusterLcm(GitOpsLcm): # content["profile"] = db_profile workflow_content["profile"] = db_profile - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "attach_profile_to_cluster", op_id, op_params, workflow_content ) - self.logger.info("workflow_name is :{}".format(workflow_name)) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["resourceState"] = "ERROR" + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -822,11 +862,19 @@ class ClusterLcm(GitOpsLcm): # content["profile"] = db_profile workflow_content["profile"] = db_profile - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "detach_profile_from_cluster", op_id, op_params, workflow_content ) - self.logger.info("workflow_name is :{}".format(workflow_name)) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["resourceState"] = "ERROR" + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -894,11 +942,27 @@ class ClusterLcm(GitOpsLcm): "cluster": self.decrypted_copy(db_cluster), } - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "register_cluster", op_id, op_params, workflow_content ) - self.logger.info("workflow_name is :{}".format(workflow_name)) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["state"] = "FAILED_CREATION" + db_cluster["resourceState"] = "ERROR" + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + # Clean items used in the workflow, no matter if the workflow succeeded + clean_status, clean_msg = await self.odu.clean_items_workflow( + "register_cluster", op_id, op_params, workflow_content + ) + self.logger.info( + f"clean_status is :{clean_status} and clean_msg is :{clean_msg}" + ) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -1010,11 +1074,27 @@ class ClusterLcm(GitOpsLcm): "cluster": self.decrypted_copy(db_cluster), } - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "deregister_cluster", op_id, op_params, workflow_content ) - self.logger.info("workflow_name is :{}".format(workflow_name)) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["state"] = "FAILED_DELETION" + db_cluster["resourceState"] = "ERROR" + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + # Clean items used in the workflow, no matter if the workflow succeeded + clean_status, clean_msg = await self.odu.clean_items_workflow( + "deregister_cluster", op_id, op_params, workflow_content + ) + self.logger.info( + f"clean_status is :{clean_status} and clean_msg is :{clean_msg}" + ) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -1104,9 +1184,25 @@ class ClusterLcm(GitOpsLcm): db_vim = self.db.get_one("vim_accounts", {"name": db_cluster["vim_account"]}) workflow_content["vim_account"] = db_vim - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "update_cluster", op_id, op_params, workflow_content ) + if not workflow_res: + self.logger.error(f"Failed to launch workflow: {workflow_name}") + db_cluster["resourceState"] = "ERROR" + db_cluster = self.update_operation_history( + db_cluster, op_id, workflow_status=False, resource_status=None + ) + self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster) + # Clean items used in the workflow, no matter if the workflow succeeded + clean_status, clean_msg = await self.odu.clean_items_workflow( + "update_cluster", op_id, op_params, workflow_content + ) + self.logger.info( + f"clean_status is :{clean_status} and clean_msg is :{clean_msg}" + ) + return + self.logger.info("workflow_name is :{}".format(workflow_name)) workflow_status, workflow_msg = await self.odu.check_workflow_status( workflow_name ) @@ -1263,7 +1359,7 @@ class CloudCredentialsLcm(GitOpsLcm): salt=vim_id, ) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_cloud_credentials", op_id, op_params, db_content ) @@ -1316,7 +1412,7 @@ class CloudCredentialsLcm(GitOpsLcm): salt=vim_id, ) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "update_cloud_credentials", op_id, op_params, db_content ) workflow_status, workflow_msg = await self.odu.check_workflow_status( @@ -1352,7 +1448,7 @@ class CloudCredentialsLcm(GitOpsLcm): op_params = params db_content = self.db.get_one("vim_accounts", {"_id": vim_id}) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_cloud_credentials", op_id, op_params, db_content ) workflow_status, workflow_msg = await self.odu.check_workflow_status( @@ -1400,7 +1496,7 @@ class K8sAppLcm(GitOpsLcm): op_params = self.get_operation_params(content, op_id) self.db.set_one("k8sapp", {"_id": content["_id"]}, content) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1429,7 +1525,7 @@ class K8sAppLcm(GitOpsLcm): content = self.db.get_one("k8sapp", {"_id": profile_id}) op_params = self.get_operation_params(content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1476,7 +1572,7 @@ class K8sResourceLcm(GitOpsLcm): op_params = self.get_operation_params(content, op_id) self.db.set_one("k8sresource", {"_id": content["_id"]}, content) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1507,7 +1603,7 @@ class K8sResourceLcm(GitOpsLcm): content = self.db.get_one("k8sresource", {"_id": profile_id}) op_params = self.get_operation_params(content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1556,7 +1652,7 @@ class K8sInfraControllerLcm(GitOpsLcm): op_params = self.get_operation_params(content, op_id) self.db.set_one("k8sinfra_controller", {"_id": content["_id"]}, content) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1587,7 +1683,7 @@ class K8sInfraControllerLcm(GitOpsLcm): content = self.db.get_one("k8sinfra_controller", {"_id": profile_id}) op_params = self.get_operation_params(content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1636,7 +1732,7 @@ class K8sInfraConfigLcm(GitOpsLcm): op_params = self.get_operation_params(content, op_id) self.db.set_one("k8sinfra_config", {"_id": content["_id"]}, content) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1667,7 +1763,7 @@ class K8sInfraConfigLcm(GitOpsLcm): content = self.db.get_one("k8sinfra_config", {"_id": profile_id}) op_params = self.get_operation_params(content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_profile", op_id, op_params, content ) self.logger.info("workflow_name is :{}".format(workflow_name)) @@ -1711,7 +1807,7 @@ class OkaLcm(GitOpsLcm): db_content = self.db.get_one(self.db_collection, {"_id": oka_id}) op_params = self.get_operation_params(db_content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_oka", op_id, op_params, db_content ) @@ -1735,7 +1831,7 @@ class OkaLcm(GitOpsLcm): db_content = self.db.get_one(self.db_collection, {"_id": oka_id}) op_params = self.get_operation_params(db_content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "update_oka", op_id, op_params, db_content ) workflow_status = await self.check_workflow_and_update_db( @@ -1758,7 +1854,7 @@ class OkaLcm(GitOpsLcm): db_content = self.db.get_one(self.db_collection, {"_id": oka_id}) op_params = self.get_operation_params(db_content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_oka", op_id, op_params, db_content ) workflow_status = await self.check_workflow_and_update_db( @@ -1845,7 +1941,7 @@ class KsuLcm(GitOpsLcm): op_params.append(ksu_params) # A single workflow is launched for all KSUs - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "create_ksus", op_id, op_params, db_content ) # Update workflow status in all KSUs @@ -1910,7 +2006,7 @@ class KsuLcm(GitOpsLcm): ] = f"{oka_type}/{db_oka['git_name']}/templates" op_params.append(ksu_params) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "update_ksus", op_id, op_params, db_content ) @@ -1964,7 +2060,7 @@ class KsuLcm(GitOpsLcm): ksu_params["profile"]["age_pubkey"] = db_profile.get("age_pubkey", "") op_params.append(ksu_params) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "delete_ksus", op_id, op_params, db_content ) @@ -1993,7 +2089,7 @@ class KsuLcm(GitOpsLcm): self.initialize_operation(ksus_id, op_id) db_content = self.db.get_one(self.db_collection, {"_id": ksus_id}) op_params = self.get_operation_params(db_content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "clone_ksus", op_id, op_params, db_content ) @@ -2017,7 +2113,7 @@ class KsuLcm(GitOpsLcm): self.initialize_operation(ksus_id, op_id) db_content = self.db.get_one(self.db_collection, {"_id": ksus_id}) op_params = self.get_operation_params(db_content, op_id) - _, workflow_name = await self.odu.launch_workflow( + workflow_res, workflow_name = await self.odu.launch_workflow( "move_ksus", op_id, op_params, db_content ) diff --git a/osm_lcm/odu_libs/cluster_mgmt.py b/osm_lcm/odu_libs/cluster_mgmt.py index bed2781d..efca1e2e 100644 --- a/osm_lcm/odu_libs/cluster_mgmt.py +++ b/osm_lcm/odu_libs/cluster_mgmt.py @@ -51,8 +51,8 @@ async def create_cluster(self, op_id, op_params, content): # Get age key public_key_new_cluster, private_key_new_cluster = gather_age_key(db_cluster) - self.logger.debug(f"public_key_new_cluster={public_key_new_cluster}") - self.logger.debug(f"private_key_new_cluster={private_key_new_cluster}") + # self.logger.debug(f"public_key_new_cluster={public_key_new_cluster}") + # self.logger.debug(f"private_key_new_cluster={private_key_new_cluster}") # Test kubectl connection self.logger.debug(self._kubectl._get_kubectl_version()) @@ -71,7 +71,7 @@ async def create_cluster(self, op_id, op_params, content): ) except Exception as e: self.logger.info(f"Cannot create secret {secret_name}: {e}") - return False, "" + return False, f"Cannot create secret {secret_name}: {e}" # Additional params for the workflow cluster_kustomization_name = cluster_name @@ -170,7 +170,7 @@ async def update_cluster(self, op_id, op_params, content): ) except Exception as e: self.logger.info(f"Cannot create secret {secret_name}: {e}") - return False, "" + return False, f"Cannot create secret {secret_name}: {e}" # Additional params for the workflow cluster_kustomization_name = cluster_name @@ -299,7 +299,10 @@ async def register_cluster(self, op_id, op_params, content): self.logger.info( f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}" ) - return False, "" + return ( + False, + f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}", + ) # Create secret with kubeconfig secret_name2 = f"kubeconfig-{cluster_name}" @@ -319,7 +322,10 @@ async def register_cluster(self, op_id, op_params, content): self.logger.info( f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}" ) - return False, "" + return ( + False, + f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}", + ) # Additional params for the workflow cluster_kustomization_name = cluster_name diff --git a/osm_lcm/odu_workflows.py b/osm_lcm/odu_workflows.py index 9c8e6e04..e812d866 100644 --- a/osm_lcm/odu_workflows.py +++ b/osm_lcm/odu_workflows.py @@ -181,7 +181,12 @@ class OduWorkflow(LcmBase): ) workflow_function = self._workflows[key]["workflow_function"] self.logger.info("workflow function : {}".format(workflow_function)) - return await workflow_function(op_id, op_params, content) + try: + result, workflow_name = await workflow_function(op_id, op_params, content) + return result, workflow_name + except Exception as e: + self.logger.error(f"Error launching workflow: {e}") + return False, str(e) async def dummy_clean_items(self, op_id, op_params, content): self.logger.info(