Improve logging in cluster operations in case of issues in ODU workflow
Change-Id: I09fc87cf33889fde4b4a94b8f530291888218813
Signed-off-by: garciadeblas <gerardo.garciadeblas@telefonica.com>
diff --git a/osm_lcm/k8s.py b/osm_lcm/k8s.py
index fd3667d..87b7160 100644
--- a/osm_lcm/k8s.py
+++ b/osm_lcm/k8s.py
@@ -332,11 +332,27 @@
db_vim = self.db.get_one("vim_accounts", {"name": db_cluster["vim_account"]})
workflow_content["vim_account"] = db_vim
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_cluster", op_id, op_params, workflow_content
)
- self.logger.info("workflow_name is :{}".format(workflow_name))
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["state"] = "FAILED_CREATION"
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ # Clean items used in the workflow, no matter if the workflow succeeded
+ clean_status, clean_msg = await self.odu.clean_items_workflow(
+ "create_cluster", op_id, op_params, workflow_content
+ )
+ self.logger.info(
+ f"clean_status is :{clean_status} and clean_msg is :{clean_msg}"
+ )
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -564,11 +580,27 @@
if db_cluster["created"] == "false":
return await self.deregister(params, order_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_cluster", op_id, op_params, workflow_content
)
- self.logger.info("workflow_name is :{}".format(workflow_name))
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["state"] = "FAILED_DELETION"
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ # Clean items used in the workflow, no matter if the workflow succeeded
+ clean_status, clean_msg = await self.odu.clean_items_workflow(
+ "delete_cluster", op_id, op_params, workflow_content
+ )
+ self.logger.info(
+ f"clean_status is :{clean_status} and clean_msg is :{clean_msg}"
+ )
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -742,11 +774,19 @@
# content["profile"] = db_profile
workflow_content["profile"] = db_profile
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"attach_profile_to_cluster", op_id, op_params, workflow_content
)
- self.logger.info("workflow_name is :{}".format(workflow_name))
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -822,11 +862,19 @@
# content["profile"] = db_profile
workflow_content["profile"] = db_profile
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"detach_profile_from_cluster", op_id, op_params, workflow_content
)
- self.logger.info("workflow_name is :{}".format(workflow_name))
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -894,11 +942,27 @@
"cluster": self.decrypted_copy(db_cluster),
}
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"register_cluster", op_id, op_params, workflow_content
)
- self.logger.info("workflow_name is :{}".format(workflow_name))
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["state"] = "FAILED_CREATION"
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ # Clean items used in the workflow, no matter if the workflow succeeded
+ clean_status, clean_msg = await self.odu.clean_items_workflow(
+ "register_cluster", op_id, op_params, workflow_content
+ )
+ self.logger.info(
+ f"clean_status is :{clean_status} and clean_msg is :{clean_msg}"
+ )
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -1010,11 +1074,27 @@
"cluster": self.decrypted_copy(db_cluster),
}
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"deregister_cluster", op_id, op_params, workflow_content
)
- self.logger.info("workflow_name is :{}".format(workflow_name))
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["state"] = "FAILED_DELETION"
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ # Clean items used in the workflow, no matter if the workflow succeeded
+ clean_status, clean_msg = await self.odu.clean_items_workflow(
+ "deregister_cluster", op_id, op_params, workflow_content
+ )
+ self.logger.info(
+ f"clean_status is :{clean_status} and clean_msg is :{clean_msg}"
+ )
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -1104,9 +1184,25 @@
db_vim = self.db.get_one("vim_accounts", {"name": db_cluster["vim_account"]})
workflow_content["vim_account"] = db_vim
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"update_cluster", op_id, op_params, workflow_content
)
+ if not workflow_res:
+ self.logger.error(f"Failed to launch workflow: {workflow_name}")
+ db_cluster["resourceState"] = "ERROR"
+ db_cluster = self.update_operation_history(
+ db_cluster, op_id, workflow_status=False, resource_status=None
+ )
+ self.db.set_one("clusters", {"_id": db_cluster["_id"]}, db_cluster)
+ # Clean items used in the workflow, no matter if the workflow succeeded
+ clean_status, clean_msg = await self.odu.clean_items_workflow(
+ "update_cluster", op_id, op_params, workflow_content
+ )
+ self.logger.info(
+ f"clean_status is :{clean_status} and clean_msg is :{clean_msg}"
+ )
+ return
+ self.logger.info("workflow_name is :{}".format(workflow_name))
workflow_status, workflow_msg = await self.odu.check_workflow_status(
workflow_name
)
@@ -1263,7 +1359,7 @@
salt=vim_id,
)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_cloud_credentials", op_id, op_params, db_content
)
@@ -1316,7 +1412,7 @@
salt=vim_id,
)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"update_cloud_credentials", op_id, op_params, db_content
)
workflow_status, workflow_msg = await self.odu.check_workflow_status(
@@ -1352,7 +1448,7 @@
op_params = params
db_content = self.db.get_one("vim_accounts", {"_id": vim_id})
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_cloud_credentials", op_id, op_params, db_content
)
workflow_status, workflow_msg = await self.odu.check_workflow_status(
@@ -1400,7 +1496,7 @@
op_params = self.get_operation_params(content, op_id)
self.db.set_one("k8sapp", {"_id": content["_id"]}, content)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1429,7 +1525,7 @@
content = self.db.get_one("k8sapp", {"_id": profile_id})
op_params = self.get_operation_params(content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1476,7 +1572,7 @@
op_params = self.get_operation_params(content, op_id)
self.db.set_one("k8sresource", {"_id": content["_id"]}, content)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1507,7 +1603,7 @@
content = self.db.get_one("k8sresource", {"_id": profile_id})
op_params = self.get_operation_params(content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1556,7 +1652,7 @@
op_params = self.get_operation_params(content, op_id)
self.db.set_one("k8sinfra_controller", {"_id": content["_id"]}, content)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1587,7 +1683,7 @@
content = self.db.get_one("k8sinfra_controller", {"_id": profile_id})
op_params = self.get_operation_params(content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1636,7 +1732,7 @@
op_params = self.get_operation_params(content, op_id)
self.db.set_one("k8sinfra_config", {"_id": content["_id"]}, content)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1667,7 +1763,7 @@
content = self.db.get_one("k8sinfra_config", {"_id": profile_id})
op_params = self.get_operation_params(content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_profile", op_id, op_params, content
)
self.logger.info("workflow_name is :{}".format(workflow_name))
@@ -1711,7 +1807,7 @@
db_content = self.db.get_one(self.db_collection, {"_id": oka_id})
op_params = self.get_operation_params(db_content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_oka", op_id, op_params, db_content
)
@@ -1735,7 +1831,7 @@
db_content = self.db.get_one(self.db_collection, {"_id": oka_id})
op_params = self.get_operation_params(db_content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"update_oka", op_id, op_params, db_content
)
workflow_status = await self.check_workflow_and_update_db(
@@ -1758,7 +1854,7 @@
db_content = self.db.get_one(self.db_collection, {"_id": oka_id})
op_params = self.get_operation_params(db_content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_oka", op_id, op_params, db_content
)
workflow_status = await self.check_workflow_and_update_db(
@@ -1845,7 +1941,7 @@
op_params.append(ksu_params)
# A single workflow is launched for all KSUs
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"create_ksus", op_id, op_params, db_content
)
# Update workflow status in all KSUs
@@ -1910,7 +2006,7 @@
] = f"{oka_type}/{db_oka['git_name']}/templates"
op_params.append(ksu_params)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"update_ksus", op_id, op_params, db_content
)
@@ -1964,7 +2060,7 @@
ksu_params["profile"]["age_pubkey"] = db_profile.get("age_pubkey", "")
op_params.append(ksu_params)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"delete_ksus", op_id, op_params, db_content
)
@@ -1993,7 +2089,7 @@
self.initialize_operation(ksus_id, op_id)
db_content = self.db.get_one(self.db_collection, {"_id": ksus_id})
op_params = self.get_operation_params(db_content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"clone_ksus", op_id, op_params, db_content
)
@@ -2017,7 +2113,7 @@
self.initialize_operation(ksus_id, op_id)
db_content = self.db.get_one(self.db_collection, {"_id": ksus_id})
op_params = self.get_operation_params(db_content, op_id)
- _, workflow_name = await self.odu.launch_workflow(
+ workflow_res, workflow_name = await self.odu.launch_workflow(
"move_ksus", op_id, op_params, db_content
)
diff --git a/osm_lcm/odu_libs/cluster_mgmt.py b/osm_lcm/odu_libs/cluster_mgmt.py
index bed2781..efca1e2 100644
--- a/osm_lcm/odu_libs/cluster_mgmt.py
+++ b/osm_lcm/odu_libs/cluster_mgmt.py
@@ -51,8 +51,8 @@
# Get age key
public_key_new_cluster, private_key_new_cluster = gather_age_key(db_cluster)
- self.logger.debug(f"public_key_new_cluster={public_key_new_cluster}")
- self.logger.debug(f"private_key_new_cluster={private_key_new_cluster}")
+ # self.logger.debug(f"public_key_new_cluster={public_key_new_cluster}")
+ # self.logger.debug(f"private_key_new_cluster={private_key_new_cluster}")
# Test kubectl connection
self.logger.debug(self._kubectl._get_kubectl_version())
@@ -71,7 +71,7 @@
)
except Exception as e:
self.logger.info(f"Cannot create secret {secret_name}: {e}")
- return False, ""
+ return False, f"Cannot create secret {secret_name}: {e}"
# Additional params for the workflow
cluster_kustomization_name = cluster_name
@@ -170,7 +170,7 @@
)
except Exception as e:
self.logger.info(f"Cannot create secret {secret_name}: {e}")
- return False, ""
+ return False, f"Cannot create secret {secret_name}: {e}"
# Additional params for the workflow
cluster_kustomization_name = cluster_name
@@ -299,7 +299,10 @@
self.logger.info(
f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}"
)
- return False, ""
+ return (
+ False,
+ f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}",
+ )
# Create secret with kubeconfig
secret_name2 = f"kubeconfig-{cluster_name}"
@@ -319,7 +322,10 @@
self.logger.info(
f"Cannot create secret {secret_name} in namespace {secret_namespace}: {e}"
)
- return False, ""
+ return (
+ False,
+ f"Cannot create secret {secret_name2} in namespace {secret_namespace}: {e}",
+ )
# Additional params for the workflow
cluster_kustomization_name = cluster_name
diff --git a/osm_lcm/odu_workflows.py b/osm_lcm/odu_workflows.py
index 9c8e6e0..e812d86 100644
--- a/osm_lcm/odu_workflows.py
+++ b/osm_lcm/odu_workflows.py
@@ -181,7 +181,12 @@
)
workflow_function = self._workflows[key]["workflow_function"]
self.logger.info("workflow function : {}".format(workflow_function))
- return await workflow_function(op_id, op_params, content)
+ try:
+ result, workflow_name = await workflow_function(op_id, op_params, content)
+ return result, workflow_name
+ except Exception as e:
+ self.logger.error(f"Error launching workflow: {e}")
+ return False, str(e)
async def dummy_clean_items(self, op_id, op_params, content):
self.logger.info(