Feature 10981: added autohealing DAG and updated requirements
[osm/NG-SA.git] / src / osm_ngsa / dags / alert_vdu.py
1 #######################################################################################
2 # Copyright ETSI Contributors and Others.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #######################################################################################
17 import asyncio
18 from datetime import datetime, timedelta
19 import logging
20 import time
21 import uuid
22
23 from airflow.decorators import dag, task
24 from airflow.operators.python import get_current_context
25 from osm_mon.core.common_db import CommonDbClient
26 from osm_mon.core.config import Config
27 from osm_mon.core.message_bus_client import MessageBusClient
28
29 # Logging
30 logger = logging.getLogger("airflow.task")
31
32
@dag(
    catchup=False,
    default_args={
        "depends_on_past": False,
        "retries": 1,
        "retry_delay": timedelta(seconds=5),
    },
    description="Webhook callback for VDU alarm from Prometheus AlertManager",
    is_paused_upon_creation=False,
    schedule_interval=None,
    start_date=datetime(2022, 1, 1),
    tags=["osm", "webhook"],
)
def alert_vdu():
    """Webhook DAG that reacts to VDU alarms delivered in the DAG run conf.

    The DAG has no schedule; it runs a single task, ``main_task``, which
    reads the alarm batch from ``dag_run.conf["alerts"]``.
    """

    @task(task_id="main_task")
    def main_task():
        """Process every alarm found in ``dag_run.conf["alerts"]``.

        Only alarms whose ``alertname`` label is ``vdu_down`` are handled:

        * ``firing``: if a matching alert rule with ``action_type``
          ``healing`` exists, mark it as ``alarm``, create a ``heal``
          nslcmop record and publish it on the ``ns`` topic.
        * ``resolved``: if a matching alert rule exists, mark it as ``ok``.

        Alarms that cannot be handled are skipped individually so that the
        rest of the batch is still processed.
        """
        logger.debug("Running main task...")
        context = get_current_context()
        conf = context["dag_run"].conf
        for alarm in conf["alerts"]:
            logger.info("VDU alarm:")
            status = alarm["status"]
            logger.info(f" status: {status}")
            logger.info(f' annotations: {alarm["annotations"]}')
            logger.info(f' startsAt: {alarm["startsAt"]}')
            logger.info(f' endsAt: {alarm["endsAt"]}')
            logger.info(f' labels: {alarm["labels"]}')
            labels = alarm["labels"]
            # vdu_down alert type
            if labels["alertname"] != "vdu_down":
                continue
            config = Config()
            common_db = CommonDbClient(config)
            ns_id = labels["ns_id"]
            vdu_name = labels["vdu_name"]
            vnf_member_index = labels["vnf_member_index"]
            vm_id = labels["vm_id"]
            # Any status other than firing/resolved is ignored.
            if status not in ("firing", "resolved"):
                continue

            # Searching alerting rule in MongoDB (same lookup for both statuses)
            logger.info(
                f"Searching alert rule in MongoDB: ns_id {ns_id}, "
                f"vnf_member_index {vnf_member_index}, "
                f"vdu_name {vdu_name}, "
                f"vm_id {vm_id}"
            )
            alert = common_db.get_alert(
                nsr_id=ns_id, vnf_member_index=vnf_member_index, vdu_name=vdu_name
            )

            if status == "resolved":
                if alert:
                    logger.info("Found an alert rule, updating status")
                    # Update alert status
                    common_db.update_alert_status(uuid=alert["uuid"], alarm_status="ok")
                continue

            # status == "firing" from here on
            if not alert or alert["action_type"] != "healing":
                logger.info("No alert rule was found")
                continue

            logger.info("Found an alert rule:")
            logger.info(alert)
            # Update alert status
            common_db.update_alert_status(uuid=alert["uuid"], alarm_status="alarm")
            # Get VNFR from MongoDB
            vnfr = common_db.get_vnfr(nsr_id=ns_id, member_index=vnf_member_index)
            logger.info(
                f"Found VNFR ns_id: {ns_id}, vnf_member_index: {vnf_member_index}"
            )
            # Locate the VDU record whose VIM id matches the alarmed VM.
            count_index = None
            for vdu in vnfr.get("vdur", []):
                if vdu["vim-id"] == vm_id:
                    count_index = vdu["count-index"]
                    break
            if count_index is None:
                logger.error(f"VDU {vm_id} not found in VNFR")
                # Skip only this alarm; the remaining alarms in the batch
                # must still be processed.
                continue

            # Auto-healing type rule
            vnf_id = labels["vnf_id"]
            msg_bus = MessageBusClient(config)
            # NOTE(review): asyncio.get_event_loop() is deprecated on recent
            # Python versions when no loop is running; confirm how
            # MessageBusClient obtains its loop before replacing this call.
            loop = asyncio.get_event_loop()
            _id = str(uuid.uuid4())
            now = time.time()
            vdu_id = alert["action"]["vdu-id"]
            day1 = alert["action"]["day1"]
            projects_read = vnfr["_admin"]["projects_read"]
            projects_write = vnfr["_admin"]["projects_write"]
            params = {
                "lcmOperationType": "heal",
                "nsInstanceId": ns_id,
                "healVnfData": [
                    {
                        "vnfInstanceId": vnf_id,
                        "cause": "default",
                        "additionalParams": {
                            "run-day1": day1,
                            "vdu": [
                                {
                                    "run-day1": day1,
                                    "count-index": count_index,
                                    "vdu-id": vdu_id,
                                }
                            ],
                        },
                    }
                ],
            }
            nslcmop = {
                "id": _id,
                "_id": _id,
                "operationState": "PROCESSING",
                "statusEnteredTime": now,
                "nsInstanceId": ns_id,
                "member-vnf-index": vnf_member_index,
                "lcmOperationType": "heal",
                "startTime": now,
                "location": "default",
                "isAutomaticInvocation": True,
                "operationParams": params,
                "isCancelPending": False,
                "links": {
                    "self": "/osm/nslcm/v1/ns_lcm_op_occs/" + _id,
                    "nsInstance": "/osm/nslcm/v1/ns_instances/" + ns_id,
                },
                "_admin": {
                    "projects_read": projects_read,
                    "projects_write": projects_write,
                },
            }
            # Persist the operation record first, then publish it on the bus.
            common_db.create_nslcmop(nslcmop)
            logger.info("Sending heal action message:")
            logger.info(nslcmop)
            loop.run_until_complete(msg_bus.aiowrite("ns", "heal", nslcmop))

    main_task()
177
178
# Call the @dag-decorated function at import time and keep the result in a
# module-level name.
dag = alert_vdu()