c3148935c9813a6ebbe9fd5177fc3fe11aa79429
[osm/NG-SA.git] / src / osm_ngsa / dags / alert_vdu.py
1 #######################################################################################
2 # Copyright ETSI Contributors and Others.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #######################################################################################
17 import asyncio
18 from datetime import datetime, timedelta
19 import logging
20 import time
21 import uuid
22
23 from airflow.decorators import dag, task
24 from airflow.operators.python import get_current_context
25 from osm_mon.core.common_db import CommonDbClient
26 from osm_mon.core.config import Config
27 from osm_mon.core.message_bus_client import MessageBusClient
28
# Logging
# Module-level logger named "airflow.task"; every message emitted by the DAG's
# task in this file goes through it.
logger = logging.getLogger("airflow.task")
31
32
def _find_count_index(vnfr, vm_id):
    """Return the "count-index" of the VDU record whose "vim-id" equals vm_id.

    Scans ``vnfr["vdur"]`` (treated as empty when the key is absent) and
    returns the value of the first match, or ``None`` when nothing matches.
    """
    for vdur in vnfr.get("vdur", []):
        if vdur["vim-id"] == vm_id:
            return vdur["count-index"]
    return None


def _build_heal_nslcmop(
    *,
    op_id,
    now,
    ns_id,
    vnf_id,
    vnf_member_index,
    vdu_id,
    count_index,
    day1,
    projects_read,
    projects_write,
):
    """Build the "heal" NS LCM operation record for a single VDU.

    The returned dict is what gets stored through ``create_nslcmop`` and is
    also the payload written to the message bus. ``op_id`` is used for both
    the "id" and "_id" fields, ``now`` for both timestamps.
    """
    params = {
        "lcmOperationType": "heal",
        "nsInstanceId": ns_id,
        "healVnfData": [
            {
                "vnfInstanceId": vnf_id,
                "cause": "default",
                "additionalParams": {
                    "run-day1": day1,
                    "vdu": [
                        {
                            "run-day1": day1,
                            "count-index": count_index,
                            "vdu-id": vdu_id,
                        }
                    ],
                },
            }
        ],
    }
    return {
        "id": op_id,
        "_id": op_id,
        "operationState": "PROCESSING",
        "statusEnteredTime": now,
        "nsInstanceId": ns_id,
        "member-vnf-index": vnf_member_index,
        "lcmOperationType": "heal",
        "startTime": now,
        "location": "default",
        "isAutomaticInvocation": True,
        "operationParams": params,
        "isCancelPending": False,
        "links": {
            "self": "/osm/nslcm/v1/ns_lcm_op_occs/" + op_id,
            "nsInstance": "/osm/nslcm/v1/ns_instances/" + ns_id,
        },
        "_admin": {
            "projects_read": projects_read,
            "projects_write": projects_write,
        },
    }


@dag(
    catchup=False,
    default_args={
        "depends_on_past": False,
        "retries": 1,
        "retry_delay": timedelta(seconds=5),
    },
    description="Webhook callback for VDU alarm from Prometheus AlertManager",
    is_paused_upon_creation=False,
    schedule_interval=None,
    start_date=datetime(2022, 1, 1),
    tags=["osm", "webhook"],
)
def alert_vdu():
    # DAG factory: declares a single task, "main_task", that processes the
    # alerts found in the triggering DAG run's configuration.
    # schedule_interval=None means the DAG has no schedule of its own; it only
    # runs when triggered.

    @task(task_id="main_task")
    def main_task():
        # Iterates over conf["alerts"] of the current DAG run. Only alerts
        # whose "alertname" label is "vdu_down" are acted upon:
        #   * status "firing":   mark the matching healing rule as "alarm",
        #                        store a heal operation and publish it on the
        #                        message bus (topic "ns", key "heal").
        #   * status "resolved": mark the matching healing rule as "ok".
        # Any other status is logged and otherwise ignored.
        logger.debug("Running main task...")
        context = get_current_context()
        conf = context["dag_run"].conf
        for alarm in conf["alerts"]:
            logger.info("VDU alarm:")
            status = alarm["status"]
            logger.info(f" status: {status}")
            logger.info(f' annotations: {alarm["annotations"]}')
            logger.info(f' startsAt: {alarm["startsAt"]}')
            logger.info(f' endsAt: {alarm["endsAt"]}')
            logger.info(f' labels: {alarm["labels"]}')
            # vdu_down alert type
            if alarm["labels"]["alertname"] != "vdu_down":
                continue
            config = Config()
            common_db = CommonDbClient(config)
            labels = alarm["labels"]
            ns_id = labels["ns_id"]
            vdu_name = labels["vdu_name"]
            vnf_member_index = labels["vnf_member_index"]
            vm_id = labels["vm_id"]
            if status == "firing":
                # Searching alerting rule in MongoDB
                logger.info(
                    f"Searching healing alert rule in MongoDB: ns_id {ns_id}, "
                    f"vnf_member_index {vnf_member_index}, "
                    f"vdu_name {vdu_name}, "
                    f"vm_id {vm_id}"
                )
                alert = common_db.get_alert(
                    nsr_id=ns_id,
                    vnf_member_index=vnf_member_index,
                    vdu_id=None,
                    vdu_name=vdu_name,
                    action_type="healing",
                )
                if not alert:
                    logger.info("No alert rule was found")
                    continue
                logger.info("Found an alert rule:")
                logger.info(alert)
                # Update alert status
                common_db.update_alert_status(
                    uuid=alert["uuid"], alarm_status="alarm"
                )
                # Get VNFR from MongoDB
                vnfr = common_db.get_vnfr(
                    nsr_id=ns_id, member_index=vnf_member_index
                )
                logger.info(
                    f"Found VNFR ns_id: {ns_id}, vnf_member_index: {vnf_member_index}"
                )
                count_index = _find_count_index(vnfr, vm_id)
                if count_index is None:
                    logger.error(f"VDU {vm_id} not found in VNFR")
                    # Skip only this alert. A `break` here would leave the
                    # alerts loop and silently drop every remaining alert of
                    # the same webhook payload.
                    continue
                # Auto-healing type rule
                vnf_id = labels["vnf_id"]
                msg_bus = MessageBusClient(config)
                loop = asyncio.get_event_loop()
                _id = str(uuid.uuid4())
                now = time.time()
                action = alert["action"]
                vdu_id = action["vdu-id"]
                day1 = action["day1"]
                # The heal operation inherits the project visibility of the VNFR.
                vnfr_admin = vnfr["_admin"]
                nslcmop = _build_heal_nslcmop(
                    op_id=_id,
                    now=now,
                    ns_id=ns_id,
                    vnf_id=vnf_id,
                    vnf_member_index=vnf_member_index,
                    vdu_id=vdu_id,
                    count_index=count_index,
                    day1=day1,
                    projects_read=vnfr_admin["projects_read"],
                    projects_write=vnfr_admin["projects_write"],
                )
                # Store the operation first, then publish it on the bus.
                common_db.create_nslcmop(nslcmop)
                logger.info("Sending heal action message:")
                logger.info(nslcmop)
                loop.run_until_complete(msg_bus.aiowrite("ns", "heal", nslcmop))
            elif status == "resolved":
                # Searching alerting rule in MongoDB
                logger.info(
                    f"Searching alert rule in MongoDB: ns_id {ns_id}, "
                    f"vnf_member_index {vnf_member_index}, "
                    f"vdu_name {vdu_name}, "
                    f"vm_id {vm_id}"
                )
                alert = common_db.get_alert(
                    nsr_id=ns_id,
                    vnf_member_index=vnf_member_index,
                    vdu_id=None,
                    vdu_name=vdu_name,
                    action_type="healing",
                )
                if alert:
                    logger.info("Found an alert rule, updating status")
                    # Update alert status
                    common_db.update_alert_status(uuid=alert["uuid"], alarm_status="ok")

    main_task()
185
186
# Call the @dag-decorated factory and bind its result to a module-level name
# (presumably so Airflow's DAG discovery can pick it up — confirm against the
# deployment's Airflow version).
dag = alert_vdu()