Feature 10981: installation of AlertManager as part of NG-SA 04/13104/5
author garciadeblas <gerardo.garciadeblas@telefonica.com>
Tue, 28 Mar 2023 16:27:20 +0000 (18:27 +0200)
committer garciadeblas <gerardo.garciadeblas@telefonica.com>
Tue, 11 Apr 2023 07:35:40 +0000 (09:35 +0200)
Change-Id: I99bb5785081df4395be336f323d5d4ac3dfd68b6
Signed-off-by: garciadeblas <gerardo.garciadeblas@telefonica.com>
installers/docker/osm_pods/ng-prometheus.yaml
installers/helm/values/airflow-values.yaml
installers/helm/values/alertmanager-values.yaml [new file with mode: 0644]
installers/install_ngsa.sh

index 0172aaf..77ccbd1 100644 (file)
@@ -30,10 +30,13 @@ spec:
   type: NodePort
 ---
 apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prom
 data:
-  osm_rules.yml: |
+  osm_metric_rules.yml: |
     groups:
-      - name: osm_rules
+      - name: osm_metric_rules
         rules:
         - record: vm_status_extended
           expr: (last_over_time(vm_status[1m]) * on (vm_id, vim_id) group_left(ns_id, vnf_id, vdu_id, project_id, job, vdu_name, vnf_member_index) last_over_time(ns_topology[1m])) or (last_over_time(ns_topology[1m]) * -1)
@@ -47,6 +50,16 @@ data:
           expr: (0 * (count (vm_status_extended==0) by (ns_id)>=0)) or (min by (ns_id) (vm_status_extended))
           labels:
             job: osm_prometheus
+  osm_alert_rules.yml: |
+    groups:
+      - name: osm_alert_rules
+        rules:
+        - alert: vdu_down
+          expr: vm_status_extended != 1
+          for: 3m
+          annotations:
+            summary: "VDU {{ $labels.vm_id }} in VIM {{ $labels.vim_id }} is down"
+            description: "VDU {{ $labels.vm_id }} in VIM {{ $labels.vim_id }} has been down for more than 3 minutes. NS instance id is {{ $labels.ns_id }}"
   prometheus.yml: |
     # Copyright 2018 The Prometheus Authors
     # Copyright 2018 Whitestack
@@ -75,12 +88,12 @@ data:
       alertmanagers:
       - static_configs:
         - targets:
-          - alertmanager:9093
+          - alertmanager:9093
 
     # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
     rule_files:
-      - "osm_rules.yml"
-      # - "second_rules.yml"
+      - "osm_metric_rules.yml"
+      - "osm_alert_rules.yml"
 
     # A scrape configuration containing exactly one endpoint to scrape:
     # Here it's Prometheus itself.
@@ -94,9 +107,6 @@ data:
         static_configs:
         - targets:
           - pushgateway-prometheus-pushgateway:9091
-kind: ConfigMap
-metadata:
-  name: prom
 ---
 apiVersion: apps/v1
 kind: StatefulSet
@@ -119,7 +129,7 @@ spec:
       - name: prometheus-init-config
         image: busybox
         command: ["/bin/sh", "-c"]
-        args: ['if [ ! -f "/etc/prometheus/prometheus.yml" ]; then cp /config/prometheus.yml /etc/prometheus; fi; cp /config/osm_rules.yml /etc/prometheus']
+        args: ['if [ ! -f "/etc/prometheus/prometheus.yml" ]; then cp /config/prometheus.yml /etc/prometheus; fi; cp /config/osm_metric_rules.yml /config/osm_alert_rules.yml /etc/prometheus']
         volumeMounts:
           - name: prom-config
             mountPath: /etc/prometheus
index 0a62abf..9bde121 100644 (file)
@@ -1,19 +1,16 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
 #
-#   http://www.apache.org/licenses/LICENSE-2.0
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
 #
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
 ---
 defaultAirflowRepository: opensourcemano/airflow
 defaultAirflowTag: "13"
diff --git a/installers/helm/values/alertmanager-values.yaml b/installers/helm/values/alertmanager-values.yaml
new file mode 100644 (file)
index 0000000..0f43f93
--- /dev/null
@@ -0,0 +1,35 @@
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+#
+---
+extraArgs:
+  log.level: debug
+service:
+  type: NodePort
+  nodePort: 9093  # NOTE(review): below the Kubernetes default NodePort range (30000-32767); confirm the cluster allows it
+  port: 9093
+config:
+  receivers:
+    - name: default-receiver  # name only, no notifier configs attached
+    - name: vdu-webhook
+      webhook_configs:
+        - url: http://webhook-translator/alert_vdu
+  route:
+    group_wait: 10s
+    group_interval: 2m
+    receiver: default-receiver
+    repeat_interval: 3h
+    routes:
+      - receiver: vdu-webhook  # child route: only alerts matching the matcher below go to the webhook
+        matchers:
+          - alertname = "vdu_down"
index 5d7ad68..b90c3dc 100755 (executable)
@@ -18,6 +18,7 @@ set +eux
 # Helm chart 1.6.0 correspondes to Airflow 2.3.0
 AIRFLOW_HELM_VERSION=1.6.0
 PROMPUSHGW_HELM_VERSION=1.18.2
+ALERTMANAGER_HELM_VERSION=0.22.0
 
 # Install Airflow helm chart
 function install_airflow() {
@@ -58,6 +59,22 @@ function install_prometheus_pushgateway() {
     [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function
 }
 
+# Install or upgrade the Prometheus AlertManager helm chart (release "alertmanager", namespace "osm")
+function install_prometheus_alertmanager() {
+    [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function
+    if ! helm -n osm status alertmanager 2> /dev/null ; then
+        # release not found: register the chart repo, then install the pinned chart version
+        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+        helm repo update
+        helm -n osm install alertmanager prometheus-community/alertmanager -f "${OSM_HELM_WORK_DIR}/alertmanager-values.yaml" --version "${ALERTMANAGER_HELM_VERSION}"
+    else
+        # release already present: refresh the repo index and upgrade to the pinned chart version
+        helm repo update
+        helm -n osm upgrade alertmanager prometheus-community/alertmanager -f "${OSM_HELM_WORK_DIR}/alertmanager-values.yaml" --version "${ALERTMANAGER_HELM_VERSION}"
+    fi
+    [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function
+}
+
 # main
 
 OSM_DEVOPS="/usr/share/osm-devops"
@@ -106,4 +123,6 @@ install_airflow
 track deploy_osm airflow_ok
 install_prometheus_pushgateway
 track deploy_osm pushgateway_ok
+install_prometheus_alertmanager
+track deploy_osm alertmanager_ok