From bae51f695443abcd0e410a92161af05b497ddc7a Mon Sep 17 00:00:00 2001
From: garciadeblas
Date: Tue, 28 Mar 2023 18:27:20 +0200
Subject: [PATCH] Feature 10981: installation of AlertManager as part of NG-SA

Summary of changes:
- ng-prometheus.yaml: rename the recording-rule file osm_rules.yml to
  osm_metric_rules.yml, add osm_alert_rules.yml with the "vdu_down" alert,
  enable the alertmanager:9093 target, load both rule files, copy both rule
  files in the init container, and move kind/metadata to the top of the
  "prom" ConfigMap.
- airflow-values.yaml: replace the license header.
- alertmanager-values.yaml: new Helm values for the AlertManager chart
  (NodePort 9093, "vdu-webhook" receiver routed on alertname = "vdu_down").
- install_ngsa.sh: new function install_prometheus_alertmanager, which
  installs or upgrades the prometheus-community/alertmanager chart in the
  "osm" namespace.

Change-Id: I99bb5785081df4395be336f323d5d4ac3dfd68b6
Signed-off-by: garciadeblas
---
 installers/docker/osm_pods/ng-prometheus.yaml | 28 ++++++++++-----
 installers/helm/values/airflow-values.yaml    | 25 ++++++-------
 .../helm/values/alertmanager-values.yaml      | 35 +++++++++++++++++++
 installers/install_ngsa.sh                    | 19 ++++++++++
 4 files changed, 84 insertions(+), 23 deletions(-)
 create mode 100644 installers/helm/values/alertmanager-values.yaml

diff --git a/installers/docker/osm_pods/ng-prometheus.yaml b/installers/docker/osm_pods/ng-prometheus.yaml
index 0172aafd..77ccbd1b 100644
--- a/installers/docker/osm_pods/ng-prometheus.yaml
+++ b/installers/docker/osm_pods/ng-prometheus.yaml
@@ -30,10 +30,13 @@ spec:
   type: NodePort
 ---
 apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prom
 data:
-  osm_rules.yml: |
+  osm_metric_rules.yml: |
     groups:
-      - name: osm_rules
+      - name: osm_metric_rules
         rules:
         - record: vm_status_extended
           expr: (last_over_time(vm_status[1m]) * on (vm_id, vim_id) group_left(ns_id, vnf_id, vdu_id, project_id, job, vdu_name, vnf_member_index) last_over_time(ns_topology[1m])) or (last_over_time(ns_topology[1m]) * -1)
@@ -47,6 +50,16 @@ data:
           expr: (0 * (count (vm_status_extended==0) by (ns_id)>=0)) or (min by (ns_id) (vm_status_extended))
           labels:
             job: osm_prometheus
+  osm_alert_rules.yml: |
+    groups:
+      - name: osm_alert_rules
+        rules:
+        - alert: vdu_down
+          expr: vm_status_extended != 1
+          for: 3m
+          annotations:
+            summary: "VDU {{ $labels.vm_id }} in VIM {{ $labels.vim_id }} is down"
+            description: "VDU {{ $labels.vm_id }} in VIM {{ $labels.vim_id }} has been down for more than 3 minutes. NS instance id is {{ $labels.ns_id }}"
   prometheus.yml: |
     # Copyright 2018 The Prometheus Authors
     # Copyright 2018 Whitestack
@@ -75,12 +88,12 @@ data:
       alertmanagers:
       - static_configs:
         - targets:
-          # - alertmanager:9093
+          - alertmanager:9093
 
     # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
     rule_files:
-      - "osm_rules.yml"
-      # - "second_rules.yml"
+      - "osm_metric_rules.yml"
+      - "osm_alert_rules.yml"
 
     # A scrape configuration containing exactly one endpoint to scrape:
     # Here it's Prometheus itself.
@@ -94,9 +107,6 @@ data:
         static_configs:
         - targets:
           - pushgateway-prometheus-pushgateway:9091
-kind: ConfigMap
-metadata:
-  name: prom
 ---
 apiVersion: apps/v1
 kind: StatefulSet
@@ -119,7 +129,7 @@ spec:
       - name: prometheus-init-config
         image: busybox
         command: ["/bin/sh", "-c"]
-        args: ['if [ ! -f "/etc/prometheus/prometheus.yml" ]; then cp /config/prometheus.yml /etc/prometheus; fi; cp /config/osm_rules.yml /etc/prometheus']
+        args: ['if [ ! -f "/etc/prometheus/prometheus.yml" ]; then cp /config/prometheus.yml /etc/prometheus; fi; cp /config/osm_metric_rules.yml /config/osm_alert_rules.yml /etc/prometheus']
         volumeMounts:
           - name: prom-config
             mountPath: /etc/prometheus
diff --git a/installers/helm/values/airflow-values.yaml b/installers/helm/values/airflow-values.yaml
index 0a62abfe..9bde1214 100644
--- a/installers/helm/values/airflow-values.yaml
+++ b/installers/helm/values/airflow-values.yaml
@@ -1,19 +1,16 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
 #
-#   http://www.apache.org/licenses/LICENSE-2.0
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
 ---
 defaultAirflowRepository: opensourcemano/airflow
 defaultAirflowTag: "13"
diff --git a/installers/helm/values/alertmanager-values.yaml b/installers/helm/values/alertmanager-values.yaml
new file mode 100644
index 00000000..0f43f93a
--- /dev/null
+++ b/installers/helm/values/alertmanager-values.yaml
@@ -0,0 +1,35 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+---
+extraArgs:
+  log.level: debug
+service:
+  type: NodePort
+  nodePort: 9093
+  port: 9093
+config:
+  receivers:
+    - name: default-receiver
+    - name: vdu-webhook
+      webhook_configs:
+        - url: http://webhook-translator/alert_vdu
+  route:
+    group_wait: 10s
+    group_interval: 2m
+    receiver: default-receiver
+    repeat_interval: 3h
+    routes:
+      - receiver: vdu-webhook
+        matchers:
+          - alertname = "vdu_down"
diff --git a/installers/install_ngsa.sh b/installers/install_ngsa.sh
index 5d7ad683..b90c3dc0 100755
--- a/installers/install_ngsa.sh
+++ b/installers/install_ngsa.sh
@@ -18,6 +18,7 @@ set +eux
 # Helm chart 1.6.0 correspondes to Airflow 2.3.0
 AIRFLOW_HELM_VERSION=1.6.0
 PROMPUSHGW_HELM_VERSION=1.18.2
+ALERTMANAGER_HELM_VERSION=0.22.0
 
 # Install Airflow helm chart
 function install_airflow() {
@@ -58,6 +59,22 @@ function install_prometheus_pushgateway() {
     [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function
 }
 
+# Install Prometheus AlertManager helm chart
+function install_prometheus_alertmanager() {
+    [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function
+    if ! helm -n osm status alertmanager 2> /dev/null ; then
+        # if it does not exist, install
+        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+        helm repo update
+        helm -n osm install alertmanager prometheus-community/alertmanager -f ${OSM_HELM_WORK_DIR}/alertmanager-values.yaml --version ${ALERTMANAGER_HELM_VERSION}
+    else
+        # if it exists, upgrade
+        helm repo update
+        helm -n osm upgrade alertmanager prometheus-community/alertmanager -f ${OSM_HELM_WORK_DIR}/alertmanager-values.yaml --version ${ALERTMANAGER_HELM_VERSION}
+    fi
+    [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function
+}
+
 # main
 
 OSM_DEVOPS="/usr/share/osm-devops"
@@ -106,4 +123,6 @@ install_airflow
 track deploy_osm airflow_ok
 install_prometheus_pushgateway
 track deploy_osm pushgateway_ok
+install_prometheus_alertmanager
+track deploy_osm alertmanager_ok
 
-- 
2.25.1