type: NodePort
---
apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: prom
data:
- osm_rules.yml: |
+ osm_metric_rules.yml: |
groups:
- - name: osm_rules
+ - name: osm_metric_rules
rules:
- record: vm_status_extended
expr: (last_over_time(vm_status[1m]) * on (vm_id, vim_id) group_left(ns_id, vnf_id, vdu_id, project_id, job, vdu_name, vnf_member_index) last_over_time(ns_topology[1m])) or (last_over_time(ns_topology[1m]) * -1)
expr: (0 * (count (vm_status_extended==0) by (ns_id)>=0)) or (min by (ns_id) (vm_status_extended))
labels:
job: osm_prometheus
+ osm_alert_rules.yml: |
+ groups:
+ - name: osm_alert_rules
+ rules:
+ - alert: vdu_down
+ expr: vm_status_extended != 1
+ for: 3m
+ annotations:
+ summary: "VDU {{ $labels.vm_id }} in VIM {{ $labels.vim_id }} is down"
+ description: "VDU {{ $labels.vm_id }} in VIM {{ $labels.vim_id }} has been down for more than 3 minutes. NS instance id is {{ $labels.ns_id }}"
prometheus.yml: |
# Copyright 2018 The Prometheus Authors
# Copyright 2018 Whitestack
alertmanagers:
- static_configs:
- targets:
- # - alertmanager:9093
+ - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- - "osm_rules.yml"
- # - "second_rules.yml"
+ - "osm_metric_rules.yml"
+ - "osm_alert_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
static_configs:
- targets:
- pushgateway-prometheus-pushgateway:9091
-kind: ConfigMap
-metadata:
- name: prom
---
apiVersion: apps/v1
kind: StatefulSet
- name: prometheus-init-config
image: busybox
command: ["/bin/sh", "-c"]
- args: ['if [ ! -f "/etc/prometheus/prometheus.yml" ]; then cp /config/prometheus.yml /etc/prometheus; fi; cp /config/osm_rules.yml /etc/prometheus']
+ args: ['if [ ! -f "/etc/prometheus/prometheus.yml" ]; then cp /config/prometheus.yml /etc/prometheus; fi; cp /config/osm_metric_rules.yml /config/osm_alert_rules.yml /etc/prometheus']
volumeMounts:
- name: prom-config
mountPath: /etc/prometheus
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
---
defaultAirflowRepository: opensourcemano/airflow
defaultAirflowTag: "13"
--- /dev/null
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+---
+# Values passed to the prometheus-community/alertmanager helm chart.
+# extraArgs: presumably forwarded as command-line flags to Alertmanager
+# (confirm against the chart); here log.level is set to debug.
+extraArgs:
+ log.level: debug
+# The service is exposed as a NodePort, with port and nodePort both 9093.
+# NOTE(review): 9093 is outside Kubernetes' default NodePort range
+# (30000-32767) - confirm the cluster's service-node-port-range allows it.
+service:
+ type: NodePort
+ nodePort: 9093
+ port: 9093
+# Alertmanager configuration: notification receivers and the routing tree.
+config:
+ receivers:
+# default-receiver is declared with a name only (no integrations listed here).
+ - name: default-receiver
+# vdu-webhook sends alerts to the webhook-translator URL below.
+ - name: vdu-webhook
+ webhook_configs:
+ - url: http://webhook-translator/alert_vdu
+# Top-level route: default-receiver is the receiver named at this level;
+# group_wait / group_interval / repeat_interval set the grouping and
+# re-notification timings (see the Alertmanager <route> documentation).
+ route:
+ group_wait: 10s
+ group_interval: 2m
+ receiver: default-receiver
+ repeat_interval: 3h
+# Child route: alerts whose alertname equals "vdu_down" go to vdu-webhook.
+ routes:
+ - receiver: vdu-webhook
+ matchers:
+ - alertname = "vdu_down"
# Helm chart 1.6.0 corresponds to Airflow 2.3.0
AIRFLOW_HELM_VERSION=1.6.0
PROMPUSHGW_HELM_VERSION=1.18.2
+ALERTMANAGER_HELM_VERSION=0.22.0
# Install Airflow helm chart
function install_airflow() {
[ -z "${DEBUG_INSTALL}" ] || DEBUG end of function
}
+# Install or upgrade the Prometheus AlertManager helm chart.
+# If `helm status` finds no release named "alertmanager" in namespace "osm",
+# the prometheus-community repo is added and the chart is installed;
+# otherwise the existing release is upgraded. Both paths read values from
+# ${OSM_HELM_WORK_DIR}/alertmanager-values.yaml and pin the chart version to
+# ${ALERTMANAGER_HELM_VERSION}.
+function install_prometheus_alertmanager() {
+    # Entry trace (previously printed "end of function" by mistake).
+    [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function
+    if ! helm -n osm status alertmanager 2> /dev/null ; then
+        # if it does not exist, install
+        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+        helm repo update
+        # Quote the expansions so a work dir containing spaces is not word-split.
+        helm -n osm install alertmanager prometheus-community/alertmanager -f "${OSM_HELM_WORK_DIR}/alertmanager-values.yaml" --version "${ALERTMANAGER_HELM_VERSION}"
+    else
+        # if it exists, upgrade
+        helm repo update
+        helm -n osm upgrade alertmanager prometheus-community/alertmanager -f "${OSM_HELM_WORK_DIR}/alertmanager-values.yaml" --version "${ALERTMANAGER_HELM_VERSION}"
+    fi
+    [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function
+}
+
# main
OSM_DEVOPS="/usr/share/osm-devops"
track deploy_osm airflow_ok
install_prometheus_pushgateway
track deploy_osm pushgateway_ok
+install_prometheus_alertmanager
+track deploy_osm alertmanager_ok