V_OPERATOR=""
V_MONGODB_EXPORTER=""
V_MYSQL_EXPORTER=""
+V_KAFKA_EXPORTER=""
V_OPERATOR=$PROMETHEUS_OPERATOR
V_MONGODB_EXPORTER=$PROMETHEUS_MONGODB_EXPORTER
V_MYSQL_EXPORTER=$PROMETHEUS_MYSQL_EXPORTER
+V_KAFKA_EXPORTER=$PROMETHEUS_KAFKA_EXPORTER
function usage(){
echo "V_OPERATOR=$V_OPERATOR"
echo "V_MONGODB_EXPORTER=$V_MONGODB_EXPORTER"
echo "V_MYSQL_EXPORTER=$V_MYSQL_EXPORTER"
+ echo "V_KAFKA_EXPORTER=$V_KAFKA_EXPORTER"
}
+# Check K8s version
+kubernetes_version=`kubectl version --short | grep Server | awk '{print $3}'`
+min_kubernetes_version="v1.16.0"
+if [[ "$kubernetes_version" < "$min_kubernetes_version" ]]
+then
+ echo "K8s monitoring could not be installed: Kube-prometheus-stack requires a Kubernetes 1.16+ (current version: $kubernetes_version)"
+ exit 1
+fi
+
if [ -n "$SERVICE_TYPE" ] ; then
if [ [ $SERVICE_TYPE != "ClusterIP" ] || [ $SERVICE_TYPE != "NodePort" ] || [ $SERVICE_TYPE != "LoadBalancer" ] ] ; then
echo "Wrong service type..."
helm > /dev/null 2>&1
if [ $? != 0 ] ; then
echo "Helm is not installed, installing ....."
- curl https://get.helm.sh/helm-v2.15.2-linux-amd64.tar.gz --output helm-v2.15.2.tar.gz
- tar -zxvf helm-v2.15.2.tar.gz
+ curl https://get.helm.sh/helm-v3.7.2-linux-amd64.tar.gz --output helm-v3.7.2.tar.gz
+ tar -zxvf helm-v3.7.2.tar.gz
sudo mv linux-amd64/helm /usr/local/bin/helm
rm -r linux-amd64
- rm helm-v2.15.2.tar.gz
-fi
-
-echo "Checking if helm-tiller is installed..."
-kubectl --namespace kube-system get serviceaccount tiller > /dev/null 2>&1
-if [ $? == 1 ] ; then
- # tiller account for kubernetes
- kubectl --namespace kube-system create serviceaccount tiller
- kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
- # HELM initialization
- helm init --stable-repo-url https://charts.helm.sh/stable --service-account tiller
-
- # Wait for Tiller to be up and running
- while true
- do
- tiller_status=`kubectl -n kube-system get deployment.apps/tiller-deploy --no-headers | awk '{print $2'}`
- if [ ! -z "$tiller_status" ]
- then
- if [ $tiller_status == "1/1" ]
- then
- echo "Go...."
- break
- fi
- fi
- echo "Waiting for tiller READY...."
- sleep 2
- done
+ rm helm-v3.7.2.tar.gz
fi
-# create monitoring namespace
+# Create monitoring namespace
echo "Creating namespace $NAMESPACE"
kubectl create namespace $NAMESPACE
-# Prometheus operator installation
+# Needed changes for Kube-Prometheus on Kubeadm installation
+# Kube-Controller-Manager
+sudo sed -e "s/- --bind-address=127.0.0.1/- --bind-address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-controller-manager.yaml
+# Kube-Scheduler
+sudo sed -e "s/- --bind-address=127.0.0.1/- --bind-address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-scheduler.yaml
+# Kube-Proxy
+kubectl -n kube-system get cm/kube-proxy -o yaml > $HERE/kube-proxy-cm.yaml
+sed -e "s/metricsBindAddress: \"\"/metricsBindAddress: 0.0.0.0:10249/" -i $HERE/kube-proxy-cm.yaml
+kubectl -n kube-system delete cm kube-proxy
+kubectl -n kube-system apply -f $HERE/kube-proxy-cm.yaml
+rm $HERE/kube-proxy-cm.yaml
+kubectl delete pod -l k8s-app=kube-proxy -n kube-system
+# Etcd
+sudo cp /etc/kubernetes/pki/etcd/healthcheck-client.key $HERE/healthcheck-client.key
+sudo chmod a+r $HERE/healthcheck-client.key
+kubectl -n $NAMESPACE create secret generic etcd-client-cert --from-file=/etc/kubernetes/pki/etcd/ca.crt --from-file=/etc/kubernetes/pki/etcd/healthcheck-client.crt --from-file=$HERE/healthcheck-client.key
+sudo awk '/--trusted-ca-file=\/etc\/kubernetes\/pki\/etcd\/ca.crt/ { print; print " - --metrics=extensive"; next }1' /etc/kubernetes/manifests/etcd.yaml > $HERE/tmp && sudo mv $HERE/tmp /etc/kubernetes/manifests/etcd.yaml
+sudo chown root:root /etc/kubernetes/manifests/etcd.yaml
+sudo chmod 600 /etc/kubernetes/manifests/etcd.yaml
+
+# Add Helm prometheus-community repo
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo update
+
+# kube-prometheus-stack installation (previously called prometheus-operator)
$HERE/change-charts-prometheus-operator.sh
-echo "Creating stable/prometheus-operator"
-helm install --namespace $NAMESPACE --version=$V_OPERATOR --name osm-monitoring --set kubelet.serviceMonitor.https=true,prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false,alertmanager.service.type=$SERVICE_TYPE,prometheus.service.type=$SERVICE_TYPE,grafana.serviceMonitor.selfMonitor=false $HERE/helm_charts/prometheus-operator
+echo "Creating stable/kube-prometheus-stack"
+cat > $HERE/kube-prometheus-stack-values.yaml <<EOF
+kubeControllerManager:
+ service:
+ enabled: true
+ port: 10257
+ targetPort: 10257
+ serviceMonitor:
+ https: true
+ insecureSkipVerify: true
+kubeScheduler:
+ service:
+ enabled: true
+ port: 10259
+ targetPort: 10259
+ serviceMonitor:
+ https: true
+ insecureSkipVerify: true
+kubelet:
+ serviceMonitor:
+ https: true
+kubeEtcd:
+ serviceMonitor:
+ scheme: https
+ insecureSkipVerify: false
+ serverName: localhost
+ caFile: /etc/prometheus/secrets/etcd-client-cert/ca.crt
+ certFile: /etc/prometheus/secrets/etcd-client-cert/healthcheck-client.crt
+ keyFile: /etc/prometheus/secrets/etcd-client-cert/healthcheck-client.key
+alertmanager:
+ service:
+ type: $SERVICE_TYPE
+grafana:
+ enabled: false
+ forceDeployDashboards: true
+prometheus:
+ service:
+ type: $SERVICE_TYPE
+ prometheusSpec:
+ serviceMonitorSelectorNilUsesHelmValues: false
+ secrets: ['etcd-client-cert']
+EOF
+helm install osm-monitoring --namespace $NAMESPACE --version=$V_OPERATOR -f $HERE/kube-prometheus-stack-values.yaml $HERE/helm_charts/kube-prometheus-stack
# Exporters installation
-
# MongoDB
# exporter
-echo "Creating stable/prometheus-mongodb-exporter"
-helm install --namespace $NAMESPACE --version=$V_MONGODB_EXPORTER --name osm-mongodb-exporter --set image.tag='0.10.0',mongodb.uri='mongodb://mongodb-k8s.osm:27017' stable/prometheus-mongodb-exporter
+echo "Creating prometheus-community/prometheus-mongodb-exporter"
+helm install osm-mongodb-exporter --namespace $NAMESPACE --version=$V_MONGODB_EXPORTER --set serviceMonitor.additionalLabels.release=osm-monitoring,mongodb.uri='mongodb://mongodb-k8s.osm:27017' prometheus-community/prometheus-mongodb-exporter
#dashboard:
kubectl -n $NAMESPACE apply -f $HERE/mongodb-exporter-dashboard.yaml
-
# Mysql
# exporter
-echo "Creating stable/prometheus-mysql-exporter"
-helm install --namespace $NAMESPACE --version=$V_MYSQL_EXPORTER --name osm-mysql-exporter --set serviceMonitor.enabled=true,mysql.user="root",mysql.pass=`kubectl -n osm get secret ro-db-secret -o yaml | grep MYSQL_ROOT_PASSWORD | awk '{print $2}' | base64 -d`,mysql.host="mysql.osm",mysql.port="3306" stable/prometheus-mysql-exporter
+echo "Creating prometheus-community/prometheus-mysql-exporter"
+helm install osm-mysql-exporter --namespace $NAMESPACE --version=$V_MYSQL_EXPORTER --set serviceMonitor.enabled=true,serviceMonitor.additionalLabels.release=osm-monitoring,mysql.user="root",mysql.pass=`kubectl -n osm get secret ro-db-secret -o yaml | grep -i -A1 '^data:$' | grep MYSQL_ROOT_PASSWORD | awk '{print $2}' | base64 -d`,mysql.host="mysql.osm",mysql.port="3306",'collectors.info_schema\.tables=true' prometheus-community/prometheus-mysql-exporter
#dashboard:
kubectl -n $NAMESPACE apply -f $HERE/mysql-exporter-dashboard.yaml
-
# Kafka
# exporter
-helm install --namespace $NAMESPACE --name osm-kafka-exporter $HERE/helm_charts/prometheus-kafka-exporter
+echo "Creating prometheus-community/prometheus-kafka-exporter"
+helm install osm-kafka-exporter --namespace $NAMESPACE --version=$V_KAFKA_EXPORTER --set prometheus.serviceMonitor.enabled=true,prometheus.serviceMonitor.additionalLabels.release=osm-monitoring,kafkaServer={kafka.osm.svc.cluster.local:9092},service.port=9092 prometheus-community/prometheus-kafka-exporter
# dashboard:
kubectl -n $NAMESPACE apply -f $HERE/kafka-exporter-dashboard.yaml
--- /dev/null
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{- /*
+Generated from 'kubelet' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubelet.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ namespace: {{ template "kube-prometheus-stack.namespace" . }}
+ name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }}
+ annotations:
+{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
+ labels:
+ {{- if $.Values.grafana.sidecar.dashboards.label }}
+ {{ $.Values.grafana.sidecar.dashboards.label }}: "1"
+ {{- end }}
+ app: {{ template "kube-prometheus-stack.name" $ }}-grafana
+{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
+data:
+ kubelet.json: |-
+ {
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "refresh": "10s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 2,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Running Kubelets",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 3,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Running Pods",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 4,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Running Container",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 5,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Actual Volume Count",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 6,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Desired Volume Count",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 7,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Config Error Count",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation Error Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation duration 99th quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 11,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} pod",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} worker",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pod Start Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} pod",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} worker",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pod Start Duration",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Operation Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Operation Error Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 15,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Operation Duration 99th quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 16,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}operation_type{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cgroup manager operation rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cgroup manager 99th quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Pod lifecycle event generator",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 18,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 19,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist interval",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 20,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist duration",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 21,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "2xx",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "3xx",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "4xx",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5xx",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RPC Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Request duration 99th quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 23,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 24,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU usage",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 25,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{`{{`}}instance{{`}}`}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Goroutines",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": null,
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(kube_pod_info, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "instance",
+ "options": [
+
+ ],
+ "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "UTC",
+ "title": "Kubernetes / Kubelet",
+ "uid": "3138fa155d5915769fbded898ac09fd9",
+ "version": 0
+ }
+{{- end }}
\ No newline at end of file