#!/usr/bin/env bash

# Copyright 2020 Telefónica Investigación y Desarrollo S.A.U.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ramonsalguer917ce8c2020-07-16 14:42:04 +020017# Default values
18sampling_period=5 # seconds
19time_for_readiness=2 # minutes ready
garciadeblas6e2e0072021-09-07 18:58:35 +020020time_for_failure=7 # minutes broken
ramonsalguer917ce8c2020-07-16 14:42:04 +020021KUBERNETES= # By default, assumes Docker Swarm installation
22STACK_NAME=osm # By default, "osm"
Mike Marchetti9d9192b2018-09-21 12:03:05 -040023
ramonsalguer917ce8c2020-07-16 14:42:04 +020024while getopts "p:r:f:s:k" o; do
Mike Marchetti9d9192b2018-09-21 12:03:05 -040025 case "${o}" in
ramonsalguer917ce8c2020-07-16 14:42:04 +020026 p)
27 sampling_period=${OPTARG}
28 ;;
29 r)
30 time_for_readiness=${OPTARG}
31 ;;
32 f)
33 time_for_failure=${OPTARG}
Mike Marchetti9d9192b2018-09-21 12:03:05 -040034 ;;
35 s)
36 STACK_NAME=${OPTARG}
37 ;;
vijaynag8339ed22019-07-25 17:10:58 +053038 k)
39 KUBERNETES="y"
40 ;;
Mike Marchetti9d9192b2018-09-21 12:03:05 -040041 esac
42done
43
ramonsalguer917ce8c2020-07-16 14:42:04 +020044oks_threshold=$((time_for_readiness*60/${sampling_period})) # No. ok samples to declare the system ready
45failures_threshold=$((time_for_failure*60/${sampling_period})) # No. nok samples to declare the system broken
46failures_in_a_row=0
47oks_in_a_row=0
Mike Marchetti9d9192b2018-09-21 12:03:05 -040048
ramonsalguer917ce8c2020-07-16 14:42:04 +020049
50####################################################################################
51# Loop to check system readiness
52####################################################################################
53while [[ (${failures_in_a_row} -lt ${failures_threshold}) && (${oks_in_a_row} -lt ${oks_threshold}) ]]
54do
55
56 #------------ CHECKS FOR KUBERNETES INSTALLATION
57 if [ -n "$KUBERNETES" ]
58 then
59
60 # State of Deployments
61 DEPLOYMENTS_STATE=$(kubectl get deployment -n ${STACK_NAME} --no-headers 2>&1)
62 DEPLOYMENTS_READY=$(echo "${DEPLOYMENTS_STATE}" | awk '$2=="1/1" && $4=="1" {printf ("%20s\t%s\t%s\n", $1, $2, $4)}')
63 DEPLOYMENTS_NOT_READY=$(echo "${DEPLOYMENTS_STATE}" | awk '$2!="1/1" || $4!="1" {printf ("%20s\t%s\t%s\n", $1, $2, $4)}')
64 COUNT_DEPLOYMENTS_READY=$(echo "${DEPLOYMENTS_READY}"| grep -v -e '^$' | wc -l)
65 COUNT_DEPLOYMENTS_NOT_READY=$(echo "${DEPLOYMENTS_NOT_READY}" | grep -v -e '^$' | wc -l)
66
67 # State of Statefulsets
68 STS_STATE=$(kubectl get statefulset -n ${STACK_NAME} --no-headers 2>&1)
69 STS_READY=$(echo "${STS_STATE}" | awk '$2=="1/1" {printf ("%20s\t%s\t%s\n", $1, $2, $4)}')
70 STS_NOT_READY=$(echo "${STS_STATE}" | awk '$2!="1/1" {printf ("%20s\t%s\t%s\n", $1, $2, $4)}')
71 COUNT_STS_READY=$(echo "${STS_READY}" | grep -v -e '^$' | wc -l)
72 COUNT_STS_NOT_READY=$(echo "${STS_NOT_READY}" | grep -v -e '^$' | wc -l)
73
74 # OK sample
75 if [[ $((${COUNT_DEPLOYMENTS_NOT_READY}+${COUNT_STS_NOT_READY})) -eq 0 ]]
76 then
77 ((++oks_in_a_row))
78 failures_in_a_row=0
79 echo -ne ===\> Successful checks: "${oks_in_a_row}"/${oks_threshold}\\r
80 # NOK sample
81 else
82 ((++failures_in_a_row))
83 oks_in_a_row=0
84 echo
85 echo Bootstraping... "${failures_in_a_row}" attempts of ${failures_threshold}
86
87 # Reports failed deployments
88 if [[ "${COUNT_DEPLOYMENTS_NOT_READY}" -ne 0 ]]
89 then
90 echo ${COUNT_DEPLOYMENTS_NOT_READY} of $((${COUNT_DEPLOYMENTS_NOT_READY}+${COUNT_DEPLOYMENTS_READY})) deployments starting:
91 echo "${DEPLOYMENTS_NOT_READY}"
92 echo
93 fi
94
95 # Reports failed statefulsets
96 if [[ "${COUNT_STS_NOT_READY}" -ne 0 ]]
97 then
98 echo ${COUNT_STS_NOT_READY} of $((${COUNT_STS_NOT_READY}+${COUNT_STS_READY})) statefulsets starting:
99 echo "${STS_NOT_READY}"
100 echo
101 fi
vijaynag8339ed22019-07-25 17:10:58 +0530102 fi
ramonsalguer917ce8c2020-07-16 14:42:04 +0200103
104 #------------ CHECKS FOR DOCKER SWARM INSTALLATION
vijaynag8339ed22019-07-25 17:10:58 +0530105 else
ramonsalguer917ce8c2020-07-16 14:42:04 +0200106 # State of Docker Services
107 SERVICES_STATE=$(sg docker -c "docker service ls" 2>&1 | grep " ${STACK_NAME}_")
108 SERVICES_READY=$(echo "${SERVICES_STATE}" | awk '$3=="replicated" && $4=="1/1" {printf ("%20s\t%s\n", $2, $4)}')
109 SERVICES_NOT_READY=$(echo "${SERVICES_STATE}" | awk '$3=="replicated" && $4!="1/1" {printf ("%20s\t%s\n", $2, $4)}')
110 COUNT_SERVICES_READY=$(echo "${SERVICES_READY}" | grep -v -e '^$' | wc -l)
111 COUNT_SERVICES_NOT_READY=$(echo "${SERVICES_NOT_READY}" | grep -v -e '^$' | wc -l)
112
113 # OK sample
114 if [[ ${COUNT_SERVICES_NOT_READY} -eq 0 ]]
115 then
116 ((++oks_in_a_row))
117 failures_in_a_row=0
118 echo -ne ===\> Successful checks: "${oks_in_a_row}"/${oks_threshold}\\r
119 # NOK sample
120 else
121 ((++failures_in_a_row))
122 oks_in_a_row=0
123 echo
124 echo Bootstraping... "${failures_in_a_row}" attempts of ${failures_threshold}
125 echo ${COUNT_SERVICES_NOT_READY} of $((${COUNT_SERVICES_NOT_READY}+${COUNT_SERVICES_READY})) services starting:
126 echo "${SERVICES_NOT_READY}"
vijaynag8339ed22019-07-25 17:10:58 +0530127 fi
Mike Marchetti9d9192b2018-09-21 12:03:05 -0400128 fi
129
ramonsalguer917ce8c2020-07-16 14:42:04 +0200130 #------------ NEXT SAMPLE
131 sleep ${sampling_period}
132
Mike Marchetti9d9192b2018-09-21 12:03:05 -0400133done
Mike Marchetti37c3f512018-09-24 10:27:00 -0400134
tiernobc983ec2018-10-11 15:03:06 +0200135
ramonsalguer917ce8c2020-07-16 14:42:04 +0200136####################################################################################
137# OUTCOME
138####################################################################################
139if [[ (${failures_in_a_row} -ge ${failures_threshold}) ]]
140then
141 echo
142 echo SYSTEM IS BROKEN
143 exit 1
144else
145 echo
146 echo SYSTEM IS READY
147fi