16b74cf59600b2b061cc59b459d9bd1e7bb014f7
[osm/devops.git] / installers / osm_health.sh
1 #!/usr/bin/env bash
2
3 # Copyright 2020 Telefónica Investigación y Desarrollo S.A.U.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
# Default values (each one can be overridden from the command line)
sampling_period=5     # seconds between two consecutive samples
time_for_readiness=2  # minutes of consecutive OK samples needed to declare the system ready
time_for_failure=7    # minutes of consecutive NOK samples needed to declare the system broken
KUBERNETES=           # empty: Docker Swarm installation (default); "y": Kubernetes
STACK_NAME=osm        # Swarm stack / Kubernetes namespace to inspect; "osm" by default

#######################################
# Prints a synopsis of the accepted options.
# Outputs: writes the synopsis to stderr
#######################################
usage() {
  {
    printf 'Usage: %s [-p seconds] [-r minutes] [-f minutes] [-s stack] [-k]\n' "${0##*/}"
    printf '  -p  sampling period, in seconds (default: 5)\n'
    printf '  -r  minutes of successful samples to declare the system ready (default: 2)\n'
    printf '  -f  minutes of failed samples to declare the system broken (default: 7)\n'
    printf '  -s  stack name / namespace to check (default: osm)\n'
    printf '  -k  check a Kubernetes installation instead of Docker Swarm\n'
  } >&2
}

#######################################
# Parses the command-line options into the configuration globals.
# Globals:   sampling_period, time_for_readiness, time_for_failure,
#            STACK_NAME, KUBERNETES (written)
# Arguments: the script arguments ("$@")
# Outputs:   on an unknown option or a missing option argument, getopts'
#            own diagnostic followed by the usage synopsis, on stderr
# Returns:   does not return on a bad option: exits the script with status 2,
#            so a mistyped flag cannot silently fall back to the defaults
#######################################
parse_options() {
  local opt OPTARG
  local OPTIND=1 # always start from the first argument of this call

  while getopts "p:r:f:s:k" opt; do
    case "${opt}" in
      p) sampling_period=${OPTARG} ;;
      r) time_for_readiness=${OPTARG} ;;
      f) time_for_failure=${OPTARG} ;;
      s) STACK_NAME=${OPTARG} ;;
      k) KUBERNETES="y" ;;
      *)
        # getopts reports both unknown options and missing arguments as '?'
        usage
        exit 2
        ;;
    esac
  done
}

parse_options "$@"
43
#######################################
# Prints how many samples, taken every <period> seconds, fit in <minutes>
# minutes. The result is never below 1, so that a verdict is never reached
# without evaluating at least one sample.
# Arguments: $1 - minutes (non-negative integer)
#            $2 - sampling period in seconds (positive integer)
# Outputs:   the number of samples, on stdout
# Returns:   0 on success; 1 (printing nothing) if an argument is not a
#            plain decimal integer or the period is zero
#######################################
samples_for_minutes() {
  local minutes=$1 period=$2 samples

  # Only plain digits are accepted: anything else would be evaluated as an
  # arithmetic expression by $(( ... )).
  [[ "${minutes}" =~ ^[0-9]+$ && "${period}" =~ ^[0-9]+$ ]] || return 1
  # 10# forces base 10, so values such as "08" are not rejected as bad octal.
  ((10#${period} > 0)) || return 1 # would divide by zero below

  samples=$((10#${minutes} * 60 / 10#${period}))
  ((samples > 0)) || samples=1
  printf '%s\n' "${samples}"
}

# ":-" falls back to the stock defaults when a value is unset or was passed as
# an empty string.
if ! oks_threshold=$(samples_for_minutes "${time_for_readiness:-2}" "${sampling_period:-5}") ||
  ! failures_threshold=$(samples_for_minutes "${time_for_failure:-7}" "${sampling_period:-5}"); then
  echo "ERROR: -p must be a positive integer and -r/-f non-negative integers" \
    "(got -p '${sampling_period}', -r '${time_for_readiness}', -f '${time_for_failure}')" >&2
  exit 2
fi
# oks_threshold:      No. ok samples to declare the system ready
# failures_threshold: No. nok samples to declare the system broken
failures_in_a_row=0
oks_in_a_row=0
48
49
#######################################
# Show status of the OSM services deployed with helm.
# Each helm invocation is preceded by an echo of the command line being run.
# Globals:   STACK_NAME (read) - used both as namespace and as release name
# Outputs:   the echoed command lines and whatever helm prints
#######################################
show_helm_status() {
  # STACK_NAME is quoted so that a value with spaces or glob characters is
  # passed to helm as one argument, and an empty value does not make the
  # following word ("list"/"status") become the argument of -n.
  echo "helm -n ${STACK_NAME} list"
  helm -n "${STACK_NAME}" list
  echo "helm -n ${STACK_NAME} status ${STACK_NAME}"
  helm -n "${STACK_NAME}" status "${STACK_NAME}"
}

show_helm_status
55
####################################################################################
# Loop to check system readiness
####################################################################################

#######################################
# Filters a resource listing, keeping either the ready or the not-ready rows.
# A READY/REPLICAS field "r/d" is considered ready when r == d and d > 0
# (so 1/1, 2/2, 3/3, ... all qualify).
#   deployment : READY is field 2; field 4 must also equal the replica count
#                (presumably the AVAILABLE column of `kubectl get` - confirm).
#   statefulset: READY is field 2.
#   service    : only rows whose field 3 is "replicated" are considered;
#                REPLICAS is field 4 and the service name is field 2.
# Any other row of a deployment/statefulset listing (error text, blank line)
# is reported as not ready, so a failing kubectl never looks healthy.
# Arguments: $1 - deployment | statefulset | service
#            $2 - ready | not_ready
# Inputs:    the listing, on stdin
# Outputs:   one formatted line per selected row, on stdout
#######################################
filter_rows() {
  awk -v kind="$1" -v want="$2" '
    # Number of replicas if "field" is n/n with n > 0, else 0.
    function ready_replicas(field,    parts) {
      if (split(field, parts, "/") == 2 && parts[1] ~ /^[1-9][0-9]*$/ && parts[1] == parts[2])
        return parts[1] + 0
      return 0
    }
    {
      if (kind == "service") {
        if ($3 != "replicated") next
        ok = (ready_replicas($4) > 0)
        if (ok == (want == "ready")) printf("%20s\t%s\n", $2, $4)
        next
      }
      n = ready_replicas($2)
      ok = (kind == "deployment") ? (n > 0 && $4 == n) : (n > 0)
      if (ok == (want == "ready")) printf("%20s\t%s\t%s\n", $1, $2, $4)
    }'
}

#######################################
# Counts the non-empty lines of a string.
# Arguments: $1 - text to count
# Outputs:   the count, on stdout
#######################################
count_rows() {
  printf '%s\n' "$1" | grep -c -v -e '^$'
}

#######################################
# Samples the platform every sampling_period seconds until either
# oks_threshold consecutive OK samples or failures_threshold consecutive
# NOK samples have been seen.
# Globals:   KUBERNETES, STACK_NAME, sampling_period, oks_threshold,
#            failures_threshold (read); oks_in_a_row, failures_in_a_row
#            (read and written)
# Outputs:   progress line on OK samples, list of pending items on NOK samples
#######################################
run_health_loop() {
  local kind state ready not_ready count_ready count_not_ready
  local pending report

  while ((failures_in_a_row < failures_threshold && oks_in_a_row < oks_threshold)); do
    pending=0 # items that make this sample a NOK one
    report="" # text describing them, printed only on NOK samples

    #------------ CHECKS FOR KUBERNETES INSTALLATION
    if [[ -n "${KUBERNETES}" ]]; then
      # State of Deployments, then of Statefulsets
      for kind in deployment statefulset; do
        # stderr is captured too: kubectl errors become not-ready rows
        state=$(kubectl get "${kind}" -n "${STACK_NAME}" --no-headers 2>&1)
        ready=$(filter_rows "${kind}" ready <<<"${state}")
        not_ready=$(filter_rows "${kind}" not_ready <<<"${state}")
        count_ready=$(count_rows "${ready}")
        count_not_ready=$(count_rows "${not_ready}")

        if ((count_not_ready != 0)); then
          pending=$((pending + count_not_ready))
          report+="${count_not_ready} of $((count_not_ready + count_ready)) ${kind}s starting:"$'\n'
          report+="${not_ready}"$'\n\n'
        fi
      done

    #------------ CHECKS FOR DOCKER SWARM INSTALLATION
    else
      # State of Docker Services (-F: the stack name is a literal, not a regex)
      state=$(sg docker -c "docker service ls" 2>&1 | grep -F -- " ${STACK_NAME}_")
      ready=$(filter_rows service ready <<<"${state}")
      not_ready=$(filter_rows service not_ready <<<"${state}")
      count_ready=$(count_rows "${ready}")
      count_not_ready=$(count_rows "${not_ready}")

      if ((count_ready + count_not_ready == 0)); then
        # docker failed or the stack is not deployed: "nothing is not ready"
        # must not be mistaken for a healthy system.
        pending=1
        report="No replicated services found for stack '${STACK_NAME}'"$'\n'
      elif ((count_not_ready != 0)); then
        pending=${count_not_ready}
        report="${count_not_ready} of $((count_not_ready + count_ready)) services starting:"$'\n'
        report+="${not_ready}"$'\n'
      fi
    fi

    if ((pending == 0)); then
      # OK sample
      ((++oks_in_a_row))
      failures_in_a_row=0
      # \r and no newline: the progress line is rewritten in place
      printf '===> Successful checks: %s/%s\r' "${oks_in_a_row}" "${oks_threshold}"
    else
      # NOK sample
      ((++failures_in_a_row))
      oks_in_a_row=0
      echo
      echo "Bootstraping... ${failures_in_a_row} attempts of ${failures_threshold}"
      printf '%s' "${report}"
    fi

    #------------ NEXT SAMPLE
    sleep "${sampling_period}"
  done
}

run_health_loop
140
141
####################################################################################
# OUTCOME
####################################################################################
# The verdict is always preceded by an empty line. Reaching the failure
# threshold ends the script with status 1; otherwise the system is reported
# as ready and the script ends normally.
echo
if ((failures_in_a_row >= failures_threshold)); then
  echo "SYSTEM IS BROKEN"
  exit 1
fi
echo "SYSTEM IS READY"