#!/usr/bin/env bash

# Copyright 2020 Telefónica Investigación y Desarrollo S.A.U.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values
sampling_period=5       # seconds
time_for_readiness=2    # minutes ready
time_for_failure=7      # minutes broken
KUBERNETES=             # By default, assumes Docker Swarm installation
STACK_NAME=osm          # By default, "osm"

#######################################
# Overrides the defaults above from command-line flags.
# Globals:
#   sampling_period, time_for_readiness, time_for_failure, STACK_NAME,
#   KUBERNETES (written)
# Arguments:
#   The command line to parse, normally "$@":
#     -p <seconds>  time between two consecutive samples
#     -r <minutes>  run of consecutive OK samples needed to declare readiness
#     -f <minutes>  run of consecutive NOK samples needed to declare failure
#     -s <name>     stack name (also used as the Kubernetes namespace)
#     -k            check a Kubernetes installation instead of Docker Swarm
#######################################
parse_options() {
  local opt
  local OPTIND=1  # local, so that the function can be called more than once
  while getopts "p:r:f:s:k" opt; do
    case "${opt}" in
      p) sampling_period=${OPTARG} ;;
      r) time_for_readiness=${OPTARG} ;;
      f) time_for_failure=${OPTARG} ;;
      s) STACK_NAME=${OPTARG} ;;
      k) KUBERNETES="y" ;;
      *) ;;  # getopts has already printed a diagnostic; carry on as before
    esac
  done
}

#######################################
# Aborts the script unless a value is a plain non-negative decimal integer.
# Arguments:
#   $1 - flag the value belongs to (only used in the error message)
#   $2 - value to check
# Outputs:
#   An error message on stderr when the value is rejected.
# Returns:
#   0 when the value is valid; otherwise exits the shell with status 2.
#######################################
require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    printf 'ERROR: option %s expects a non-negative integer, got "%s"\n' \
      "$1" "$2" >&2
    exit 2
  fi
}

parse_options "$@"

# Reject anything the arithmetic below cannot digest (e.g. "5s", "", "-1").
require_uint -p "${sampling_period}"
require_uint -r "${time_for_readiness}"
require_uint -f "${time_for_failure}"

# The 10# prefix forces base 10, so "08" is not rejected as a bad octal number.
sampling_period=$(( 10#${sampling_period} ))
if (( sampling_period == 0 )); then
  # A zero period would be a division by zero just below.
  printf 'ERROR: option -p expects a sampling period of at least 1 second\n' >&2
  exit 2
fi

# No. ok samples to declare the system ready
oks_threshold=$(( 10#${time_for_readiness} * 60 / sampling_period ))
# No. nok samples to declare the system broken
failures_threshold=$(( 10#${time_for_failure} * 60 / sampling_period ))
failures_in_a_row=0
oks_in_a_row=0
| Mike Marchetti | 9d9192b | 2018-09-21 12:03:05 -0400 | [diff] [blame] | 48 | |
| ramonsalguer | 917ce8c | 2020-07-16 14:42:04 +0200 | [diff] [blame] | 49 | |
# Show status of the OSM services deployed with helm.
# Each command line is echoed before it runs, so the log shows which command
# produced the output that follows it.
# NOTE(review): this runs whether or not -k was given; confirm that helm is
# also expected to be present on Docker Swarm installations.
printf '%s\n' "helm -n ${STACK_NAME} list"
helm -n "${STACK_NAME}" list
printf '%s\n' "helm -n ${STACK_NAME} status ${STACK_NAME}"
helm -n "${STACK_NAME}" status "${STACK_NAME}"
| 55 | |
####################################################################################
# Loop to check system readiness
####################################################################################

#######################################
# Summarizes the rows of a resource listing that are not fully ready.
#
# A row is ready when its "<ready>/<desired>" counter has ready == desired and
# desired > 0; a deployment must additionally report desired in its AVAILABLE
# column. Only "replicated" services are judged; other service modes are
# ignored. Rows that cannot be parsed (e.g. an error message captured from the
# tool) count as not ready, and so does a listing with no rows at all.
#
# Globals:
#   STACK_NAME (read, only for the "nothing found" message)
# Arguments:
#   $1 - kind of resource: deployment, statefulset or service
#   $2 - listing, as printed by "kubectl get <kind> --no-headers" or
#        "docker service ls"
# Outputs:
#   Nothing when everything is ready; otherwise "<n> of <total> <kind>s
#   starting:" followed by one line per row that is not ready.
# Returns:
#   0 when at least one row was listed and none is pending, 1 otherwise.
#######################################
summarize_pending() {
  local kind=$1 listing=$2
  awk -v kind="${kind}" -v stack="${STACK_NAME}" '
    # 1 when counter is "<n>/<n>" with n > 0 (and, if requested, available == n)
    function complete(counter, available, check_available,   parts) {
      if (counter !~ /^[0-9]+\/[0-9]+$/) return 0
      split(counter, parts, "/")
      if (parts[2] + 0 == 0 || parts[1] + 0 != parts[2] + 0) return 0
      if (!check_available) return 1
      return available ~ /^[0-9]+$/ && available + 0 == parts[2] + 0
    }

    NF == 0 { next }
    { listed++ }

    # docker service ls: ID NAME MODE REPLICAS ...
    kind == "service" {
      if ($3 != "replicated") next
      if (complete($4, "", 0)) ready++
      else pending[++n] = sprintf("%20s\t%s", $2, $4)
      next
    }

    # kubectl get: NAME READY ... (deployments carry AVAILABLE in column 4)
    {
      if (complete($2, $4, kind == "deployment")) ready++
      else pending[++n] = sprintf("%20s\t%s\t%s", $1, $2, $4)
    }

    END {
      if (!listed) {
        printf "No %ss found for stack \"%s\"\n", kind, stack
        exit 1
      }
      if (!n) exit 0
      printf "%d of %d %ss starting:\n", n, n + ready, kind
      for (i = 1; i <= n; i++) print pending[i]
      exit 1
    }
  ' <<<"${listing}"
}

# Keep sampling until either counter of consecutive results hits its threshold.
while (( failures_in_a_row < failures_threshold && oks_in_a_row < oks_threshold )); do

  pending_reports=()  # one entry per kind of resource that is not ready yet

  #------------ CHECKS FOR KUBERNETES INSTALLATION
  if [[ -n "${KUBERNETES}" ]]; then
    for kind in deployment statefulset; do
      # stderr is captured on purpose: an error message from kubectl then shows
      # up as an unparsable (hence not ready) row instead of as "all good".
      listing=$(kubectl get "${kind}" -n "${STACK_NAME}" --no-headers 2>&1)
      if ! summary=$(summarize_pending "${kind}" "${listing}"); then
        # The extra newline yields the blank line printed after each report.
        pending_reports+=("${summary}"$'\n')
      fi
    done

  #------------ CHECKS FOR DOCKER SWARM INSTALLATION
  else
    # Services of a stack are named "<stack>_<service>"; -F keeps the stack
    # name from being interpreted as a regular expression.
    listing=$(sg docker -c "docker service ls" 2>&1 | grep -F -- " ${STACK_NAME}_")
    if ! summary=$(summarize_pending service "${listing}"); then
      pending_reports+=("${summary}")
    fi
  fi

  if (( ${#pending_reports[@]} == 0 )); then
    # OK sample
    oks_in_a_row=$(( oks_in_a_row + 1 ))
    failures_in_a_row=0
    # \r keeps successive OK samples on a single, self-overwriting line.
    printf '===> Successful checks: %s/%s\r' "${oks_in_a_row}" "${oks_threshold}"
  else
    # NOK sample
    failures_in_a_row=$(( failures_in_a_row + 1 ))
    oks_in_a_row=0
    printf '\nBootstraping... %s attempts of %s\n' \
      "${failures_in_a_row}" "${failures_threshold}"
    printf '%s\n' "${pending_reports[@]}"
  fi

  #------------ NEXT SAMPLE
  sleep "${sampling_period}"

done
| Mike Marchetti | 37c3f51 | 2018-09-24 10:27:00 -0400 | [diff] [blame] | 140 | |
| tierno | bc983ec | 2018-10-11 15:03:06 +0200 | [diff] [blame] | 141 | |
####################################################################################
# OUTCOME
####################################################################################
# The sampling loop stops as soon as one of the two counters reaches its
# threshold. The verdict is READY unless the failure counter reached its
# threshold; only the BROKEN verdict makes the script exit with a non-zero
# status.
echo
if (( failures_in_a_row < failures_threshold )); then
  echo SYSTEM IS READY
else
  echo SYSTEM IS BROKEN
  exit 1
fi