-#!/bin/sh
+#!/usr/bin/env bash
-# Copyright 2019 ETSI
+# Copyright 2020 Telefónica Investigación y Desarrollo S.A.U.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-WAIT_TIME=340 # LCM healthcheck needs 2x(30+140) senconds
-SERVICES_WITH_HEALTH="nbi ro zookeeper lcm mon pol kafka"
-NUM_SERVICES_WITH_HEALTH=$(echo $SERVICES_WITH_HEALTH | wc -w)
-WAIT_FINAL=30
-OSM_DEPLOYMENT="nbi lcm ro mon pol keystone"
-OSM_STATEFULSET="zookeeper kafka mongo mysql prometheus"
-NUM_K8S_PODS=$(echo $OSM_DEPLOYMENT $OSM_STATEFULSET | wc -w)
-
-while getopts "w:s:n:c:k" o; do
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default values (each one can be overridden from the command line):
+#   -p <seconds>  sampling period between two consecutive checks
+#   -r <minutes>  uninterrupted healthy time required to declare the system ready
+#   -f <minutes>  uninterrupted unhealthy time after which the system is declared broken
+#   -s <name>     stack name (Docker Swarm) or namespace (Kubernetes)
+#   -k            check a Kubernetes installation instead of Docker Swarm
+sampling_period=5       # seconds
+time_for_readiness=2    # minutes ready
+time_for_failure=5      # minutes broken
+KUBERNETES=             # By default, assumes Docker Swarm installation
+STACK_NAME=osm          # By default, "osm"
+
+while getopts "p:r:f:s:k" o; do
case "${o}" in
- w)
- WAIT_TIME=${OPTARG}
+        p)
+            sampling_period=${OPTARG}
;;
- s)
- STACK_NAME=${OPTARG}
+        r)
+            time_for_readiness=${OPTARG}
;;
- n)
- NUM_SERVICES_WITH_HEALTH=${OPTARG}
+        f)
+            time_for_failure=${OPTARG}
;;
- c)
- SERVICES_WITH_HEALTH="${OPTARG}"
+        s)
+            STACK_NAME=${OPTARG}
;;
k)
KUBERNETES="y"
esac
done
+
+# The three timing values are fed to shell arithmetic below. Anything that is not
+# a positive integer must be rejected first: a non-numeric word evaluates to 0
+# there, which yields a zero threshold (the loop never samples and the system is
+# reported ready/broken unchecked) or a division by zero.
+for numeric_option in "-p:${sampling_period}" "-r:${time_for_readiness}" "-f:${time_for_failure}"
+do
+    if ! [[ "${numeric_option#*:}" =~ ^[1-9][0-9]*$ ]]
+    then
+        echo "ERROR: option ${numeric_option%%:*} requires a positive integer, got '${numeric_option#*:}'" >&2
+        exit 2
+    fi
+done
+
+oks_threshold=$((time_for_readiness*60/sampling_period))       # No. ok samples to declare the system ready
+failures_threshold=$((time_for_failure*60/sampling_period))    # No. nok samples to declare the system broken
+# Integer division rounds down to 0 when the sampling period is longer than the
+# observation window; always require at least one real sample before deciding.
+(( oks_threshold > 0 )) || oks_threshold=1
+(( failures_threshold > 0 )) || failures_threshold=1
+failures_in_a_row=0
+oks_in_a_row=0
+
+
+####################################################################################
+# Loop to check system readiness
+####################################################################################
+# Every iteration takes one sample of the workload state and sleeps for the
+# sampling period. A sample is OK only when at least one workload was found and
+# none of them is pending; an empty listing (stack not deployed yet, or the query
+# itself failed) is a NOK sample. OK and NOK streaks reset each other, and the
+# loop ends as soon as one of the streaks reaches its threshold.
+
+# awk helper shared by all the filters below: replicas_up("N/N") returns N when
+# every desired replica is up and N > 0, and 0 for anything else ("0/1", "0/0",
+# words coming from an error message...). Not tied to single-replica workloads.
+AWK_REPLICAS_UP='function replicas_up(ratio,    r) {
+    return (split(ratio, r, "/") == 2 && r[1] == r[2] && r[2] + 0 > 0) ? r[2] + 0 : 0
+}
+'
+
+while [[ (${failures_in_a_row} -lt ${failures_threshold}) && (${oks_in_a_row} -lt ${oks_threshold}) ]]
+do
+
+    #------------ CHECKS FOR KUBERNETES INSTALLATION
+    if [ -n "$KUBERNETES" ]
+    then
-time=0
-step=2
-while [ $time -le "$WAIT_TIME" ]; do
- if [ -n "$KUBERNETES" ]; then
- if [ "$(kubectl get pods -n "${STACK_NAME}" | grep -i running | wc -l)" -ge "$NUM_K8S_PODS" ]; then
- #all pods are running now.
- sleep $WAIT_FINAL
- exit 0
+        # State of Deployments ($2 is the READY ratio, $4 the AVAILABLE count).
+        # stderr is captured too, so error text shows up as pending rows.
+        DEPLOYMENTS_STATE=$(kubectl get deployment -n "${STACK_NAME}" --no-headers 2>&1)
+        DEPLOYMENTS_READY=$(echo "${DEPLOYMENTS_STATE}" | awk "${AWK_REPLICAS_UP}"'replicas_up($2) && $4 == replicas_up($2) {printf ("%20s\t%s\t%s\n", $1, $2, $4)}')
+        DEPLOYMENTS_NOT_READY=$(echo "${DEPLOYMENTS_STATE}" | awk "${AWK_REPLICAS_UP}"'NF && !(replicas_up($2) && $4 == replicas_up($2)) {printf ("%20s\t%s\t%s\n", $1, $2, $4)}')
+        COUNT_DEPLOYMENTS_READY=$(echo "${DEPLOYMENTS_READY}" | grep -c -v -e '^$')
+        COUNT_DEPLOYMENTS_NOT_READY=$(echo "${DEPLOYMENTS_NOT_READY}" | grep -c -v -e '^$')
+
+        # State of Statefulsets ($2 is the READY ratio)
+        STS_STATE=$(kubectl get statefulset -n "${STACK_NAME}" --no-headers 2>&1)
+        STS_READY=$(echo "${STS_STATE}" | awk "${AWK_REPLICAS_UP}"'replicas_up($2) {printf ("%20s\t%s\n", $1, $2)}')
+        STS_NOT_READY=$(echo "${STS_STATE}" | awk "${AWK_REPLICAS_UP}"'NF && !replicas_up($2) {printf ("%20s\t%s\n", $1, $2)}')
+        COUNT_STS_READY=$(echo "${STS_READY}" | grep -c -v -e '^$')
+        COUNT_STS_NOT_READY=$(echo "${STS_NOT_READY}" | grep -c -v -e '^$')
+
+        COUNT_K8S_NOT_READY=$((COUNT_DEPLOYMENTS_NOT_READY + COUNT_STS_NOT_READY))
+        COUNT_K8S_WORKLOADS=$((COUNT_DEPLOYMENTS_READY + COUNT_STS_READY + COUNT_K8S_NOT_READY))
+
+        # OK sample: something is deployed and nothing is pending
+        if [[ ${COUNT_K8S_NOT_READY} -eq 0 && ${COUNT_K8S_WORKLOADS} -gt 0 ]]
+        then
+            ((++oks_in_a_row))
+            failures_in_a_row=0
+            # Carriage return without newline: the counter is redrawn in place
+            printf '===> Successful checks: %s/%s\r' "${oks_in_a_row}" "${oks_threshold}"
+        # NOK sample
+        else
+            ((++failures_in_a_row))
+            oks_in_a_row=0
+            echo
+            echo "Bootstraping... ${failures_in_a_row} attempts of ${failures_threshold}"
+
+            # Nothing listed at all: not deployed yet, which must not count as healthy
+            if [[ ${COUNT_K8S_WORKLOADS} -eq 0 ]]
+            then
+                echo "No deployments or statefulsets found in namespace ${STACK_NAME}"
+                echo
+            fi
+
+            # Reports failed deployments
+            if [[ ${COUNT_DEPLOYMENTS_NOT_READY} -ne 0 ]]
+            then
+                echo "${COUNT_DEPLOYMENTS_NOT_READY} of $((COUNT_DEPLOYMENTS_NOT_READY + COUNT_DEPLOYMENTS_READY)) deployments starting:"
+                echo "${DEPLOYMENTS_NOT_READY}"
+                echo
+            fi
+
+            # Reports failed statefulsets
+            if [[ ${COUNT_STS_NOT_READY} -ne 0 ]]
+            then
+                echo "${COUNT_STS_NOT_READY} of $((COUNT_STS_NOT_READY + COUNT_STS_READY)) statefulsets starting:"
+                echo "${STS_NOT_READY}"
+                echo
+            fi
fi
+
+    #------------ CHECKS FOR DOCKER SWARM INSTALLATION
else
- if [ "$(sg docker -c "docker ps" | grep " ${STACK_NAME}_" | grep -i healthy | wc -l)" -ge "$NUM_SERVICES_WITH_HEALTH" ]; then
- # all dockers are healthy now.
- # final sleep is needed until more health checks are added to validate system is ready to handle requests
- sleep $WAIT_FINAL
- exit 0
+        # State of Docker Services ($2 is the name, $3 the mode, $4 the replicas ratio).
+        # Only rows of this stack survive the grep, so a failing docker command or a
+        # stack that is not deployed yet leaves SERVICES_STATE empty.
+        SERVICES_STATE=$(sg docker -c "docker service ls" 2>&1 | grep -F -- " ${STACK_NAME}_")
+        SERVICES_READY=$(echo "${SERVICES_STATE}" | awk "${AWK_REPLICAS_UP}"'$3=="replicated" && replicas_up($4) {printf ("%20s\t%s\n", $2, $4)}')
+        SERVICES_NOT_READY=$(echo "${SERVICES_STATE}" | awk "${AWK_REPLICAS_UP}"'$3=="replicated" && !replicas_up($4) {printf ("%20s\t%s\n", $2, $4)}')
+        COUNT_SERVICES_READY=$(echo "${SERVICES_READY}" | grep -c -v -e '^$')
+        COUNT_SERVICES_NOT_READY=$(echo "${SERVICES_NOT_READY}" | grep -c -v -e '^$')
+
+        # OK sample: at least one replicated service is up and none is pending
+        if [[ ${COUNT_SERVICES_NOT_READY} -eq 0 && ${COUNT_SERVICES_READY} -gt 0 ]]
+        then
+            ((++oks_in_a_row))
+            failures_in_a_row=0
+            # Carriage return without newline: the counter is redrawn in place
+            printf '===> Successful checks: %s/%s\r' "${oks_in_a_row}" "${oks_threshold}"
+        # NOK sample
+        else
+            ((++failures_in_a_row))
+            oks_in_a_row=0
+            echo
+            echo "Bootstraping... ${failures_in_a_row} attempts of ${failures_threshold}"
+            if [[ $((COUNT_SERVICES_NOT_READY + COUNT_SERVICES_READY)) -eq 0 ]]
+            then
+                # Empty listing: must not be mistaken for "nothing is pending"
+                echo "No replicated services found for stack ${STACK_NAME}"
+            else
+                echo "${COUNT_SERVICES_NOT_READY} of $((COUNT_SERVICES_NOT_READY + COUNT_SERVICES_READY)) services starting:"
+                echo "${SERVICES_NOT_READY}"
+            fi
fi
fi
- sleep $step
- time=$((time+step))
+    #------------ NEXT SAMPLE
+    sleep "${sampling_period}"
+
done
-if [ -n "$KUBERNETES" ]; then
- echo "Not all pods are running"
- kubectl get pods -n "${STACK_NAME}"
- for POD in $OSM_DEPLOYMENT $OSM_STATEFULSET; do
- kubectl get pods -n "${STACK_NAME}" | grep -i running | grep -q ^"${POD}-" && continue
- echo
- echo BEGIN LOGS of pods ${POD} not running
- LOG_POD=$(kubectl get pods -n "${STACK_NAME}" | grep -e ^"${POD}-" | awk '{print $1}' )
- [ -z "$LOG_POD" ] && echo "${POD} Failed to deploy" || kubectl logs ${LOG_POD} -n $STACK_NAME 2>&1 | tail -n 100
- echo END LOGS of services $POD not running
- done
+
+####################################################################################
+# OUTCOME
+####################################################################################
+# Final verdict, derived from the failure streak alone: once it has reached its
+# threshold the system is reported broken and the script exits with status 1;
+# otherwise the system is reported ready and the script ends normally.
+if (( failures_in_a_row >= failures_threshold )); then
+    printf '\n%s\n' 'SYSTEM IS BROKEN'
+    exit 1
else
- echo "Not all Docker services are healthy"
- sg docker -c "docker ps" | grep " ${STACK_NAME}_"
- for S_WITH_HEALTH in $SERVICES_WITH_HEALTH ; do
- sg docker -c "docker ps" | grep " ${STACK_NAME}_" | grep -i healthy | grep -q "_${S_WITH_HEALTH}." && continue
- echo
- echo BEGIN LOGS of container ${S_WITH_HEALTH} not healthy
- sg docker -c "docker service logs ${STACK_NAME}_${S_WITH_HEALTH} 2>&1" | tail -n 100
- echo END LOGS of container ${S_WITH_HEALTH} not healthy
- echo
- done
+    printf '\n%s\n' 'SYSTEM IS READY'
fi
-
-exit 1