From: garciadeblas Date: Mon, 29 Aug 2022 09:25:02 +0000 (+0200) Subject: Fix bug 2143 to track errors during installation of Kubernetes and helm X-Git-Tag: release-v13.0-start~17 X-Git-Url: https://osm.etsi.org/gitweb/?p=osm%2Fdevops.git;a=commitdiff_plain;h=8fed108b98ba8089f9ea6a2655399f26bcaf040e Fix bug 2143 to track errors during installation of Kubernetes and helm Change-Id: I180100f6bc26e1425acd853f51382c0eb2b374d3 Signed-off-by: garciadeblas --- diff --git a/common/track b/common/track index 615286a6..6b478261 100644 --- a/common/track +++ b/common/track @@ -16,16 +16,27 @@ function track(){ # Tracks events by sending HTTP GET requests with query strings to a web server -# - First argument: event name -# - Rest of arguments (if they exist): tuples of operation-value-comment-tags -# - operation: particular operation in an event (if it is not provided, the operation will be named after the event) -# - value: particular value for an operation -# - comment -# - none will be passed when empty -# - will be parsed to replace spaces by underscores -# - tags -# - none will be passed when empty -# - will be parsed to replace spaces by ~ +# Input: +# - First argument (mandatory): event name +# - Rest of arguments: +# - If they exist, they correspond to tuples of operation-value-comment-tags +# - operation: particular operation in an event +# - value: particular value for an operation +# - comment +# - none will be passed when empty +# - will be parsed to replace spaces by underscores +# - tags +# - none will be passed when empty +# - will be parsed to replace spaces by ~ +# - If no arguments: +# - operation: like event +# - value: "" +# - comment: "" +# - tags: "" +# Output: +# Sends as many HTTP requests as operations with the following query string +# "&{OSM_TRACK_INSTALLATION_ID}&{TIMESTAMP}&{EVENT}&{OPERATION}&{VALUE}&{COMMENT}&{TAGS}" +# if [ $# -lt 1 ]; then echo "Unexpected error in track function. At least 1 arg is expected: event" @@ -83,5 +94,3 @@ function track(){ fi return 0 } - - diff --git a/installers/full_install_osm.sh b/installers/full_install_osm.sh index d2f5e524..9f9fb051 100755 --- a/installers/full_install_osm.sh +++ b/installers/full_install_osm.sh @@ -34,7 +34,6 @@ function usage(){ echo -e " -H use specific juju host controller IP" echo -e " -S use VCA/juju secret key" echo -e " -P use VCA/juju public key file" - echo -e " -C use VCA/juju CA certificate file" echo -e " -A use VCA/juju API proxy" echo -e " --pla: install the PLA module for placement support" echo -e " -m : install OSM but only rebuild or pull the specified docker images (NG-UI, NBI, LCM, RO, MON, POL, PLA, KAFKA, MONGO, PROMETHEUS, PROMETHEUS-CADVISOR, KEYSTONE-DB, NONE)" @@ -154,38 +153,6 @@ function check_packages() { echo -e "Required packages are present: ${NEEDED_PACKAGES}" } -function install_lxd() { - [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function - # Apply sysctl production values for optimal performance - sudo cp ${OSM_DEVOPS}/installers/60-lxd-production.conf /etc/sysctl.d/60-lxd-production.conf - sudo sysctl --system - - # Install LXD snap - sudo apt-get remove --purge -y liblxc1 lxc-common lxcfs lxd lxd-client - snap info lxd | grep installed > /dev/null - if [ $? -eq 0 ]; then - sudo snap refresh lxd --channel $LXD_VERSION/stable - else - sudo snap install lxd --channel $LXD_VERSION/stable - fi - - # Configure LXD - sudo usermod -a -G lxd `whoami` - cat ${OSM_DEVOPS}/installers/lxd-preseed.conf | sed 's/^config: {}/config:\n core.https_address: '$DEFAULT_IP':8443/' | sg lxd -c "lxd init --preseed" - sg lxd -c "lxd waitready" - DEFAULT_IF=$(ip route list|awk '$1=="default" {print $5; exit}') - [ -z "$DEFAULT_IF" ] && DEFAULT_IF=$(route -n |awk '$1~/^0.0.0.0/ {print $8; exit}') - [ -z "$DEFAULT_IF" ] && FATAL "Not possible to determine the interface with the default route 0.0.0.0" - DEFAULT_MTU=$(ip addr show ${DEFAULT_IF} | perl -ne 'if (/mtu\s(\d+)/) {print $1;}') - sg lxd -c "lxc profile device set default eth0 mtu $DEFAULT_MTU" - sg lxd -c "lxc network set lxdbr0 bridge.mtu $DEFAULT_MTU" - #sudo systemctl stop lxd-bridge - #sudo systemctl --system daemon-reload - #sudo systemctl enable lxd-bridge - #sudo systemctl start lxd-bridge - [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function -} - function ask_user(){ # ask to the user and parse a response among 'y', 'yes', 'n' or 'no'. Case insensitive # Params: $1 text to ask; $2 Action by default, can be 'y' for yes, 'n' for no, other or empty for not allowed @@ -661,7 +628,8 @@ function install_osm() { # if no host is passed in, we need to install lxd/juju, unless explicilty asked not to if [ -z "$OSM_VCA_HOST" ] && [ -z "$INSTALL_NOLXD" ] && [ -z "$LXD_CLOUD_FILE" ]; then - install_lxd + LXD_INSTALL_OPTS="-D ${OSM_DEVOPS} -i ${DEFAULT_IP} ${DEBUG_INSTALL}" + $OSM_DEVOPS/installers/install_lxd.sh ${LXD_INSTALL_OPTS} || FATAL_TRACK lxd "install_lxd.sh failed" fi track prereq prereqok_ok @@ -876,9 +844,6 @@ function ctrl_c() { [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } -LXD_VERSION=5.0 -JUJU_VERSION=2.9 -JUJU_AGENT_VERSION=2.9.29 UNINSTALL="" DEVELOP="" UPDATE="" diff --git a/installers/install_kubeadm_cluster.sh b/installers/install_kubeadm_cluster.sh index 70f6d9da..6f3aa878 100755 --- a/installers/install_kubeadm_cluster.sh +++ b/installers/install_kubeadm_cluster.sh @@ -15,10 +15,11 @@ set +eux -#installs kubernetes packages +K8S_VERSION=1.23.3-00 + +# installs kubernetes packages function install_kube() { [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function - K8S_VERSION=1.23.3-00 # Kubernetes releases can be found here: https://kubernetes.io/releases/ # To check other available versions, run the following command # curl -s https://packages.cloud.google.com/apt/dists/kubernetes-xenial/main/binary-amd64/Packages | grep Version | awk '{print $2}' @@ -36,39 +37,63 @@ EOF [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } -#initializes kubernetes control plane +# check and track kube packages installation +function check_and_track_kube_install() { + [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function + kubelet_version=$(dpkg -s kubelet|grep Version|awk '{print $2}') + [ -n "${kubelet_version}" ] || FATAL_TRACK k8scluster "Kubelet was not installed." + kubeadm_version=$(dpkg -s kubeadm|grep Version|awk '{print $2}') + [ -n "${kubeadm_version}" ] || FATAL_TRACK k8scluster "Kubeadm was not installed." + kubectl_version=$(dpkg -s kubectl|grep Version|awk '{print $2}') + [ -n "${kubectl_version}" ] || FATAL_TRACK k8scluster "Kubectl was not installed." + track k8scluster install_k8s_ok kubelet ${kubelet_version} none none kubeadm ${kubeadm_version} none none kubectl ${kubectl_version} none none + [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function +} + +# initializes kubernetes control plane function init_kubeadm() { [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function sudo swapoff -a sudo sed -i.bak '/.*none.*swap/s/^\(.*\)$/#\1/g' /etc/fstab + sudo kubeadm init --dry-run || FATAL_TRACK k8scluster "kubeadm init dry-run failed" sudo kubeadm init --config $1 sleep 5 [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } +# Initializes kubeconfig file function kube_config_dir() { [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function K8S_MANIFEST_DIR="/etc/kubernetes/manifests" - [ ! -d $K8S_MANIFEST_DIR ] && FATAL "Cannot Install Kubernetes" + [ ! -d $K8S_MANIFEST_DIR ] && FATAL_TRACK k8scluster "Kubernetes folder $K8S_MANIFEST_DIR was not found" mkdir -p $HOME/.kube sudo cp /etc/kubernetes/admin.conf $HOME/.kube/config sudo chown $(id -u):$(id -g) $HOME/.kube/config [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } -#deploys flannel as daemonsets +# test kubernetes installation +function check_and_track_init_k8s() { + [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function + kubectl get ns || FATAL_TRACK k8scluster "Failed getting namespaces" + track k8scluster init_k8s_ok + [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function +} + +# deploys flannel as daemonsets function deploy_cni_provider() { [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function CNI_DIR="$(mktemp -d -q --tmpdir "flannel.XXXXXX")" trap 'rm -rf "${CNI_DIR}"' EXIT - wget --retry-on-host-error --retry-on-http-error 404,429,503 --tries=5 https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml -P $CNI_DIR - [ ! -f $CNI_DIR/kube-flannel.yml ] && FATAL "Cannot Install Flannel because $CNI_DIR/kube-flannel.yml was not found. Maybe the file https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml is temporarily not accessible" + KUBE_FLANNEL_FILE_URL="https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml" + wget --retry-on-host-error --retry-on-http-error 404,429,503 --tries=5 "${KUBE_FLANNEL_FILE_URL}" -P $CNI_DIR + [ ! -f $CNI_DIR/kube-flannel.yml ] && FATAL_TRACK k8scluster "Cannot Install Flannel because $CNI_DIR/kube-flannel.yml was not found. Maybe the file ${KUBE_FLANNEL_FILE_URL} is temporarily not accessible" kubectl apply -f $CNI_DIR - [ $? -ne 0 ] && FATAL "Cannot Install Flannel" + [ $? -ne 0 ] && FATAL_TRACK k8scluster "Cannot Install Flannel" [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } -#taints K8s master node +# taints K8s master node function taint_master_node() { [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function K8S_MASTER=$(kubectl get nodes | awk '$3~/master/'| awk '{print $1}') @@ -77,6 +102,14 @@ function taint_master_node() { [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } +# check and track kube packages installation +function check_and_track_k8s_ready_before_helm() { + [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function + kubectl get events || FATAL_TRACK k8scluster "Failed getting events" + track k8scluster k8s_ready_before_helm + [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function +} + #Install Helm v3 #Helm releases can be found here: https://github.com/helm/helm/releases function install_helm() { @@ -93,8 +126,9 @@ function install_helm() { else echo "Helm3 is already installed. Skipping installation..." fi - helm repo add stable https://charts.helm.sh/stable - helm repo update + helm version || FATAL_TRACK k8scluster "Could not obtain helm version. Maybe helm client was not installed" + helm repo add stable https://charts.helm.sh/stable || FATAL_TRACK k8scluster "Helm repo stable could not be added" + helm repo update || FATAL_TRACK k8scluster "Helm repo stable could not be updated" [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } @@ -127,7 +161,7 @@ function install_k8s_storageclass() { sleep 15 fi done - [ -n "$storageclass_ready" ] || FATAL "Storageclass not ready after $storageclass_timeout seconds. Cannot install openebs" + [ -n "$storageclass_ready" ] || FATAL_TRACK k8scluster "Storageclass not ready after $storageclass_timeout seconds. Cannot install openebs" kubectl patch storageclass openebs-hostpath -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function } @@ -226,7 +260,7 @@ function check_for_readiness() { if [[ (${failures_in_a_row} -ge ${failures_threshold}) ]] then echo - FATAL "K8S CLUSTER IS BROKEN" + FATAL_TRACK k8scluster "K8S CLUSTER IS BROKEN" else echo echo "K8S CLUSTER IS READY" @@ -284,21 +318,26 @@ echo "HOME=$HOME" install_kube -track k8scluster install_k8s_ok +check_and_track_kube_install + init_kubeadm $OSM_DOCKER_WORK_DIR/cluster-config.yaml kube_config_dir -track k8scluster init_k8s_ok +check_and_track_init_k8s + if [ -n "$INSTALL_K8S_MONITOR" ]; then # uninstall OSM MONITORING uninstall_k8s_monitoring track k8scluster uninstall_k8s_monitoring_ok fi -#remove old namespace + remove_k8s_namespace osm deploy_cni_provider taint_master_node +check_and_track_k8s_ready_before_helm + install_helm track k8scluster install_helm_ok + install_k8s_storageclass track k8scluster k8s_storageclass_ok install_helm_metallb diff --git a/installers/install_lxd.sh b/installers/install_lxd.sh new file mode 100755 index 00000000..a499eae3 --- /dev/null +++ b/installers/install_lxd.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set +eux + +function usage(){ + [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function + echo -e "usage: $0 [OPTIONS]" + echo -e "Install Juju for OSM" + echo -e " OPTIONS" + echo -e " -h / --help: print this help" + echo -e " -D use local devops installation path" + echo -e " -H use specific juju host controller IP" + echo -e " -S use VCA/juju secret key" + echo -e " -P use VCA/juju public key file" + echo -e " -l: LXD cloud yaml file" + echo -e " -L: LXD credentials yaml file" + echo -e " -K: Specifies the name of the controller to use - The controller must be already bootstrapped" + echo -e " --debug: debug mode" + echo -e " --cachelxdimages: cache local lxd images, create cronjob for that cache (will make installation longer)" + echo -e " --nojuju: do not juju, assumes already installed" + [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function +} + +function install_lxd() { + [ -z "${DEBUG_INSTALL}" ] || DEBUG beginning of function + # Apply sysctl production values for optimal performance + sudo cp ${OSM_DEVOPS}/installers/60-lxd-production.conf /etc/sysctl.d/60-lxd-production.conf + sudo sysctl --system + + # Install LXD snap + sudo apt-get remove --purge -y liblxc1 lxc-common lxcfs lxd lxd-client + snap info lxd | grep installed > /dev/null + if [ $? -eq 0 ]; then + sudo snap refresh lxd --channel $LXD_VERSION/stable + else + sudo snap install lxd --channel $LXD_VERSION/stable + fi + + # Configure LXD + sudo usermod -a -G lxd `whoami` + cat ${OSM_DEVOPS}/installers/lxd-preseed.conf | sed 's/^config: {}/config:\n core.https_address: '$DEFAULT_IP':8443/' | sg lxd -c "lxd init --preseed" + sg lxd -c "lxd waitready" + DEFAULT_IF=$(ip route list|awk '$1=="default" {print $5; exit}') + [ -z "$DEFAULT_IF" ] && DEFAULT_IF=$(route -n |awk '$1~/^0.0.0.0/ {print $8; exit}') + [ -z "$DEFAULT_IF" ] && FATAL "Not possible to determine the interface with the default route 0.0.0.0" + DEFAULT_MTU=$(ip addr show ${DEFAULT_IF} | perl -ne 'if (/mtu\s(\d+)/) {print $1;}') + sg lxd -c "lxc profile device set default eth0 mtu $DEFAULT_MTU" + sg lxd -c "lxc network set lxdbr0 bridge.mtu $DEFAULT_MTU" + # sudo systemctl stop lxd-bridge + # sudo systemctl --system daemon-reload + # sudo systemctl enable lxd-bridge + # sudo systemctl start lxd-bridge + [ -z "${DEBUG_INSTALL}" ] || DEBUG end of function +} + +DEBUG_INSTALL="" +LXD_VERSION=5.0 +OSM_DEVOPS= + +# main +while getopts ":D:d:i:-: h" o; do + case "${o}" in + i) + DEFAULT_IP="${OPTARG}" + ;; + d) + OSM_DOCKER_WORK_DIR="${OPTARG}" + ;; + D) + OSM_DEVOPS="${OPTARG}" + ;; + -) + [ "${OPTARG}" == "help" ] && usage && exit 0 + [ "${OPTARG}" == "debug" ] && DEBUG_INSTALL="y" && continue + echo -e "Invalid option: '--$OPTARG'\n" >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument" >&2 + exit 1 + ;; + \?) + echo -e "Invalid option: '-$OPTARG'\n" >&2 + exit 1 + ;; + h) + usage && exit 0 + ;; + *) + exit 1 + ;; + esac +done + +source $OSM_DEVOPS/common/logging +source $OSM_DEVOPS/common/track + +echo "DEBUG_INSTALL=$DEBUG_INSTALL" +echo "DEFAULT_IP=$DEFAULT_IP" +echo "OSM_DEVOPS=$OSM_DEVOPS" + +[ -z "$INSTALL_NOJUJU" ] && install_lxd +track juju juju_install_ok + diff --git a/installers/test_track.sh b/installers/test_track.sh index 65e6e86f..3655c5e1 100755 --- a/installers/test_track.sh +++ b/installers/test_track.sh @@ -48,6 +48,6 @@ track test-event op1 value1 "My comment1 on test event" none op2 value2 "My comm sleep 1 echo echo "Next track call will be done from function FATAL_TRACK" -echo 'FATAL_TRACK test-event "Fatal error during execution"' -FATAL_TRACK test-event "Fatal error during execution" +echo 'FATAL_TRACK test-fatal "Fatal error during execution"' +FATAL_TRACK test-fatal "Fatal error during execution"