#!/bin/bash

# reboot a ganeti cluster, making sure instances are moved around before and after

# Copyright 2018, 2019 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

set -e
set -o pipefail
set -u

# Print the command line synopsis to stdout.
usage() {
  # NOTE(review): the <nodelist>/<newmaster> placeholders were missing from
  # this text; restored because getopts declares -n and -M as taking arguments.
  echo "Usage: $0 [-n <nodelist>] [-f] [ -M <newmaster> ] [up|down]"
  echo " -M is for internal use only (used in n>2 clusters if we want to reboot the master first)"
}

# Print the synopsis to stderr and exit with status 1.
error_usage() {
  usage >&2
  exit 1
}

# EXIT handler: run the registered cleanup commands, newest first.
# Each entry of the global "cleanup" array is a shell command string.
do_cleanup() {
  local i
  for ((i = ${#cleanup[@]} - 1; i >= 0; i--)); do
    # Entries carry their own quoting (e.g. "rm -f '/tmp/x'"), so they have to
    # be re-parsed by the shell; plain word splitting would hand the quote
    # characters to rm literally and the file would never be removed.
    # Failures are deliberately ignored (best effort).
    eval "${cleanup[$i]}" || true
  done
}

# Stack of cleanup commands, seeded with a no-op so it is never empty.
declare -a cleanup
cleanup+=(":")
trap do_cleanup EXIT

nodelist="node-list"
newmaster=""
force="0"
while getopts "fhn:M:" OPTION; do
  case "$OPTION" in
    f)
      force="1"
      ;;
    h)
      usage
      exit 0
      ;;
    M)
      newmaster="$OPTARG"
      ;;
    n)
      nodelist="$OPTARG"
      if ! [ -e "$nodelist" ]; then
        echo >&2 "nodelist $nodelist not found."
        exit 1
      fi
      ;;
    *)
      error_usage
      ;;
  esac
done
shift $((OPTIND - 1))

direction="${1:-up}"
[ "$#" -ge 1 ] && shift
# print_list is the command used to emit the node list in processing order.
case "$direction" in
  up) print_list=tac ;;
  down) print_list=cat ;;
  *) error_usage ;;
esac
[ "$#" -gt 0 ] && error_usage

# Print the number of instances whose primary node is $1 and whose status
# line does not contain ADMIN_down.
# NOTE: grep -c exits non-zero when the count is 0, so callers must read the
# printed number, not the exit status.  If gnt-instance itself fails without
# output, the printed count is still 0.
count_instances() {
  gnt-instance list --no-headers -o status --filter '(pnode == "'"$1"'")' |
    grep -c -v ADMIN_down
}

# Succeed if node $1 is primary for at least one instance that is not
# ADMIN_down.
has_instances() {
  [ "$(count_instances "$1")" != 0 ]
}

# Reboot node $1 via ssh and block until it has gone down, answers pings
# again and systemd reports the system as running.  Exits the script if the
# node still has instances or if any of the waits times out.
reboot_host() {
  local tgt
  local max_wait
  local wait_until
  local sleep_time

  tgt="$1"

  if has_instances "$tgt"; then
    echo >&2 "$tgt not empty."
    exit 1
  fi

  ssh -n -l root "$tgt" shutdown -r 1 "'reboot requested by $0 on $(hostname -f)'"

  # wait for target to go down:
  max_wait='300 seconds'
  wait_until=$(date -d "now +$max_wait" +%s)
  while ping -c 5 -q "$tgt" > /dev/null; do
    echo "[$(date)] $tgt is still up (will wait until $(date -d "@$wait_until")."
    sleep 10
    if [ "$(date +%s)" -gt "$wait_until" ]; then
      echo >&2 "Giving up on waiting for $tgt to go down."
      exit 1
    fi
  done

  sleep_time=30
  echo "[$(date)] $tgt is down. Pausing for $sleep_time seconds"
  sleep "$sleep_time"

  # wait for target to answer pings again:
  max_wait='15 minutes'
  wait_until=$(date -d "now +$max_wait" +%s)
  while ! ping -c 5 -q "$tgt" > /dev/null; do
    echo "[$(date)] $tgt is still down (will wait until $(date -d "@$wait_until")."
    if [ "$(date +%s)" -gt "$wait_until" ]; then
      echo >&2 "Giving up on waiting for $tgt to come back."
      exit 1
    fi
    sleep 10
  done

  sleep_time=30
  echo "[$(date)] $tgt is up. Pausing for $sleep_time seconds"
  sleep "$sleep_time"

  # wait for systemd on the target to report the boot as complete:
  max_wait='180 minutes'
  wait_until=$(date -d "now +$max_wait" +%s)
  while ! ssh -n -l root "$tgt" systemctl is-system-running; do
    echo "[$(date)] $tgt is still booting up (will wait until $(date -d "@$wait_until")."
    if [ "$(date +%s)" -gt "$wait_until" ]; then
      echo >&2 "Giving up on waiting for $tgt to come back."
      exit 1
    fi
    sleep 10
  done

  sleep_time=30
  echo "[$(date)] $tgt has finished booting. Pausing for $sleep_time seconds"
  sleep "$sleep_time"
}

# move down, i.e. from 2 to 1, ..., 14 to 13.
#
# Walks the node list in $print_list order: the first node must be empty and
# is rebooted, then each following node is migrated onto the freshly rebooted
# one and becomes the next reboot target.  Must run on the last node of that
# order; otherwise mastership is failed over to it and the script relaunches
# itself there inside screen.
moveupdown() {
  local first_tgt last_node me tmp
  local tgt src dummy

  first_tgt="$(${print_list} "$nodelist" | head -n1 | awk '{print $1}')"
  last_node="$(${print_list} "$nodelist" | tail -n1 | awk '{print $1}')"
  me=$(hostname -f)

  if has_instances "$first_tgt"; then
    echo >&2 "$first_tgt not empty."
    exit 1
  fi

  if [ "$me" != "$last_node" ]; then
    echo "Making $last_node the new master"
    ssh -n -l root "$last_node" gnt-cluster master-failover

    echo "relaunching reboot-cluster on $last_node"
    # mktemp instead of the deprecated debianutils tempfile
    tmp="$(ssh -n -l root -t "$last_node" mktemp)"
    scp "$nodelist" "$last_node:$tmp"
    ssh -l root -t "$last_node" screen -S reboot-cluster -m sh -c "\"echo Relaunched on $last_node; ganeti-reboot-cluster -f -n '$tmp' -M '$me' '$direction'; echo ganeti-reboot-cluster exited with \$?.; sleep 12h\""

    # Only reached if the relaunch above returned.
    echo >&2 "fell through!"
    exit 1
  fi

  ${print_list} "$nodelist" | (
    read -r tgt dummy
    while read -r src dummy; do
      reboot_host "$tgt"
      if has_instances "$src"; then
        echo "Migrating from $src to $tgt."
        if ! gnt-node migrate -f -n "$tgt" "$src"; then
          echo >&2 "gnt-node migrate exited with an error. Bailing out."
          exit 1
        fi
      else
        echo "nothing to migrate from $src to $tgt"
      fi
      tgt="$src"
    done

    # After the loop $tgt is the last node in the list, i.e. this host.
    if has_instances "$tgt"; then
      echo >&2 "$tgt not empty."
      exit 1
    fi
    if ! [ "$tgt" = "$me" ]; then
      echo >&2 "I was expecting $tgt to be me ($me) here."
      exit 1
    fi
    if [ "$newmaster" != "" ]; then
      echo "Making $newmaster the new master"
      ssh -n -l root "$newmaster" gnt-cluster master-failover
    fi
    shutdown -r 1 "reboot requested by $0"
    exit
  )
}

# Reboot every node of the cluster one at a time, emptying each one with
# hbal/gnt-node migrate first.  This host is handled last; a rebalancing job
# is scheduled with at(1) before it reboots itself.
crossmigratemany() {
  local me newlist node

  me=$(hostname -f)
  if ! grep -q --line-regexp --fixed-strings "$me" "$nodelist"; then
    echo >&2 "my hostname ($me) not found in nodelist"
    exit 1
  fi

  # move ourselves last
  newlist="$(mktemp)"
  cleanup+=("rm -f '$newlist'")
  grep -v --line-regexp --fixed-strings "$me" "$nodelist" > "$newlist"
  echo "$me" >> "$newlist"

  while read -r node; do
    if ! hbal -L -C -v -v --no-disk-moves --offline="$node" -X; then
      echo >&2 "hbal failed at node $node. Bailing out."
      exit 1
    fi
    if ! gnt-node migrate -f "$node"; then
      echo >&2 "gnt-node migrate failed for node $node. Bailing out."
      exit 1
    fi

    # We are emptied like everyone else, but our reboot happens below.
    if [ "$node" = "$me" ]; then
      break
    fi

    reboot_host "$node"

    # bring back disks
    echo "Bringing back disks using the watcher"
    ganeti-watcher
    # wait for a cron-launched ganeti-watcher to finish
    while pgrep ganeti-watcher > /dev/null; do
      echo -n "."
      sleep 5
    done
    echo
  done < "$newlist"

  at 'now + 5 min' << 'EOF'
screen -S hbal -d -m sh -c '
  echo "Activating disks using the watcher.."
  ganeti-watcher
  while pgrep ganeti-watcher > /dev/null ; do
    sleep 5
  done
  hbal -L -C -v -v --no-disk-moves -X
  echo "done."
  sleep 1h
'
EOF

  reboot_host "$me"
}

# Two-node variant: migrate everything onto this host, reboot the other node,
# wait for disks to be consistent, migrate everything over to the other node,
# schedule a rebalancing job with at(1) and reboot this host.
crossmigrate() {
  local me them instance

  me=$(hostname -f)
  if ! grep -q --line-regexp --fixed-strings "$me" "$nodelist"; then
    echo >&2 "my hostname ($me) not found in nodelist"
    exit 1
  fi
  them="$(grep -v --line-regexp --fixed-strings "$me" "$nodelist")"

  echo "Migrating from $them to $me."
  if ! gnt-node migrate -f -n "$me" "$them"; then
    echo >&2 "gnt-node migrate exited with an error. Bailing out."
    exit 1
  fi

  reboot_host "$them"

  echo "Activating disks.."
  for instance in $(gnt-instance list -o name --no-headers --filter 'status == "running"'); do
    echo " - $instance ..."
    if ! gnt-instance activate-disks "$instance"; then
      echo >&2 "gnt-instance activate-disks $instance failed. Bailing out."
      exit 1
    fi
  done

  if [ -e /proc/drbd ]; then
    echo "Waiting for drbd to be consistent."
    sleep 5
    # Keep waiting while /proc/drbd mentions an inconsistent disk or a running
    # resync ("finish: ..."), or while the nagios check is unhappy.
    while grep -E -C2 --color -i 'inconsistent|finish' /proc/drbd ||
      ! /usr/lib/nagios/plugins/dsa-check-drbd -d All; do
      echo "Still waiting.."
      sleep 5
    done
  fi

  echo "Migrating from $me to $them."
  if ! gnt-node migrate -f -n "$them" "$me"; then
    echo >&2 "gnt-node migrate exited with an error. Bailing out."
    exit 1
  fi

  at 'now + 30 min' << 'EOF'
screen -S hbal -d -m sh -c '
  echo "Activating disks.."
  for instance in $( gnt-instance list -o name --no-headers --filter "status == \"running\"" ); do
    echo " - $instance ..."
    if ! gnt-instance activate-disks "$instance"; then
      echo >&2 "Warning: gnt-instance activate-disks $instance failed."
    fi
  done
  hbal -L -C -v -X
  echo "done."
  sleep 1h
'
EOF

  reboot_host "$me"
}

# Single-node special case (only used on byrd.debian.org): announce the
# shutdown, stop all instances without recording the state change, wait for
# the qemu/kvm processes to disappear, schedule the watcher to be resumed and
# reboot.
reboot_byrd() {
  local i

  # -k only sends the warning; the real reboot is requested further down.
  /sbin/shutdown -k 30 < /dev/null
  sleep 15m

  gnt-cluster watcher pause 30m
  for i in $(gnt-instance list --no-headers -o name); do
    gnt-instance shutdown --no-remember --submit "$i"
  done

  while pgrep -c '^qemu-|^kvm$' -u root; do
    sleep 15
    gnt-cluster watcher pause 30m
  done

  at 'now + 5 min' << EOF
sleep 4m; gnt-cluster watcher continue
EOF

  /sbin/shutdown -c
  sleep 5
  # NOTE(review): the source text was damaged from this point up to the
  # "Might want to launch me" message below (everything between a '<' and the
  # next '>' was lost).  The stdin redirect, the end of this function and the
  # screen/tmux test are a reconstruction; please confirm against upstream.
  /sbin/shutdown -r 1 < /dev/null
}

# Refuse to run outside of screen/tmux (reconstructed condition, see the note
# in reboot_byrd): STY is set by screen, TMUX by tmux.
if [ -z "${STY:-}" ] && [ -z "${TMUX:-}" ]; then
  echo >&2 "Might want to launch me in a screen or tmux."
  exit 1
fi

if ! [ "$force" = 1 ]; then
  echo -n 'really? '
  read -r really
  # Anything but a plain "y" aborts with status 1.
  if [ "$really" != "y" ]; then
    exit 1
  fi
fi

### ensure_nodelist
###################
# Without a node list file, ask ganeti for the list of nodes.
if ! [ -e "$nodelist" ]; then
  tmp="$(mktemp)"
  cleanup+=("rm -f '$tmp'")
  gnt-node list --no-headers -o name > "$tmp"
  nodelist="$tmp"
fi

# Pick the strategy by cluster size.
lines=$(wc -l < "$nodelist")
case "$lines" in
  0)
    echo >&2 "nodelist $nodelist empty."
    exit 1
    ;;
  1)
    case "$(hostname -f)" in
      byrd.debian.org)
        reboot_byrd
        ;;
      *)
        echo >&2 "Only one node."
        exit 1
        ;;
    esac
    ;;
  2)
    crossmigrate
    ;;
  3)
    echo "WARNING: this is untested. ^C now if you want to stop"
    read -r dummy
    crossmigratemany
    ;;
  *)
    moveupdown
    ;;
esac