From: Peter Palfrader
Date: Mon, 23 Sep 2019 07:24:21 +0000 (+0200)
Subject: First attempt at a reboot script for the ubc arm cluster
X-Git-Url: https://git.adam-barratt.org.uk/?a=commitdiff_plain;h=90ececad553da56a7fbf8e6b30e5f70ab93b11d5;p=mirror%2Fdsa-puppet.git

First attempt at a reboot script for the ubc arm cluster
---

Review notes (text in this section is ignored when the patch is applied):
 * Line breaks and indentation of this patch were reconstructed; apply it
   with whitespace differences ignored if the target file indents
   differently.
 * do_cleanup now runs each registered command through eval.  Expanding
   the entry unquoted only word-splits it, so "rm -f '/tmp/x'" handed rm a
   path with literal quote characters and the temp files were never
   removed.  The loop counter is now local and seq is no longer forked.
 * crossmigratemany reads the node list on fd 3 so that commands run in the
   loop body cannot consume the remaining node names from stdin.
 * Hunk start lines and lengths were adjusted for the added lines; the
   index line still carries the blob ids of the original submission.

diff --git a/modules/ganeti2/files/ganeti-reboot-cluster b/modules/ganeti2/files/ganeti-reboot-cluster
index 45c9fa800..86b6dfd93 100755
--- a/modules/ganeti2/files/ganeti-reboot-cluster
+++ b/modules/ganeti2/files/ganeti-reboot-cluster
@@ -36,6 +36,22 @@ error_usage() {
   usage >&2
   exit 1
 }
+# Run the commands registered in the global "cleanup" array, newest first.
+# Every entry is a shell command string and is passed through eval, so
+# quoting embedded in it (e.g. "rm -f '/tmp/x'") is interpreted by the
+# shell rather than handed to the command as literal quote characters.
+# A failing entry is ignored so that the remaining entries still run.
+do_cleanup() {
+  local cnt i
+  cnt=$((${#cleanup[*]}-1))
+  for ((i = cnt; i >= 0; i--)); do
+    eval "${cleanup[$i]}" || true
+  done
+}
+declare -a cleanup
+cleanup+=(":")
+trap do_cleanup EXIT
+
 
 nodelist="node-list"
 newmaster=""
@@ -174,10 +190,6 @@ moveupdown() {
   ${print_list} "$nodelist" | (
     read tgt dummy
     while read src dummy; do
-      if has_instances "$tgt"; then
-        echo "$tgt not empty."
-        exit 1
-      fi
       reboot_host "$tgt"
 
       if has_instances "$src"; then
@@ -211,13 +223,75 @@ moveupdown() {
   )
 }
 
+# Reboot all nodes of a cluster one after the other, the local node last.
+# For every node in $nodelist hbal is run with that node as --offline and
+# gnt-node migrate is run on it; remote nodes are then rebooted and
+# ganeti-watcher is run to bring the disks back.  For the local node an
+# at(1) job is queued instead which, five minutes later, starts a detached
+# screen session running ganeti-watcher and a final hbal; then it reboots.
+# Globals: nodelist (read), cleanup (appended to), me and newlist (set).
+# Exits with status 1 if the local host is not listed or a step fails.
+crossmigratemany() {
+  me=$(hostname -f)
+  if ! grep -q --line-regexp --fixed-strings "$me" "$nodelist"; then
+    echo >&2 "my hostname ($me) not found in nodelist"
+    exit 1
+  fi
+
+  # move ourselves last
+  newlist="$(tempfile)"
+  cleanup+=("rm -f '$newlist'")
+  grep -v --line-regexp --fixed-strings "$me" "$nodelist" > "$newlist"
+  echo "$me" >> "$newlist"
+
+  # Read the node list on fd 3 instead of stdin so that commands run in the
+  # loop body cannot consume the remaining node names.
+  while read -r node <&3; do
+    if ! hbal -L -C -v -v --no-disk-moves --offline="$node" -X; then
+      echo >&2 "hbal failed at node $node. Bailing out."
+      exit 1
+    fi
+    if ! gnt-node migrate -f "$node"; then
+      echo >&2 "gnt-node migrate failed for node $node. Bailing out."
+      exit 1
+    fi
+    if [ "$node" = "$me" ] ; then
+      break
+    fi
+    reboot_host "$node"
+    # bring back disks
+    echo "Bringing back disks using the watcher"
+    ganeti-watcher
+    # wait for a cron-launched ganeti-watcher to finish
+    while pgrep ganeti-watcher > /dev/null ; do
+      echo -n "."
+      sleep 5
+    done
+    echo
+  done 3< "$newlist"
+
+  at 'now + 5 min' << 'EOF'
+screen -S hbal -d -m sh -c '
+  echo "Activating disks using the watcher.."
+  ganeti-watcher
+  while pgrep ganeti-watcher > /dev/null ; do
+    sleep 5
+  done
+  hbal -L -C -v -v --no-disk-moves -X
+  echo "done."
+  sleep 1h
+'
+EOF
+  reboot_host "$me"
+}
+
 crossmigrate() {
   me=$(hostname -f)
-  if ! grep -q -F "$me" "$nodelist"; then
+  if ! grep -q --line-regexp --fixed-strings "$me" "$nodelist"; then
     echo >&2 "my hostname ($me) not found in nodelist"
     exit 1
   fi
-  them="$(grep -v -F "$me" "$nodelist")"
+  them="$(grep -v --line-regexp --fixed-strings "$me" "$nodelist")"
 
   echo "Migrating from $them to $me."
   if ! gnt-node migrate -f -n "$me" "$them"; then
@@ -307,7 +381,7 @@ fi
 ###################
 if ! [ -e "$nodelist" ]; then
   tmp="$(tempfile)"
-  trap "rm -f '$tmp'" EXIT
+  cleanup+=("rm -f '$tmp'")
   gnt-node list --no-headers -o name > "$tmp"
   nodelist="$tmp"
 fi
@@ -331,6 +405,11 @@ case "$lines" in
   2)
     crossmigrate
     ;;
+  3)
+    echo "WARNING: this is untested. ^C now if you want to stop"
+    read dummy
+    crossmigratemany
+    ;;
   *)
     moveupdown
     ;;