# Usage text: the script accepts -n <node-list>, -f, -M <newmaster> and an
# optional direction argument (up or down). -M is documented as internal only.
8 echo "Usage: $0 [-n <node-list>] [-f] [ -M <newmaster> ] [up|down]"
9 echo " -M is for internal use only (used in n>2 clusters if we want to reboot the master first)"
# Option parsing: -f and -h take no argument, -n and -M each require one.
20 while getopts "fhn:M:" OPTION; do
# A node list that does not exist is reported on stderr.
34 if ! [ -e "$nodelist" ]; then
35 echo >&2 "nodelist $nodelist not found."
# Discard the options consumed by getopts so only positional arguments remain.
43 shift $(($OPTIND - 1))
# Drop one positional argument when at least one is present
# (presumably the direction, once it has been read -- that read is not visible here).
46 [ "$#" -ge 1 ] && shift
# For direction "down" the node list is emitted with cat, i.e. in file order.
49 down) print_list=cat;;
# Any positional argument still left over is a usage error.
53 [ "$#" -gt 0 ] && error_usage
# Print how many instances have node "$1" as primary node and a status line
# that does not contain ADMIN_down (grep -c -v counts the non-matching lines).
56 gnt-instance list --no-headers -o status --filter '(pnode == "'"$1"'")' | grep -c -v ADMIN_down
# True when count_instances reports anything other than 0 for node "$1".
59 if [ "$(count_instances "$1")" != 0 ]; then
# A target that still has instances is reported on stderr.
74 if has_instances "$tgt"; then
75 echo >&2 "$tgt not empty."
# Ask the target to reboot itself in one minute; the shutdown message names
# this script and the host that requested the reboot.
79 ssh -n -l root "$tgt" shutdown -r 1 "'reboot requested by $0 on $(hostname -f)'"
81 # wait for target to go down:
82 max_wait='300 seconds'
# Deadline as epoch seconds, computed from the relative max_wait string.
83 wait_until=$(date -d "now +$max_wait" +%s)
# Phase 1: loop for as long as the target still answers pings.
84 while ping -c 5 -q "$tgt" > /dev/null; do
85 echo "[$(date)] $tgt is still up (will wait until $(date -d "@$wait_until"))."
# Stop waiting once the current time is past the deadline.
87 if [ "$(date +%s)" -gt "$wait_until" ]; then
88 echo >&2 "Giving up on waiting for $tgt to go down."
94 echo "[$(date)] $tgt is down. Pausing for $sleep_time seconds"
# Phase 2: fresh deadline (same max_wait), loop until pings are answered again.
98 wait_until=$(date -d "now +$max_wait" +%s)
99 while ! ping -c 5 -q "$tgt" > /dev/null; do
100 echo "[$(date)] $tgt is still down (will wait until $(date -d "@$wait_until"))."
101 if [ "$(date +%s)" -gt "$wait_until" ]; then
102 echo >&2 "Giving up on waiting for $tgt to come back."
109 echo "[$(date)] $tgt is up. Pausing for $sleep_time seconds"
# Phase 3 uses its own limit of 180 minutes.
112 max_wait='180 minutes'
113 wait_until=$(date -d "now +$max_wait" +%s)
# Poll over ssh until "systemctl is-system-running" succeeds on the target.
# NOTE(review): that command also fails for a "degraded" system, so a single
# failed unit would keep this loop going until the deadline -- confirm intended.
114 while ! ssh -n -l root "$tgt" systemctl is-system-running; do
115 echo "[$(date)] $tgt is still booting up (will wait until $(date -d "@$wait_until"))."
116 if [ "$(date +%s)" -gt "$wait_until" ]; then
117 echo >&2 "Giving up on waiting for $tgt to come back."
124 echo "[$(date)] $tgt has finished booting. Pausing for $sleep_time seconds"
128 # move down, i.e. from 2 to 1, ..., 14 to 13.
# First field of the first and of the last line that ${print_list} emits for
# the node list.
130 first_tgt="$(${print_list} "$nodelist" | head -n1 | awk '{print $1}')"
131 last_node="$(${print_list} "$nodelist" | tail -n1 | awk '{print $1}')"
# The first target is checked for instances before anything is moved.
134 if has_instances "$first_tgt"; then
135 echo "$first_tgt not empty."
# When this host is not the last node of the list: make the last node the
# master, copy the node list over and relaunch the procedure there.
139 if [ "$me" != "$last_node" ]; then
140 echo "Making $last_node the new master"
141 ssh -n -l root "$last_node" gnt-cluster master-failover
142 echo "relaunching reboot-cluster on $last_node"
# Create a temporary file on the remote side and capture its path. mktemp is
# used because the Debian-specific tempfile helper is deprecated; like
# tempfile it prints the name of the file it created.
143 tmp="$(ssh -n -l root -t "$last_node" mktemp)"
144 scp "$nodelist" "$last_node:$tmp"
# Run ganeti-reboot-cluster remotely inside a screen session named
# reboot-cluster, passing the copied list and this host as -M. The remote
# shell prints the exit status and then sleeps 12h (presumably so the session
# stays open for inspection).
145 ssh -l root -t "$last_node" screen -S reboot-cluster -m sh -c "\"echo Relaunched on $last_node; ganeti-reboot-cluster -f -n '$tmp' -M '$me' '$direction'; echo ganeti-reboot-cluster exited with \$?.; sleep 12h\""
# Diagnostic for the case that control returns here; sent to stderr like the
# other error messages of this script.
146 echo >&2 "fell through!"
# Feed the (ordered) node list into a subshell that walks it line by line.
150 ${print_list} "$nodelist" | (
# First field of each line is the source node, the rest of the line is ignored.
# read -r keeps backslashes literal instead of treating them as escapes.
152 while read -r src dummy; do
# The current target is checked for instances before migrating onto it.
153 if has_instances "$tgt"; then
154 echo "$tgt not empty."
# Move everything from the source node onto the target, if there is anything.
159 if has_instances "$src"; then
160 echo "Migrating from $src to $tgt."
161 if ! gnt-node migrate -f -n "$tgt" "$src"; then
162 echo >&2 "gnt-node migrate exited with an error. Bailing out."
166 echo "nothing to migrate from $src to $tgt"
# Another emptiness check on the target (where exactly this sits relative to
# the loop is not visible in this excerpt).
171 if has_instances "$tgt"; then
172 echo "$tgt not empty."
# The remaining target is expected to be this very host.
176 if ! [ "$tgt" = "$me" ]; then
177 echo >&2 "I was expecting $tgt to be me ($me) here."
# If a new master was named (-M), have it take over the master role first.
181 if [ "$newmaster" != "" ]; then
182 echo "Making $newmaster the new master"
183 ssh -n -l root "$newmaster" gnt-cluster master-failover
# Finally schedule the reboot of the local host in one minute.
185 shutdown -r 1 "reboot requested by $0"
# This host's name must occur in the node list (fixed-string match).
192 if ! grep -q -F "$me" "$nodelist"; then
193 echo >&2 "my hostname ($me) not found in nodelist"
# "them" is every node-list line that does not contain this host's name.
# NOTE(review): used below as a single node name, so this presumably assumes
# exactly one other node -- confirm against the caller.
196 them="$(grep -v -F "$me" "$nodelist")"
# Pull all instances from the other node onto this host.
198 echo "Migrating from $them to $me."
199 if ! gnt-node migrate -f -n "$me" "$them"; then
200 echo >&2 "gnt-node migrate exited with an error. Bailing out."
# Run activate-disks for every instance whose status is "running".
205 echo "Activating disks.."
206 for instance in $( gnt-instance list -o name --no-headers --filter 'status == "running"' ); do
207 echo " - $instance ..."
208 if ! gnt-instance activate-disks "$instance"; then
209 echo >&2 "gnt-instance activate-disks $instance failed. Bailing out."
# Only relevant when the DRBD proc file is present.
214 if [ -e /proc/drbd ]; then
215 echo "Waiting for drbd to be consistent."
# Keep looping while /proc/drbd mentions an inconsistent disk state or a
# resync "finish" estimate (matches are printed with two lines of context),
# or while the dsa-check-drbd plugin reports a problem.
# grep -E is used instead of the obsolescent egrep; the match is
# case-insensitive because DRBD writes the state as "Inconsistent".
217 while grep -E -C2 --color -i 'inconsistent|finish' /proc/drbd || ! /usr/lib/nagios/plugins/dsa-check-drbd -d All ; do
218 echo "Still waiting.."
# Move all instances from this host over to the other node.
223 echo "Migrating from $me to $them."
224 if ! gnt-node migrate -f -n "$them" "$me"; then
225 echo >&2 "gnt-node migrate exited with an error. Bailing out."
# Queue an at job for 30 minutes from now. The heredoc delimiter is quoted, so
# the text below is handed to at verbatim (nothing is expanded here). The job
# starts a detached screen session named "hbal" whose shell script activates
# the disks of every running instance and only warns when that fails.
229 at 'now + 30 min' << 'EOF'
230 screen -S hbal -d -m sh -c '
231 echo "Activating disks.."
232 for instance in $( gnt-instance list -o name --no-headers --filter "status == \"running\"" ); do
233 echo " - $instance ..."
234 if ! gnt-instance activate-disks "$instance"; then
235 echo >&2 "Warning: gnt-instance activate-disks $instance failed."
# Invoke shutdown with -k and a 30 minute delay, stdin detached from the
# terminal.
248 /sbin/shutdown -k 30 < /dev/null
# Pause the Ganeti watcher for 30 minutes.
250 gnt-cluster watcher pause 30m
# Submit a shutdown job (--submit) for every instance. The name is quoted so
# it always reaches gnt-instance as exactly one argument.
252 for i in $(gnt-instance list --no-headers -o name); do
253 gnt-instance shutdown --no-remember --submit "$i"
# While pgrep still finds root-owned processes named qemu-* or kvm, keep
# re-pausing the watcher.
256 while pgrep -c '^qemu-|^kvm$' -u root ; do
258 gnt-cluster watcher pause 30m
# Queue an at job five minutes out that lets the watcher continue; after that
# the local reboot is scheduled one minute ahead (shutdown -r 1).
261 at 'now + 5 min' << EOF
263 gnt-cluster watcher continue
268 /sbin/shutdown -r 1 </dev/null
# Neither TMUX nor STY is set: warn that the script is not running inside
# tmux or screen.
271 if [ "${TMUX:-}" = "" ] && [ "${STY:-}" = "" ] ; then
272 echo >&2 "Might want to launch me in a screen or tmux."
# Branch taken unless force equals 1 (its body is not visible in this excerpt).
276 if ! [ "$force" = 1 ]; then
# No usable node list file: build one from the cluster's own node names.
284 if ! [ -e "$nodelist" ]; then
# The trap text is double-quoted on purpose: $tmp is expanded right now, so
# the cleanup keeps pointing at this file even if tmp is reassigned later.
286 trap "rm -f '$tmp'" EXIT
287 gnt-node list --no-headers -o name > "$tmp"
# Number of lines in the node list; an empty list is reported on stderr.
291 lines=$(wc -l < "$nodelist")
294 echo >&2 "nodelist $nodelist empty."
# Dispatch on this host's fully qualified name.
298 case "$(hostname -f)" in
303 echo >&2 "Only one node."