X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=modules%2Fpostgres%2Ftemplates%2Fbackup_server%2Fpostgres-make-base-backups.erb;h=56d7f7777bd53bf9b07528b9abb09dfd4abb57bf;hb=7b5f8bd99839bcbd9080209121f58f9aee5a9831;hp=eaac39a174b11bcb2d370adfbc1b3c0f5bf1de08;hpb=ba9d4e7eceff2f88efbac2b16a4f08997ccd9d80;p=mirror%2Fdsa-puppet.git diff --git a/modules/postgres/templates/backup_server/postgres-make-base-backups.erb b/modules/postgres/templates/backup_server/postgres-make-base-backups.erb index eaac39a17..56d7f7777 100755 --- a/modules/postgres/templates/backup_server/postgres-make-base-backups.erb +++ b/modules/postgres/templates/backup_server/postgres-make-base-backups.erb @@ -36,11 +36,20 @@ MIN_WAIT=$(( 60*60*4 )) MIN_WAIT_SUCCESS=$(( 60*60*24*7 )) MAX_WAIT_SUCCESS=$(( 60*60*24*10 )) - STATEDIR=/var/lib/dsa/postgres-make-base-backups +#### set -u +if [ "$(id -u)" = 0 ]; then + echo >&2 "Do not run me as root. Probably you want sudo -u debbackup." + exit 1 +fi + +SELF="`basename "$0"`[$$]" +DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS )) +MYHOSTNAME=$(hostname -f) + if [ -t 0 ]; then verbose=1 else @@ -49,6 +58,15 @@ fi log() { [ "$verbose" -gt 0 ] && echo "$*" + logger -p daemon.info -t "$SELF" "$*" +} +format_timedelta() { + local secs="$1"; shift + if [ "$secs" -ge 86400 ]; then + printf '%d+%02d:%02d:%02d\n' $(($secs/3600/24)) $(($secs/3600%24)) $(($secs/60%60)) $(($secs%60)) + else + printf '%02d:%02d:%02d\n' $(($secs/3600)) $(($secs/60%60)) $(($secs%60)) + fi } @@ -75,9 +93,6 @@ if [ -z "$forcehostport" ]; then fi fi -DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS )) -MYHOSTNAME=$(hostname -f) - while read host port username cluster version; do [ "${host#\#}" = "$host" ] || continue [ -z "$host" ] && continue @@ -86,48 +101,48 @@ while read host port username cluster version; do flagfilesuccess="$STATEDIR/$host-$port.last-success" if [ -n "$forcehostport" ]; then if [ "$forcehostport" != "$host:$port" ]; then - log "Skipping $host:$port $version/$cluster." + log "Skipping $host:$port $version/$cluster because this run is limited to $forcehostport." runme=0 else - log "Running forced $host:$port $version/$cluster." + log "Forcing $host:$port $version/$cluster run." runme=1 fi else if ! [ -e "$flagfile" ]; then runme=1 - log "Running $host:$port $version/$cluster because no flag file exists." + log "Planning to run $host:$port $version/$cluster because no flag file exists." else now=$(date +%s) mtime="$(stat --printf "%Y" "$flagfile")" delta=$(( now - mtime )) if [ "$delta" -lt "$MIN_WAIT" ]; then runme=0 - log "Not running $host:$port $version/$cluster because last attempt was only ${delta}s ago." + log "Skipping $host:$port $version/$cluster because last attempt was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT}")) ago." else if ! [ -e "$flagfilesuccess" ]; then runme=1 - log "Running $host:$port $version/$cluster because no success flag exists." + log "Planning to run $host:$port $version/$cluster because no success flag exists." else mtime="$(stat --printf "%Y" "$flagfilesuccess")" delta=$(( now - mtime )) if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then runme=0 - log "Not running $host:$port $version/$cluster because last success was only ${delta}s ago." + log "Skipping $host:$port $version/$cluster because last success was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT_SUCCESS}")) ago." elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then runme=1 - log "Running $host:$port $version/$cluster because last success was ${delta}s ago." + log "Planning to run $host:$port $version/$cluster because last success was $(format_timedelta "${delta}") (>= $(format_timedelta "${MAX_WAIT_SUCCESS}")) ago." else # get a "randomish" but stable value for this backup run val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8) val=$((16#$val)) - valmod=$(($val % $DELTA_WAIT_SUCCESS)) - after_min=$((delta - MIN_WAIT_SUCCESS)) - if [ "$after_min" -gt "$valmod" ]; then - runme=1 - log "Running $host:$port $version/$cluster because random computer says so ($after_min > $valmod)." - else + rnd_cuttoff=$(($val % $DELTA_WAIT_SUCCESS)) + age_after_min=$((delta - MIN_WAIT_SUCCESS)) + if [ "$age_after_min" -lt "$rnd_cuttoff" ]; then runme=0 - log "Not running $host:$port $version/$cluster because random computer says wait ($after_min <= $valmod)." + log "Skipping $host:$port $version/$cluster because random computer says wait ([$(format_timedelta "${age_after_min}") < $(format_timedelta "${rnd_cuttoff}") (< $(format_timedelta "${DELTA_WAIT_SUCCESS}"))] + $(format_timedelta "${MIN_WAIT_SUCCESS}"))." + else + runme=1 + log "Planning to run $host:$port $version/$cluster because random computer says so ($(format_timedelta "${age_after_min}") >= $(format_timedelta "${rnd_cuttoff}"))." fi fi fi @@ -137,20 +152,22 @@ while read host port username cluster version; do if [ "$runme" -gt 0 ]; then touch "$flagfile" - /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" - rc=$? - [ "$rc" = 0 ] && touch "$flagfilesuccess" + exec 201< "$flagfile" + if flock -w 0 -e 201; then + log "Running $host:$port $version/$cluster." + /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" + rc=$? + log "Base backup for $host:$port $version/$cluster exited with rc $rc." + [ "$rc" = 0 ] && touch "$flagfilesuccess" + flock -u 201 + else + log "Cannot acquire lock on $flagfile, skipping $host:$port $version/$cluster." + fi fi done << EOF -seger.debian.org 5432 debian-backup dak 9.6 bmdb1.debian.org 5435 debian-backup main 9.6 -bmdb1.debian.org 5436 debian-backup wannabuild 9.6 bmdb1.debian.org 5440 debian-backup debsources 9.6 -fasolo.debian.org 5433 debian-backup dak 9.6 -sibelius.debian.org 5433 debian-backup snapshot 9.4 -<%- if @hostname != "backuphost" -%> -moszumanska.debian.org 5432 debian-backup main 9.1 -<%- end -%> +sallinen.debian.org 5473 debian-backup snapshot 9.6 # # puppet notice: this is just a partial file. The tail EOF comes # from a different concat fragment