MIN_WAIT=$(( 60*60*4 ))
MIN_WAIT_SUCCESS=$(( 60*60*24*7 ))
MAX_WAIT_SUCCESS=$(( 60*60*24*10 ))
-
STATEDIR=/var/lib/dsa/postgres-make-base-backups
+####
set -u
+SELF="`basename "$0"`[$$]" # script name plus PID; log() passes this to logger as the syslog tag
+DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS )) # width, in seconds, of the window between the minimum and maximum success age
+MYHOSTNAME=$(hostname -f) # hashed together with host, port and mtime further down to derive the per-run cutoff
+
if [ -t 0 ]; then
verbose=1
else
log() {
[ "$verbose" -gt 0 ] && echo "$*"
+ logger -p daemon.info -t "$SELF" "$*" # always sent to syslog (priority daemon.info, tag $SELF), whether or not verbose echoed it
+} # logger is now the last command, so log's exit status is logger's rather than that of the verbose test
+format_timedelta() { # Print a duration given in whole seconds ($1) as HH:MM:SS, or D+HH:MM:SS from one day upwards.
+ local secs="$1"; shift # $1: duration in seconds
+ if [ "$secs" -ge 86400 ]; then # 86400 s = one day: switch to the form with a leading day count
+ printf '%d+%02d:%02d:%02d\n' $(($secs/3600/24)) $(($secs/3600%24)) $(($secs/60%60)) $(($secs%60)) # days+hours:minutes:seconds, e.g. 90061 -> 1+01:01:01
+ else
+ printf '%02d:%02d:%02d\n' $(($secs/3600)) $(($secs/60%60)) $(($secs%60)) # under a day: hours:minutes:seconds, e.g. 3661 -> 01:01:01
+ fi
}
fi
fi
-DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS ))
-MYHOSTNAME=$(hostname -f)
-
while read host port username cluster version; do
[ "${host#\#}" = "$host" ] || continue
[ -z "$host" ] && continue
flagfilesuccess="$STATEDIR/$host-$port.last-success"
if [ -n "$forcehostport" ]; then
if [ "$forcehostport" != "$host:$port" ]; then
- log "Skipping $host:$port $version/$cluster."
+ log "Skipping $host:$port $version/$cluster because this run is limited to $forcehostport."
runme=0
else
- log "Running forced $host:$port $version/$cluster."
+ log "Forcing $host:$port $version/$cluster run."
runme=1
fi
else
if ! [ -e "$flagfile" ]; then
runme=1
- log "Running $host:$port $version/$cluster because no flag file exists."
+ log "Planning to run $host:$port $version/$cluster because no flag file exists."
else
now=$(date +%s)
mtime="$(stat --printf "%Y" "$flagfile")"
delta=$(( now - mtime ))
if [ "$delta" -lt "$MIN_WAIT" ]; then
runme=0
- log "Not running $host:$port $version/$cluster because last attempt was only ${delta}s ago."
+ log "Skipping $host:$port $version/$cluster because last attempt was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT}")) ago."
else
if ! [ -e "$flagfilesuccess" ]; then
runme=1
- log "Running $host:$port $version/$cluster because no success flag exists."
+ log "Planning to run $host:$port $version/$cluster because no success flag exists."
else
mtime="$(stat --printf "%Y" "$flagfilesuccess")"
delta=$(( now - mtime ))
if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then
runme=0
- log "Not running $host:$port $version/$cluster because last success was only ${delta}s ago."
+ log "Skipping $host:$port $version/$cluster because last success was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT_SUCCESS}")) ago."
elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then
runme=1
- log "Running $host:$port $version/$cluster because last success was ${delta}s ago."
+ log "Planning to run $host:$port $version/$cluster because last success was $(format_timedelta "${delta}") (>= $(format_timedelta "${MAX_WAIT_SUCCESS}")) ago."
else
# get a "randomish" but stable value for this backup run
val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8)
val=$((16#$val))
- valmod=$(($val % $DELTA_WAIT_SUCCESS))
- after_min=$((delta - MIN_WAIT_SUCCESS))
- if [ "$after_min" -gt "$valmod" ]; then
- runme=1
- log "Running $host:$port $version/$cluster because random computer says so ($after_min > $valmod)."
- else
+ rnd_cuttoff=$(($val % $DELTA_WAIT_SUCCESS))
+ age_after_min=$((delta - MIN_WAIT_SUCCESS))
+ if [ "$age_after_min" -lt "$rnd_cuttoff" ]; then
runme=0
- log "Not running $host:$port $version/$cluster because random computer says wait ($after_min <= $valmod)."
+ log "Skipping $host:$port $version/$cluster because random computer says wait ([$(format_timedelta "${age_after_min}") < $(format_timedelta "${rnd_cuttoff}") (< $(format_timedelta "${DELTA_WAIT_SUCCESS}"))] + $(format_timedelta "${MIN_WAIT_SUCCESS}"))."
+ else
+ runme=1
+ log "Planning to run $host:$port $version/$cluster because random computer says so ($(format_timedelta "${age_after_min}") >= $(format_timedelta "${rnd_cuttoff}"))."
fi
fi
fi
if [ "$runme" -gt 0 ]; then
touch "$flagfile"
- /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version"
- rc=$?
- [ "$rc" = 0 ] && touch "$flagfilesuccess"
+ exec 201< "$flagfile"
+ if flock -w 0 -e 201; then
+ log "Running $host:$port $version/$cluster."
+ /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version"
+ rc=$?
+ log "Base backup for $host:$port $version/$cluster exited with rc $rc."
+ [ "$rc" = 0 ] && touch "$flagfilesuccess"
+ flock -u 201
+ else
+ log "Cannot acquire lock on $flagfile, skipping $host:$port $version/$cluster."
+ fi
fi
done << EOF
seger.debian.org 5432 debian-backup dak 9.6
bmdb1.debian.org 5436 debian-backup wannabuild 9.6
bmdb1.debian.org 5440 debian-backup debsources 9.6
fasolo.debian.org 5433 debian-backup dak 9.6
-sibelius.debian.org 5433 debian-backup snapshot 9.4
-<%- if @hostname != "backuphost" -%>
-moszumanska.debian.org 5432 debian-backup main 9.1
-<%- end -%>
+sallinen.debian.org 5473 debian-backup snapshot 9.6
#
# puppet notice: this is just a partial file. The tail EOF comes
# from a different concat fragment