From ed498450d07565df2d9d0a5566b1d701841b94a9 Mon Sep 17 00:00:00 2001 From: Peter Palfrader Date: Tue, 6 Mar 2018 23:03:59 +0100 Subject: [PATCH] postgres-make-base-backups: locks and logs - get locks for each individual base backup so we do not run parallel ones in the presence of forced runs - also log to syslog --- .../postgres-make-base-backups.erb | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/modules/postgres/templates/backup_server/postgres-make-base-backups.erb b/modules/postgres/templates/backup_server/postgres-make-base-backups.erb index eaac39a17..18046bbb9 100755 --- a/modules/postgres/templates/backup_server/postgres-make-base-backups.erb +++ b/modules/postgres/templates/backup_server/postgres-make-base-backups.erb @@ -36,11 +36,15 @@ MIN_WAIT=$(( 60*60*4 )) MIN_WAIT_SUCCESS=$(( 60*60*24*7 )) MAX_WAIT_SUCCESS=$(( 60*60*24*10 )) - STATEDIR=/var/lib/dsa/postgres-make-base-backups +#### set -u +SELF="`basename "$0"`[$$]" +DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS )) +MYHOSTNAME=$(hostname -f) + if [ -t 0 ]; then verbose=1 else @@ -49,6 +53,7 @@ fi log() { [ "$verbose" -gt 0 ] && echo "$*" + logger -p daemon.info -t "$SELF" "$*" } @@ -75,9 +80,6 @@ if [ -z "$forcehostport" ]; then fi fi -DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS )) -MYHOSTNAME=$(hostname -f) - while read host port username cluster version; do [ "${host#\#}" = "$host" ] || continue [ -z "$host" ] && continue @@ -86,36 +88,36 @@ while read host port username cluster version; do flagfilesuccess="$STATEDIR/$host-$port.last-success" if [ -n "$forcehostport" ]; then if [ "$forcehostport" != "$host:$port" ]; then - log "Skipping $host:$port $version/$cluster." + log "Skipping $host:$port $version/$cluster because this run is limited to $host:$port." runme=0 else - log "Running forced $host:$port $version/$cluster." + log "Forcing $host:$port $version/$cluster run." runme=1 fi else if ! 
[ -e "$flagfile" ]; then runme=1 - log "Running $host:$port $version/$cluster because no flag file exists." + log "Planning to run $host:$port $version/$cluster because no flag file exists." else now=$(date +%s) mtime="$(stat --printf "%Y" "$flagfile")" delta=$(( now - mtime )) if [ "$delta" -lt "$MIN_WAIT" ]; then runme=0 - log "Not running $host:$port $version/$cluster because last attempt was only ${delta}s ago." + log "Skipping $host:$port $version/$cluster because last attempt was only ${delta}s ago." else if ! [ -e "$flagfilesuccess" ]; then runme=1 - log "Running $host:$port $version/$cluster because no success flag exists." + log "Planning to run $host:$port $version/$cluster because no success flag exists." else mtime="$(stat --printf "%Y" "$flagfilesuccess")" delta=$(( now - mtime )) if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then runme=0 - log "Not running $host:$port $version/$cluster because last success was only ${delta}s ago." + log "Skipping $host:$port $version/$cluster because last success was only ${delta}s ago." elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then runme=1 - log "Running $host:$port $version/$cluster because last success was ${delta}s ago." + log "Planning to run $host:$port $version/$cluster because last success was ${delta}s ago." else # get a "randomish" but stable value for this backup run val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8) @@ -124,10 +126,10 @@ while read host port username cluster version; do after_min=$((delta - MIN_WAIT_SUCCESS)) if [ "$after_min" -gt "$valmod" ]; then runme=1 - log "Running $host:$port $version/$cluster because random computer says so ($after_min > $valmod)." + log "Planning to run $host:$port $version/$cluster because random computer says so ($after_min > $valmod)." else runme=0 - log "Not running $host:$port $version/$cluster because random computer says wait ($after_min <= $valmod)." 
+ log "Skipping $host:$port $version/$cluster because random computer says wait ($after_min <= $valmod)." fi fi fi @@ -137,9 +139,16 @@ while read host port username cluster version; do if [ "$runme" -gt 0 ]; then touch "$flagfile" - /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" - rc=$? - [ "$rc" = 0 ] && touch "$flagfilesuccess" + exec 201< "$flagfile" + if flock -w 0 -e 201; then + log "Running $host:$port $version/$cluster." + /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" + rc=$? + [ "$rc" = 0 ] && touch "$flagfilesuccess" + flock -u 201 + else + log "Cannot acquire lock on $flagfile, skipping $host:$port $version/$cluster." + fi fi done << EOF seger.debian.org 5432 debian-backup dak 9.6 -- 2.20.1