X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=modules%2Fpostgres%2Ftemplates%2Fbackup_server%2Fpostgres-make-base-backups.erb;fp=modules%2Fpostgres%2Ftemplates%2Fbackup_server%2Fpostgres-make-base-backups.erb;h=b0d7d32b46de615380af09d8ec9acb9d0048cf0d;hb=76ca91bce24ecbcbcc4e62a37aa06fd0fb9f96c7;hp=2e6d7bed5443dcbbe8939fa476f78f1831c1632c;hpb=46cee04ab06b23ab6e9e4baba655cf470d10cfc4;p=mirror%2Fdsa-puppet.git diff --git a/modules/postgres/templates/backup_server/postgres-make-base-backups.erb b/modules/postgres/templates/backup_server/postgres-make-base-backups.erb index 2e6d7bed5..b0d7d32b4 100755 --- a/modules/postgres/templates/backup_server/postgres-make-base-backups.erb +++ b/modules/postgres/templates/backup_server/postgres-make-base-backups.erb @@ -1,6 +1,13 @@ #!/bin/bash +# vim:syn=sh: +# vim:ts=4: +# vim:et: + + # run a bunch of full postgresql backups +# if given a host:port, run this backup, +# else run all defined once if they have not run recently # vim:syn=sh: @@ -26,68 +33,131 @@ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -set -e +MIN_WAIT=$(( 60*60*4 )) +MIN_WAIT_SUCCESS=$(( 60*60*24*7 )) +MAX_WAIT_SUCCESS=$(( 60*60*24*10 )) +STATEDIR=/var/lib/dsa/postgres-make-base-backups + +#### set -u -CONFFILE=/etc/nagios/dsa-check-backuppg.conf -ROOTDIR=$(perl -MYAML -e "print YAML::LoadFile('$CONFFILE')->{'rootdir'}") -if [ -z "$ROOTDIR" ]; then - echo >&2 "Could not learn rootdir from $CONFFILE" - exit 1 -fi +SELF="`basename "$0"`[$$]" +DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS )) +MYHOSTNAME=$(hostname -f) if [ -t 0 ]; then - verbose=1 + verbose=1 else - verbose=0 + verbose=0 fi -if [ "$verbose" -gt 0 ]; then - console="--progress --verbose" -else - console="" -fi +log() { + [ "$verbose" -gt 0 ] && echo "$*" + logger -p daemon.info -t "$SELF" "$*" +} +format_timedelta() { + local secs="$1"; shift + if [ "$secs" -ge 86400 ]; then + printf '%d+%02d:%02d:%02d\n' $(($secs/3600/24)) $(($secs/3600%24)) $(($secs/60%60)) $(($secs%60)) + else + printf '%02d:%02d:%02d\n' $(($secs/3600)) $(($secs/60%60)) $(($secs%60)) + fi +} + if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then - echo "Usage: $0 [:]" - exit 0 + echo "Usage: $0 [:]" + exit 0 fi if [ "$#" -gt 0 ]; then - forcehostport="$1" - shift + forcehostport="$1" + shift else - forcehostport="" + forcehostport="" fi -export PGSSLMODE=verify-full -export PGSSLROOTCERT=/etc/ssl/debian/certs/ca.crt +mkdir -p "$STATEDIR" -date=$(date "+%Y%m%d-%H%M%S") -thishost=$(hostname -f) +# get a lock, but only if we did not force the run +if [ -z "$forcehostport" ]; then + exec 200< "$STATEDIR" + if ! flock -w 0 -e 200; then + log "Cannot acquire lock on $STATEDIR." + exit 0 + fi +fi while read host port username cluster version; do - [ "${host#\#}" = "$host" ] || continue - [ -z "$host" ] && continue - - if [ -n "$forcehostport" ] && [ "$forcehostport" != "$host:$port" ]; then - [ "$verbose" -gt 0 ] && echo "Skipping $host:$port $version/$cluster." - continue - fi - - label="$thishost-$date-$host-$cluster-$version-backup" - [ "$verbose" -gt 0 ] && echo "Doing $host:$port $version/$cluster: $label" - - target="$cluster.BASE.$label.tar.gz" - tmp=$(tempfile -d "$ROOTDIR" -p "BASE-$host:$port-" -s ".tar.gz") - trap "rm -f '$tmp'" EXIT - - /usr/lib/postgresql/"$version"/bin/pg_basebackup --format=tar --pgdata=- --label="$label" --host="$host" --port="$port" --username="$username" --no-password $console | pigz > "$tmp" - if ! [ "${PIPESTATUS[0]}" -eq 0 ]; then - echo >&2 "pg_basebackup failed with exit code ${PIPESTATUS[0]}" - exit 1 - fi - mv "$tmp" "$ROOTDIR/${host%%.*}/$target" + [ "${host#\#}" = "$host" ] || continue + [ -z "$host" ] && continue + + flagfile="$STATEDIR/$host-$port.last-attempt" + flagfilesuccess="$STATEDIR/$host-$port.last-success" + if [ -n "$forcehostport" ]; then + if [ "$forcehostport" != "$host:$port" ]; then + log "Skipping $host:$port $version/$cluster because this run is limited to $host:$port." + runme=0 + else + log "Forcing $host:$port $version/$cluster run." + runme=1 + fi + else + if ! [ -e "$flagfile" ]; then + runme=1 + log "Planning to run $host:$port $version/$cluster because no flag file exists." + else + now=$(date +%s) + mtime="$(stat --printf "%Y" "$flagfile")" + delta=$(( now - mtime )) + if [ "$delta" -lt "$MIN_WAIT" ]; then + runme=0 + log "Skipping $host:$port $version/$cluster because last attempt was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT}")) ago." + else + if ! [ -e "$flagfilesuccess" ]; then + runme=1 + log "Planning to run $host:$port $version/$cluster because no success flag exists." + else + mtime="$(stat --printf "%Y" "$flagfilesuccess")" + delta=$(( now - mtime )) + if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then + runme=0 + log "Skipping $host:$port $version/$cluster because last success was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT_SUCCESS}")) ago." + elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then + runme=1 + log "Planning to run $host:$port $version/$cluster because last success was $(format_timedelta "${delta}") (>= $(format_timedelta "${MAX_WAIT_SUCCESS}")) ago." + else + # get a "randomish" but stable value for this backup run + val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8) + val=$((16#$val)) + rnd_cuttoff=$(($val % $DELTA_WAIT_SUCCESS)) + age_after_min=$((delta - MIN_WAIT_SUCCESS)) + if [ "$age_after_min" -lt "$rnd_cuttoff" ]; then + runme=0 + log "Skipping $host:$port $version/$cluster because random computer says wait ([$(format_timedelta "${age_after_min}") < $(format_timedelta "${rnd_cuttoff}") (< $(format_timedelta "${DELTA_WAIT_SUCCESS}"))] + $(format_timedelta "${MIN_WAIT_SUCCESS}"))." + else + runme=1 + log "Planning to run $host:$port $version/$cluster because random computer says so ($(format_timedelta "${age_after_min}") >= $(format_timedelta "${rnd_cuttoff}"))." + fi + fi + fi + fi + fi + fi + + if [ "$runme" -gt 0 ]; then + touch "$flagfile" + exec 201< "$flagfile" + if flock -w 0 -e 201; then + log "Running $host:$port $version/$cluster." + /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" + rc=$? + [ "$rc" = 0 ] && touch "$flagfilesuccess" + flock -u 201 + else + log "Cannot acquire lock on $flagfile, skipping $host:$port $version/$cluster." + fi + fi done << EOF seger.debian.org 5432 debian-backup dak 9.6 bmdb1.debian.org 5435 debian-backup main 9.6 @@ -95,6 +165,7 @@ bmdb1.debian.org 5436 debian-backup wannabuild 9.6 bmdb1.debian.org 5440 debian-backup debsources 9.6 fasolo.debian.org 5433 debian-backup dak 9.6 sibelius.debian.org 5433 debian-backup snapshot 9.4 +sallinen.debian.org 5473 debian-backup snapshot 9.6 <%- if @hostname != "backuphost" -%> moszumanska.debian.org 5432 debian-backup main 9.1 <%- end -%>