Move list of clusters to make a base backup of from the script to a conffile
[mirror/dsa-puppet.git] / modules / postgres / templates / backup_server / postgres-make-base-backups.erb
index c60a541..fc56411 100755 (executable)
@@ -41,6 +41,11 @@ STATEDIR=/var/lib/dsa/postgres-make-base-backups
 ####
 set -u
 
+if [ "$(id -u)" = 0 ]; then
+    echo >&2 "Do not run me as root.  Probably you want sudo -u debbackup."
+    exit 1
+fi
+
 SELF="`basename "$0"`[$$]"
 DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS ))
 MYHOSTNAME=$(hostname -f)
@@ -58,7 +63,7 @@ log() {
 format_timedelta() {
     local secs="$1"; shift
     if [ "$secs" -ge 86400 ]; then
-        printf '%d%02d:%02d:%02d\n' $(($secs/3600/24)) $(($secs/3600%24)) $(($secs/60%60)) $(($secs%60))
+        printf '%d+%02d:%02d:%02d\n' $(($secs/3600/24)) $(($secs/3600%24)) $(($secs/60%60)) $(($secs%60))
     else
         printf '%02d:%02d:%02d\n' $(($secs/3600)) $(($secs/60%60)) $(($secs%60))
     fi
@@ -96,7 +101,7 @@ while read host port username  cluster version; do
     flagfilesuccess="$STATEDIR/$host-$port.last-success"
     if [ -n "$forcehostport" ]; then
         if [ "$forcehostport" != "$host:$port" ]; then
-            log "Skipping $host:$port $version/$cluster because this run is limited to $host:$port."
+            log "Skipping $host:$port $version/$cluster because this run is limited to $forcehostport."
             runme=0
         else
             log "Forcing $host:$port $version/$cluster run."
@@ -112,7 +117,7 @@ while read host port username  cluster version; do
             delta=$(( now - mtime ))
             if [ "$delta" -lt "$MIN_WAIT" ]; then
                 runme=0
-                log "Skipping $host:$port $version/$cluster because last attempt was only $(format_timedelta "${delta}") ago."
+                log "Skipping $host:$port $version/$cluster because last attempt was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT}")) ago."
             else
                 if ! [ -e "$flagfilesuccess" ]; then
                     runme=1
@@ -122,22 +127,22 @@ while read host port username  cluster version; do
                     delta=$(( now - mtime ))
                     if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then
                         runme=0
-                        log "Skipping $host:$port $version/$cluster because last success was only $(format_timedelta "${delta}") ago."
+                        log "Skipping $host:$port $version/$cluster because last success was only $(format_timedelta "${delta}") (< $(format_timedelta "${MIN_WAIT_SUCCESS}")) ago."
                     elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then
                         runme=1
-                        log "Planning to run $host:$port $version/$cluster because last success was $(format_timedelta "${delta}") ago."
+                        log "Planning to run $host:$port $version/$cluster because last success was $(format_timedelta "${delta}") (>= $(format_timedelta "${MAX_WAIT_SUCCESS}")) ago."
                     else
                         # get a "randomish" but stable value for this backup run
                         val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8)
                         val=$((16#$val))
-                        valmod=$(($val % $DELTA_WAIT_SUCCESS))
-                        after_min=$((delta - MIN_WAIT_SUCCESS))
-                        if [ "$after_min" -gt "$valmod" ]; then
-                            runme=1
-                            log "Planning to run $host:$port $version/$cluster because random computer says so ($after_min > $valmod)."
-                        else
+                        rnd_cutoff=$(($val % $DELTA_WAIT_SUCCESS))
+                        age_after_min=$((delta - MIN_WAIT_SUCCESS))
+                        if [ "$age_after_min" -lt "$rnd_cutoff" ]; then
                             runme=0
-                            log "Skipping $host:$port $version/$cluster because random computer says wait ($after_min <= $valmod)."
+                            log "Skipping $host:$port $version/$cluster because random computer says wait ([$(format_timedelta "${age_after_min}") < $(format_timedelta "${rnd_cutoff}") (< $(format_timedelta "${DELTA_WAIT_SUCCESS}"))] + $(format_timedelta "${MIN_WAIT_SUCCESS}"))."
+                        else
+                            runme=1
+                            log "Planning to run $host:$port $version/$cluster because random computer says so ($(format_timedelta "${age_after_min}") >= $(format_timedelta "${rnd_cutoff}"))."
                         fi
                     fi
                 fi
@@ -152,22 +157,11 @@ while read host port username  cluster version; do
             log "Running $host:$port $version/$cluster."
             /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version"
             rc=$?
+            log "Base backup for $host:$port $version/$cluster exited with rc $rc."
             [ "$rc" = 0 ] && touch "$flagfilesuccess"
             flock -u 201
         else
             log "Cannot acquire lock on $flagfile, skipping $host:$port $version/$cluster."
         fi
     fi
-done << EOF
-seger.debian.org       5432    debian-backup           dak             9.6
-bmdb1.debian.org       5435    debian-backup           main            9.6
-bmdb1.debian.org       5436    debian-backup           wannabuild      9.6
-bmdb1.debian.org       5440    debian-backup           debsources      9.6
-fasolo.debian.org      5433    debian-backup           dak             9.6
-sibelius.debian.org    5433    debian-backup           snapshot        9.4
-<%- if @hostname != "backuphost" -%>
-moszumanska.debian.org 5432    debian-backup           main            9.1
-<%- end -%>
-#
-# puppet notice:  this is just a partial file.  The tail EOF comes
-# from a different concat fragment
+done < '<%= scope['postgres::backup_server::globals::base_backup_clusters'] %>'