- [ "${host#\#}" = "$host" ] || continue
-
- if [ -n "$forcehostport" ] && [ "$forcehostport" != "$host:$port" ]; then
- [ "$verbose" -gt 0 ] && echo "Skipping $host:$port $version/$cluster."
- continue
- fi
-
- label="$thishost-$date-$host-$cluster-$version-backup"
- [ "$verbose" -gt 0 ] && echo "Doing $host:$port $version/$cluster: $label"
-
- target="$cluster.BASE.$label.tar.gz"
- tmp=$(tempfile -d "$ROOTDIR" -p "BASE-$host:$port-" -s ".tar.gz")
- trap "rm -f '$tmp'" EXIT
-
- /usr/lib/postgresql/"$version"/bin/pg_basebackup --format=tar --pgdata=- --label="$label" --host="$host" --port="$port" --username="$username" --no-password $console | pigz > "$tmp"
- if ! [ "${PIPESTATUS[0]}" -eq 0 ]; then
- echo >&2 "pg_basebackup failed with exit code ${PIPESTATUS[0]}"
- exit 1
- fi
- mv "$tmp" "$ROOTDIR/${host%%.*}/$target"
+ [ "${host#\#}" = "$host" ] || continue # skip entries whose host field starts with '#'
+ [ -z "$host" ] && continue # skip entries whose host field is empty
+
+ flagfile="$STATEDIR/$host-$port.last-attempt" # touched below right before every backup attempt
+ flagfilesuccess="$STATEDIR/$host-$port.last-success" # touched below only when the backup helper exits 0
+ if [ -n "$forcehostport" ]; then # forced mode: only the host:port match decides, flag files are not consulted
+ if [ "$forcehostport" != "$host:$port" ]; then # this entry is not the forced one
+ log "Skipping $host:$port $version/$cluster."
+ runme=0
+ else
+ log "Running forced $host:$port $version/$cluster."
+ runme=1
+ fi
+ else # normal mode: decide from the ages of the two flag files
+ if ! [ -e "$flagfile" ]; then # no attempt recorded for this host/port
+ runme=1
+ log "Running $host:$port $version/$cluster because no flag file exists."
+ else
+ now=$(date +%s) # current time in seconds since the epoch
+ mtime="$(stat --printf "%Y" "$flagfile")" # last-attempt flag's modification time, seconds since the epoch
+ delta=$(( now - mtime )) # seconds elapsed since the last attempt
+ if [ "$delta" -lt "$MIN_WAIT" ]; then # rate limit: no new attempt within MIN_WAIT seconds of the previous one
+ runme=0
+ log "Not running $host:$port $version/$cluster because last attempt was only ${delta}s ago."
+ else
+ if ! [ -e "$flagfilesuccess" ]; then # attempted before, but no success recorded
+ runme=1
+ log "Running $host:$port $version/$cluster because no success flag exists."
+ else
+ mtime="$(stat --printf "%Y" "$flagfilesuccess")" # from here on mtime/delta refer to the last success, not the last attempt
+ delta=$(( now - mtime )) # seconds elapsed since the last success
+ if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then # last success is too recent
+ runme=0
+ log "Not running $host:$port $version/$cluster because last success was only ${delta}s ago."
+ elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then # last success is older than the maximum: always run
+ runme=1
+ log "Running $host:$port $version/$cluster because last success was ${delta}s ago."
+ else # MIN_WAIT_SUCCESS <= delta <= MAX_WAIT_SUCCESS: a per-backup threshold decides
+ # Derive a pseudo-random value that stays the same on every invocation until the success flag's mtime changes; the hash input is $MYHOSTNAME, host, port and that mtime.
+ val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8) # first 8 hex digits of the SHA-256 digest
+ val=$((16#$val)) # interpret those digits as a base-16 integer
+ valmod=$(($val % $DELTA_WAIT_SUCCESS)) # threshold in seconds; NOTE(review): DELTA_WAIT_SUCCESS is set outside this hunk, a value of 0 would be a division by zero here - confirm it is always > 0
+ after_min=$((delta - MIN_WAIT_SUCCESS)) # seconds elapsed beyond the minimum success interval
+ if [ "$after_min" -gt "$valmod" ]; then # run once the time past the minimum exceeds this backup's threshold
+ runme=1
+ log "Running $host:$port $version/$cluster because random computer says so ($after_min > $valmod)."
+ else
+ runme=0
+ log "Not running $host:$port $version/$cluster because random computer says wait ($after_min <= $valmod)."
+ fi
+ fi
+ fi
+ fi
+ fi
+ fi
+
+ if [ "$runme" -gt 0 ]; then # runme has been set to 0 or 1 on every path above
+ touch "$flagfile" # record the attempt before starting, so a failed run is still subject to the MIN_WAIT check next time
+ /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" # NOTE(review): presumably does the backup the removed pg_basebackup lines did inline - confirm
+ rc=$? # exit status of the backup helper
+ [ "$rc" = 0 ] && touch "$flagfilesuccess" # only a zero exit status refreshes the success flag
+ fi