run base backups spread over time. This also should help us to recover from failures...
[mirror/dsa-puppet.git] / modules / postgres / templates / backup_server / postgres-make-base-backups.erb
1 #!/bin/bash
2
3 # vim:syn=sh:
4 # vim:ts=4:
5 # vim:et:
6
7
8 # run a bunch of full postgresql backups
9 #  if given a host:port, run this backup,
10 #  else run all defined ones if they have not run recently
11 # vim:syn=sh:
12
13
14 # Copyright 2014 Peter Palfrader
15 #
16 # Permission is hereby granted, free of charge, to any person obtaining
17 # a copy of this software and associated documentation files (the
18 # "Software"), to deal in the Software without restriction, including
19 # without limitation the rights to use, copy, modify, merge, publish,
20 # distribute, sublicense, and/or sell copies of the Software, and to
21 # permit persons to whom the Software is furnished to do so, subject to
22 # the following conditions:
23 #
24 # The above copyright notice and this permission notice shall be
25 # included in all copies or substantial portions of the Software.
26 #
27 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
31 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
32 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
33 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34
35
# Scheduling thresholds, all expressed in seconds.
# MIN_WAIT:         minimum gap between two attempts for the same cluster (4 hours).
# MIN_WAIT_SUCCESS: a cluster is never backed up again sooner than this after
#                   its last successful run (7 days).
# MAX_WAIT_SUCCESS: a cluster is always backed up once its last success is
#                   older than this (10 days).
MIN_WAIT=$(( 4 * 60 * 60 ))
MIN_WAIT_SUCCESS=$(( 7 * 24 * 60 * 60 ))
MAX_WAIT_SUCCESS=$(( 10 * 24 * 60 * 60 ))

# Directory holding the per-cluster flag files; it also serves as the lock target.
STATEDIR=/var/lib/dsa/postgres-make-base-backups

# From here on, referencing an unset variable is an error.
set -u

# Be chatty only when stdin is attached to a terminal.
verbose=0
if [ -t 0 ]; then
    verbose=1
fi
49
# log MESSAGE...
#   Print all arguments, joined by single spaces, on stdout when the global
#   $verbose is greater than zero; print nothing otherwise.  An unset or empty
#   $verbose counts as 0.
#   Returns 0 when the message is suppressed, so a quiet run never looks like
#   a failure to the caller; otherwise returns the status of printf.
log() {
    if [ "${verbose:-0}" -gt 0 ]; then
        # printf instead of echo: messages such as "-n" are printed literally.
        printf '%s\n' "$*"
    fi
}
53
54
# Print the usage line and leave when help was requested.
case "${1:-}" in
    -h|--help)
        echo "Usage: $0 [<host>:<port>]"
        exit 0
        ;;
esac

# An optional first argument restricts this run to a single <host>:<port>;
# an empty value means "consider every defined cluster".
forcehostport="${1-}"
if [ "$#" -gt 0 ]; then
    shift
fi

mkdir -p "$STATEDIR"

# get a lock, but only if we did not force the run
if [ -z "$forcehostport" ]; then
    # The state directory itself is opened on fd 200 and used as the lock.
    exec 200< "$STATEDIR"
    # -w 0: do not wait; if the lock is already held, leave quietly.
    flock -w 0 -e 200 || {
        log "Cannot acquire lock on $STATEDIR."
        exit 0
    }
fi

# Values used by the scheduling loop below.
MYHOSTNAME=$(hostname -f)
DELTA_WAIT_SUCCESS=$(( MAX_WAIT_SUCCESS - MIN_WAIT_SUCCESS ))
80
# Walk the backup definitions supplied on stdin (the here-document below),
# one per line:  host  port  username  cluster  version
# Blank lines and lines whose first field starts with '#' are ignored.
#
# For every entry decide whether a base backup is due (runme=1) and, if so,
# run it.  Two flag files per host/port under $STATEDIR carry the state:
#   <host>-<port>.last-attempt   touched before every attempt
#   <host>-<port>.last-success   touched after the helper exited with 0
while read -r host port username cluster version; do
    case "$host" in
        ''|'#'*) continue ;;
    esac

    flagfile="$STATEDIR/$host-$port.last-attempt"
    flagfilesuccess="$STATEDIR/$host-$port.last-success"
    what="$host:$port $version/$cluster"
    runme=0

    if [ -n "$forcehostport" ]; then
        # Forced mode: only the requested host:port runs, regardless of flags.
        if [ "$forcehostport" = "$host:$port" ]; then
            runme=1
            log "Running forced $what."
        else
            log "Skipping $what."
        fi
    elif ! [ -e "$flagfile" ]; then
        runme=1
        log "Running $what because no flag file exists."
    else
        now=$(date +%s)
        mtime="$(stat --printf "%Y" "$flagfile")"
        delta=$(( now - mtime ))
        if [ "$delta" -lt "$MIN_WAIT" ]; then
            log "Not running $what because last attempt was only ${delta}s ago."
        elif ! [ -e "$flagfilesuccess" ]; then
            runme=1
            log "Running $what because no success flag exists."
        else
            mtime="$(stat --printf "%Y" "$flagfilesuccess")"
            delta=$(( now - mtime ))
            if [ "$delta" -lt "$MIN_WAIT_SUCCESS" ]; then
                log "Not running $what because last success was only ${delta}s ago."
            elif [ "$delta" -gt "$MAX_WAIT_SUCCESS" ]; then
                runme=1
                log "Running $what because last success was ${delta}s ago."
            else
                # get a "randomish" but stable value for this backup run:
                # the first 8 hex digits of a hash over our hostname, the
                # target and the time of its last success, reduced to the
                # width of the MIN..MAX window.  The backup becomes due once
                # the time spent inside that window exceeds this value, which
                # spreads the individual backups over the window.
                val=$(echo "$MYHOSTNAME-$host-$port-$mtime" | sha256sum | head -c 8)
                val=$(( 16#$val ))
                valmod=$(( val % DELTA_WAIT_SUCCESS ))
                after_min=$(( delta - MIN_WAIT_SUCCESS ))
                if [ "$after_min" -gt "$valmod" ]; then
                    runme=1
                    log "Running $what because random computer says so ($after_min > $valmod)."
                else
                    log "Not running $what because random computer says wait ($after_min <= $valmod)."
                fi
            fi
        fi
    fi

    if [ "$runme" -gt 0 ]; then
        touch "$flagfile"
        # stdin of this loop is the list of backup definitions.  Hand the
        # helper /dev/null instead, so that nothing it (or a program it
        # starts) reads can swallow the entries still waiting to be processed.
        /usr/local/bin/postgres-make-one-base-backup "$host" "$port" "$username" "$cluster" "$version" < /dev/null
        rc=$?
        if [ "$rc" -eq 0 ]; then
            touch "$flagfilesuccess"
        fi
    fi
done << EOF
145 seger.debian.org        5432    debian-backup           dak             9.6
146 bmdb1.debian.org        5435    debian-backup           main            9.6
147 bmdb1.debian.org        5436    debian-backup           wannabuild      9.6
148 bmdb1.debian.org        5440    debian-backup           debsources      9.6
149 fasolo.debian.org       5433    debian-backup           dak             9.6
150 sibelius.debian.org     5433    debian-backup           snapshot        9.4
151 <%- if @hostname != "backuphost" -%>
152 moszumanska.debian.org  5432    debian-backup           main            9.1
153 <%- end -%>
154 #
155 # puppet notice:  this is just a partial file.  The tail EOF comes
156 # from a different concat fragment