import time
import re
import os
+import errno
import sys
import yaml
import optparse
+import socket
def load_conf(cf):
if cf is not None:
configfile = '/etc/nagios/dsa-check-backuppg.conf'
f = open(configfile)
- config = yaml.load(f.read())
+ config = yaml.safe_load(f.read())
f.close()
return config
problems_per_db[host][db] = True
-def wal_pre(w):
+def wal_pre(w, host, db):
(w1,w2) = w
if w2 == 0:
w1 -= 1
- w2 = 0xFE
+ w2 = 0xFF
else:
w2 -= 1
f.close()
return i
def get_retention(config, host, db):
    """Return the retention period configured for database ``db`` on ``host``.

    The most specific setting wins:

      1. ``config['backups'][host][db]['retention']`` (only when the
         per-database entry is a dict),
      2. ``config['backups'][host]['_retention']`` (per-host default),
      3. ``config['retention']`` (global default).

    The value is returned unchanged; the caller adds it to a backup's start
    timestamp and compares the sum against ``time.time()``.

    Raises:
        AssertionError: if the configuration does not have the expected
            shape or the selected value is not an integer.  The checks are
            raised explicitly instead of using ``assert`` statements so that
            they are still enforced when Python runs with ``-O``, while
            callers keep seeing the same exception type as before.
    """
    def require(condition, message):
        # Explicit raise: unlike an ``assert`` statement this is not
        # compiled away under ``python -O``.
        if not condition:
            raise AssertionError(message)

    require('retention' in config,
            "config has no global 'retention' setting")
    require('backups' in config,
            "config has no 'backups' section")

    backups = config['backups']
    require(isinstance(backups, dict),
            "config['backups'] must be a mapping")
    require(host in backups,
            "host %r is not listed in config['backups']" % (host,))

    host_conf = backups[host]
    require(isinstance(host_conf, dict),
            "config['backups'][%r] must be a mapping" % (host,))
    require(db in host_conf,
            "database %r is not listed in config['backups'][%r]" % (db, host))

    db_conf = host_conf[db]
    if isinstance(db_conf, dict) and 'retention' in db_conf:
        retention = db_conf['retention']
    elif '_retention' in host_conf:
        retention = host_conf['_retention']
    else:
        retention = config['retention']

    # bool is a subclass of int, so a YAML value such as "yes" would
    # otherwise be accepted and act as a one-second retention period.
    require(isinstance(retention, int) and not isinstance(retention, bool),
            "retention for %r/%r must be an integer, got %r"
            % (host, db, retention))
    return retention
parser = optparse.OptionParser()
parser.set_usage("%prog [-c=<CONFFILE>] (nagios mode)\n" +
os.chdir(config['rootdir'])
for dir in os.listdir('.'):
- if dir.startswith('.'):
+ if dir.startswith('.') or dir.endswith('.old') or dir == 'lost+found':
note_info('IGNORED', dir)
continue
if not os.path.isdir(dir):
- if min(os.path.getmtime(dir), os.path.getctime(dir)) + 3600*4 > time.time():
+ try:
+ mtime = os.path.getmtime(dir)
+ ctime = os.path.getctime(dir)
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ continue
+ else:
+ raise e
+ if min(mtime, ctime) + 3600*4 > time.time():
note_info('IGNORED', dir)
else:
note_warning('NOT-A-DIR', dir)
files.sort()
- unhandled_backups = copy.copy(config['backups'][dir])
+ notyetseen_dbs = copy.copy(config['backups'][dir])
ignored_dbs = {}
backup_state = {}
continue
(db, type) = r.groups(1)
- if not isinstance(config['backups'][dir], list) or not db in config['backups'][dir]:
+ if not isinstance(config['backups'][dir], dict) or not db in config['backups'][dir]:
if not db in ignored_dbs:
note_warning_db(dir, db, 'NOT-CONFIGURED', '%s/%s'%(dir, db))
ignored_dbs[db] = True
# can_expire_next: Can expire all files that we handle from now on
backup_state[db]['can_expire_next'] = False
backup_state[db]['expires'] = []
+ if isinstance(config['backups'][dir][db], dict) and 'timeline' in config['backups'][dir][db]:
+ backup_state[db]['timeline'] = config['backups'][dir][db]['timeline']
+ else:
+ backup_state[db]['timeline'] = 1
# Apparently we already have seen a base backup and all its wal files
# which we want to keep, so everything that we see now is older than
basefn = '%s.BASE.%s-%s.tar.gz'%(db, info['label'], info['start wal location'].split(' ',2)[0].replace('/', '_'))
baseffn = os.path.join(dir, basefn)
if not basefn in files:
- note_warning_db(dir, db, 'MISSING-BASE', basefn)
- continue
+ basefn = '%s.BASE.%s.tar.gz'%(db, info['label'])
+ baseffn = os.path.join(dir, basefn)
+ if not basefn in files:
+ m = re.match('([a-z0-9.]+)-\d{8}-\d{6}', info['label'])
+ if m and (m.group(1) != socket.getfqdn()):
+ note_info(dir, 'IGNORED-OTHER-BASE: '+basefn)
+ continue
+ else:
+ note_warning_db(dir, db, 'MISSING-BASE', basefn)
+ continue
+ if db in notyetseen_dbs: del notyetseen_dbs[db]
files.remove(basefn)
if backup_state[db]['can_expire_next']:
backup_state[db]['expires'].append(baseffn)
backup_state[db]['base_needs_wal_until'] = walbase
start = time.mktime(time.strptime(info['start time'], '%Y-%m-%d %H:%M:%S %Z'))
- if start + config['retention'] < time.time():
+ if start + get_retention(config, dir, db) < time.time():
backup_state[db]['can_expire_for_base_hit'] = True
continue
backup_state[db]['can_expire_next'] = True
(timeline, wal1, wal2) = map(lambda x: int(x,16), r.groups())
- if not timeline == 1:
- note_warning_db(dir, db, 'CANNOT-HANDLE-TIMELINES_NOT_1', ffn)
+ if not timeline == backup_state[db]['timeline']:
+ note_warning_db(dir, db, 'UNEXPECTED-TIMELINE', ffn)
continue
thissegment = (wal1, wal2)
backup_state[db]['newest-wal'] = thissegment
backup_state[db]['newest-wal-file'] = ffn
else:
- if not wal_pre(backup_state[db]['oldest-wal']) == thissegment:
+ if not wal_pre(backup_state[db]['oldest-wal'], dir, db) == thissegment:
note_warning_db(dir, db, 'WAL-MISSING-AFTER', ffn)
ignored_dbs[db] = True
continue
for f in backup_state[db]['expires']:
global_expires.append(f)
+ for db in notyetseen_dbs:
+ if db.startswith('_'): continue
+ note_warning_db(dir, db, 'NO-BACKUP', 'no backups! (no .backup files found)')
+
#if not db in backup_state:
# note_warning('BASE-WITHOUT-WAL', ffn)
# ignored_dbs[db] = True
for p in problems_seq:
print p
-for p in notices_seq:
- print p
+if options.verbose:
+ for p in notices_seq:
+ print p
if options.expire:
for f in global_expires: