port dsa-check-memory to python3
[mirror/dsa-nagios.git] / dsa-nagios-checks / checks / dsa-check-backuppg
index 886336f..ce6a9a0 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 # Copyright 2010 Peter Palfrader
 #
@@ -47,9 +47,11 @@ import copy
 import time
 import re
 import os
+import errno
 import sys
 import yaml
 import optparse
+import socket
 
 def load_conf(cf):
     if cf is not None:
@@ -60,7 +62,7 @@ def load_conf(cf):
         configfile = '/etc/nagios/dsa-check-backuppg.conf'
 
     f = open(configfile)
-    config = yaml.load(f.read())
+    config = yaml.safe_load(f.read())
     f.close()
     return config
 
@@ -96,11 +98,11 @@ def note_warning_db(host, db, key, value):
     problems_per_db[host][db] = True
 
 
-def wal_pre(w):
+def wal_pre(w, host, db):
     (w1,w2) = w
     if w2 == 0:
         w1 -= 1
-        w2 = 0xFE
+        w2 = 0xFF
     else:
         w2 -= 1
 
@@ -115,6 +117,25 @@ def parse_pg_backup_info(fn):
     f.close()
     return i
 
+def get_retention(config, host, db):  # return the retention configured for db on host
+    assert('retention' in config)
+    # Sanity-check the config shape with asserts (these are skipped under python -O).
+    assert('backups' in config)
+    assert(isinstance(config['backups'], dict))
+    # The host must have an entry under 'backups', and that entry must be a dict.
+    assert(host in config['backups'])
+    assert(isinstance(config['backups'][host], dict))
+    # Precedence: per-db 'retention', then host-level '_retention', then global 'retention'.
+    assert(db in config['backups'][host])
+    if isinstance(config['backups'][host][db], dict) and 'retention' in config['backups'][host][db]:
+        r = config['backups'][host][db]['retention']
+    elif '_retention' in config['backups'][host]:
+        r = config['backups'][host]['_retention']
+    else:
+        r = config['retention']
+    # Must be an int; the script adds it to an epoch start time, so presumably seconds.
+    assert(isinstance(r, int))
+    return r
 
 parser = optparse.OptionParser()
 parser.set_usage("%prog [-c=<CONFFILE>]               (nagios mode)\n" +
@@ -138,7 +159,7 @@ config = load_conf(options.conffile)
 
 os.chdir(config['rootdir'])
 for dir in os.listdir('.'):
-    if dir.startswith('.') or dir.endswith('.old'):
+    if dir.startswith('.') or dir.endswith('.old') or dir == 'lost+found':
         note_info('IGNORED', dir)
         continue
 
@@ -168,7 +189,7 @@ for dir in os.listdir('.'):
 
     files.sort()
 
-    unhandled_backups = copy.copy(config['backups'][dir])
+    notyetseen_dbs = copy.copy(config['backups'][dir])
     ignored_dbs = {}
     backup_state = {}
 
@@ -190,7 +211,7 @@ for dir in os.listdir('.'):
             continue
 
         (db, type) = r.groups(1)
-        if not isinstance(config['backups'][dir], list) or not db in config['backups'][dir]:
+        if not isinstance(config['backups'][dir], dict) or not db in config['backups'][dir]:
             if not db in ignored_dbs:
                 note_warning_db(dir, db, 'NOT-CONFIGURED', '%s/%s'%(dir, db))
             ignored_dbs[db] = True
@@ -206,6 +227,10 @@ for dir in os.listdir('.'):
             # can_expire_next: Can expire all files that we handle from now on
             backup_state[db]['can_expire_next'] = False
             backup_state[db]['expires'] = []
+            if isinstance(config['backups'][dir][db], dict) and 'timeline' in config['backups'][dir][db]:
+                backup_state[db]['timeline'] = config['backups'][dir][db]['timeline']
+            else:
+                backup_state[db]['timeline'] = 1
 
         # Apparently we already have seen a base backup and all its wal files
         # which we want to keep, so everything what we see now is older than
@@ -230,8 +255,14 @@ for dir in os.listdir('.'):
                     basefn = '%s.BASE.%s.tar.gz'%(db, info['label'])
                     baseffn = os.path.join(dir, basefn)
                     if not basefn in files:
-                        note_warning_db(dir, db, 'MISSING-BASE', basefn)
-                        continue
+                        m = re.match(r'([a-z0-9.]+)-\d{8}-\d{6}', info['label'])  # label: <host>-<8 digits>-<6 digits>
+                        if m and (m.group(1) != socket.getfqdn()):
+                            note_info(dir, 'IGNORED-OTHER-BASE: '+basefn)
+                            continue
+                        else:
+                            note_warning_db(dir, db, 'MISSING-BASE', basefn)
+                            continue
+                if db in notyetseen_dbs: del notyetseen_dbs[db]
                 files.remove(basefn)
                 if backup_state[db]['can_expire_next']:
                     backup_state[db]['expires'].append(baseffn)
@@ -249,7 +280,7 @@ for dir in os.listdir('.'):
                 backup_state[db]['base_needs_wal_until'] = walbase
 
                 start = time.mktime(time.strptime(info['start time'], '%Y-%m-%d %H:%M:%S %Z'))
-                if start + config['retention'] < time.time():
+                if start + get_retention(config, dir, db) < time.time():
                     backup_state[db]['can_expire_for_base_hit'] = True
                 continue
 
@@ -263,8 +294,8 @@ for dir in os.listdir('.'):
                             backup_state[db]['can_expire_next'] = True
 
                 (timeline, wal1, wal2) = map(lambda x: int(x,16), r.groups())
-                if not timeline == 1:
-                    note_warning_db(dir, db, 'CANNOT-HANDLE-TIMELINES_NOT_1', ffn)
+                if not timeline == backup_state[db]['timeline']:
+                    note_warning_db(dir, db, 'UNEXPECTED-TIMELINE', ffn)
                     continue
 
                 thissegment = (wal1, wal2)
@@ -272,7 +303,7 @@ for dir in os.listdir('.'):
                     backup_state[db]['newest-wal'] = thissegment
                     backup_state[db]['newest-wal-file'] = ffn
                 else:
-                    if not wal_pre(backup_state[db]['oldest-wal']) == thissegment:
+                    if not wal_pre(backup_state[db]['oldest-wal'], dir, db) == thissegment:
                         note_warning_db(dir, db, 'WAL-MISSING-AFTER', ffn)
                         ignored_dbs[db] = True
                         continue
@@ -313,6 +344,10 @@ for dir in os.listdir('.'):
                 for f in backup_state[db]['expires']:
                     global_expires.append(f)
 
+    for db in notyetseen_dbs:
+        if db.startswith('_'): continue
+        note_warning_db(dir, db, 'NO-BACKUP', 'no backups! (no .backup files found)')
+
     #if not db in backup_state:
     #    note_warning('BASE-WITHOUT-WAL', ffn)
     #    ignored_dbs[db] = True
@@ -326,20 +361,21 @@ for dir in os.listdir('.'):
 
 
 for p in problems_seq:
-    print p
-for p in notices_seq:
-    print p
+    print(p)
+if options.verbose:
+    for p in notices_seq:
+        print(p)
 
 if options.expire:
     for f in global_expires:
-        if options.verbose: print "Expiring %s"%(f)
+        if options.verbose: print("Expiring %s" % f)
         if not options.dry_run: os.unlink(f)
 
 if len(problems_seq) > 0:
     sys.exit(1)
 
 if not options.expire or options.verbose:
-    print "OK: no problems detected"
+    print("OK: no problems detected")
 sys.exit(0)
 
 # vim:set et: