dsa-check-backuppg: clusternames with _ are not really clusternames
[mirror/dsa-nagios.git] / dsa-nagios-checks / checks / dsa-check-backuppg
index e8a8592..8704f3c 100755 (executable)
@@ -62,7 +62,7 @@ def load_conf(cf):
         configfile = '/etc/nagios/dsa-check-backuppg.conf'
 
     f = open(configfile)
-    config = yaml.load(f.read())
+    config = yaml.safe_load(f.read())
     f.close()
     return config
 
@@ -120,6 +120,25 @@ def parse_pg_backup_info(fn):
     f.close()
     return i
 
+def get_retention(config, host, db):
+    assert('retention' in config)
+
+    assert('backups' in config)
+    assert(isinstance(config['backups'], dict))
+
+    assert(host in config['backups'])
+    assert(isinstance(config['backups'][host], dict))
+
+    assert(db in config['backups'][host])
+    if isinstance(config['backups'][host][db], dict) and 'retention' in config['backups'][host][db]:
+        r = config['backups'][host][db]['retention']
+    elif '_retention' in config['backups'][host]:
+        r = config['backups'][host]['_retention']
+    else:
+        r = config['retention']
+
+    assert(isinstance(r, int))
+    return r
 
 parser = optparse.OptionParser()
 parser.set_usage("%prog [-c=<CONFFILE>]               (nagios mode)\n" +
@@ -173,7 +192,7 @@ for dir in os.listdir('.'):
 
     files.sort()
 
-    unhandled_backups = copy.copy(config['backups'][dir])
+    notyetseen_dbs = copy.copy(config['backups'][dir])
     ignored_dbs = {}
     backup_state = {}
 
@@ -246,6 +265,7 @@ for dir in os.listdir('.'):
                         else:
                             note_warning_db(dir, db, 'MISSING-BASE', basefn)
                             continue
+                if db in notyetseen_dbs: del notyetseen_dbs[db]
                 files.remove(basefn)
                 if backup_state[db]['can_expire_next']:
                     backup_state[db]['expires'].append(baseffn)
@@ -263,7 +283,7 @@ for dir in os.listdir('.'):
                 backup_state[db]['base_needs_wal_until'] = walbase
 
                 start = time.mktime(time.strptime(info['start time'], '%Y-%m-%d %H:%M:%S %Z'))
-                if start + config['retention'] < time.time():
+                if start + get_retention(config, dir, db) < time.time():
                     backup_state[db]['can_expire_for_base_hit'] = True
                 continue
 
@@ -327,6 +347,10 @@ for dir in os.listdir('.'):
                 for f in backup_state[db]['expires']:
                     global_expires.append(f)
 
+    for db in notyetseen_dbs:
+        if db.startswith('_'): continue
+        note_warning_db(dir, db, 'NO-BACKUP', 'no backups! (no .backup files found)')
+
     #if not db in backup_state:
     #    note_warning('BASE-WITHOUT-WAL', ffn)
     #    ignored_dbs[db] = True
@@ -341,8 +365,9 @@ for dir in os.listdir('.'):
 
 for p in problems_seq:
     print p
-for p in notices_seq:
-    print p
+if options.verbose:
+    for p in notices_seq:
+        print p
 
 if options.expire:
     for f in global_expires: