X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=dsa-nagios-checks%2Fchecks%2Fdsa-check-bacula;h=2d6c34d2d911bed35fd764bd3637e8a24d393299;hb=b3923761df2779305961f11dc4e953dc31624f00;hp=69988fee6ce75e3bc58a0a259c53b352901fc68e;hpb=74935e3d25b7e8969a272acb3cd5ad9fdafeca75;p=mirror%2Fdsa-nagios.git
diff --git a/dsa-nagios-checks/checks/dsa-check-bacula b/dsa-nagios-checks/checks/dsa-check-bacula
index 69988fe..2d6c34d 100755
--- a/dsa-nagios-checks/checks/dsa-check-bacula
+++ b/dsa-nagios-checks/checks/dsa-check-bacula
@@ -1,93 +1,119 @@
-#!/usr/bin/env python
-#
-# check_bacula_client Nagios plugin to check Bacula client backups
-# Copyright (C) 2010 Tom Payne
+#!/usr/bin/python
+
+# queries a bacula database for the last backup of a given host
+
+# Copyright 2010, 2011, 2013 Peter Palfrader
#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-from datetime import datetime, timedelta
-from optparse import OptionParser, OptionValueError
+import optparse
+import psycopg2
+import psycopg2.extras
import re
import sys
-import time
-
-import pexpect
-
-
-OK, WARNING, CRITICAL, UNKNOWN = xrange(0, 4)
-status_message = 'OK WARNING CRITICAL UNKNOWN'.split()
-
-MULTIPLIERS = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400, 'w': 604800}
-DIVISORS = ((60, 'minutes'), (60, 'hours'), (24, 'days'), (7, 'weeks'))
-
-
-def parse_period(option, opt_str, value, parser):
- m = re.match(r'(\d+(?:\.\d+)?)(%s)\Z' % '|'.join(MULTIPLIERS.keys()), value)
- if not m:
- raise OptionValueError('invalid period - %s' % value)
- setattr(parser.values, option.dest, timedelta(seconds=float(m.group(1)) * MULTIPLIERS[m.group(2)]))
-
-
-def main(argv):
- parser = OptionParser()
- parser.add_option('-H', metavar='ADDRESS', dest='host', help='client name')
- parser.add_option('-w', metavar='PERIOD', type=str, dest='warning', action='callback', callback=parse_period, help='generate warning if last successful backup older than PERIOD')
- parser.add_option('-c', metavar='PERIOD', type=str, dest='critical', action='callback', callback=parse_period, help='generate critical if last successful backup older than PERIOD')
- parser.add_option('-b', metavar='PATH', dest='bconsole', help='path to bconsole')
- parser.set_defaults(bconsole='/usr/bin/bconsole')
- options, args = parser.parse_args(argv[1:])
- exit_status, message = OK, None
- child = pexpect.spawn(options.bconsole, ['-n'])
- try:
- child.expect(r'\n\*')
- child.sendline('status client=%s.debian.org-fd' % options.host)
- if child.expect_list([re.compile(r'Terminated Jobs:'), re.compile(r'Error: Client resource .* does not exist.')]):
- raise RuntimeError('unknown client %s' % options.host)
- child.expect(r'\n\*')
- r = re.compile(r'\s*(\d+)\s+(\S+)\s+(\S+)\s+(\d+\.\d+\s+[KMGTP]|0)\s+OK\s+(\S+\s+\S+)')
- job_id = level = files = bytes = finished = None
- for line in child.before.splitlines():
- m = r.match(line)
- if m:
- job_id = int(m.group(1))
- level = m.group(2)
- files = int(re.sub(r',', '', m.group(3)))
- bytes = re.sub(r'\s+', '', m.group(4))
- finished = datetime(*(time.strptime(m.group(5), '%d-%b-%y %H:%M')[0:6]))
- if job_id is None:
- raise RuntimeError('no terminated jobs')
- age = datetime.now() - finished
- if options.warning and age > options.warning:
- exit_status = WARNING
- if options.critical and age > options.critical:
- exit_status = CRITICAL
- age, units = 24.0 * 60 * 60 * age.days + age.seconds, 'seconds'
- for d, u in DIVISORS:
- if age < d:
- break
- else:
- age /= d
- units = u
- message = '%s, %d files, %sB, %s (%.1f %s ago)' % (level, files, bytes, finished, age, units)
- except RuntimeError:
- exit_status, message = (CRITICAL, str(sys.exc_info()[1]))
- child.sendeof()
- child.expect(pexpect.EOF)
- print '%s: %s' % (status_message[exit_status], message)
- sys.exit(exit_status)
-
-
-if __name__ == '__main__':
- main(sys.argv)
+
+# Exit statuses handed back to nagios, keyed by state name.
+codes = dict(OK=0, WARNING=1, CRITICAL=2, UNKNOWN=3)
+
+
+def convert_time(s, default_unit='h'):
+    """Convert an age such as '90m' or '3d' into a number of seconds.
+
+    s is a non-negative decimal integer, optionally followed by a single
+    unit letter: s (seconds), m (minutes), h (hours), d (days) or
+    w (weeks).  When the unit is omitted, default_unit is applied.
+
+    Returns the age in seconds as an int.
+    Raises ValueError if s is malformed or the unit is not recognised.
+    """
+    seconds_per_unit = {
+        's': 1,
+        'm': 60,
+        'h': 60 * 60,
+        'd': 60 * 60 * 24,
+        'w': 60 * 60 * 24 * 7,
+    }
+    m = re.match(r'([0-9]+)([smhdw])?$', s)
+    if m is None:
+        raise ValueError("invalid time specification: %r" % (s,))
+    unit = m.group(2)
+    if unit is None:
+        unit = default_unit
+    # The regex only admits known unit letters, so this can only trigger
+    # for a bad default_unit passed in by the caller.
+    if unit not in seconds_per_unit:
+        raise ValueError("unknown time unit: %r" % (unit,))
+    return int(m.group(1)) * seconds_per_unit[unit]
+
+
+# Command line: <host> is matched against the job name in the bacula
+# database; the optional <level> restricts the check to backups of that
+# level.
+parser = optparse.OptionParser()
+parser.set_usage("%prog [options] <host> [<level>]")
+parser.add_option("-w", "--warn", metavar="AGE", dest="warn",
+    help="Warn if backup older than AGE (default: 28h)")
+parser.add_option("-c", "--critical", metavar="AGE", dest="critical",
+    help="Critical if backup older than AGE (default: 72h)")
+parser.add_option("-d", "--db-connect-string", metavar="connect-string", dest="db",
+    help="Database connect string")
+parser.add_option("-D", "--db-connect-string-file", metavar="FILE", dest="dbfile",
+    default='/etc/nagios/bacula-database',
+    help="File to read database connect string from (/etc/nagios/bacula-database)")
+(options, args) = parser.parse_args()
+
+# Positional arguments: the host and, optionally, the backup level.
+if len(args) == 1:
+    host = args[0]
+    level = None
+elif len(args) == 2:
+    host, level = args
+else:
+    parser.print_help()
+    sys.exit(codes['UNKNOWN'])
+
+# Thresholds given without a unit are taken as hours (convert_time's
+# default unit).  Bad input has to end in UNKNOWN explicitly: an uncaught
+# exception makes python exit with status 1, which nagios reads as WARNING.
+if options.warn is None: options.warn = '28'
+if options.critical is None: options.critical = '72'
+try:
+    options.warn = convert_time(options.warn)
+except ValueError:
+    print("UNKNOWN: Invalid warning age '%s'." % (options.warn,))
+    sys.exit(codes['UNKNOWN'])
+try:
+    options.critical = convert_time(options.critical)
+except ValueError:
+    print("UNKNOWN: Invalid critical age '%s'." % (options.critical,))
+    sys.exit(codes['UNKNOWN'])
+
+# An explicit connect string (-d) wins over the connect string file (-D).
+if options.db is not None:
+    pass
+elif options.dbfile is not None:
+    try:
+        # 'with' closes the file even if reading fails.
+        with open(options.dbfile) as dbfile:
+            options.db = dbfile.read().rstrip()
+    except (IOError, OSError) as e:
+        print("UNKNOWN: Cannot read database connect string from %s: %s" % (options.dbfile, e))
+        sys.exit(codes['UNKNOWN'])
+else:
+    sys.stderr.write("Need one of -d or -D.\n")
+    sys.exit(codes['UNKNOWN'])
+
+
+# Age, in seconds, of the most recently finished matching job.
+# NOTE(review): jobstatus='T' is presumably bacula's code for a job that
+# terminated normally -- confirm against the bacula catalog documentation.
+query = "SELECT min(extract('epoch' from (CURRENT_TIMESTAMP - realendtime))) AS age FROM job WHERE name=%(host)s AND jobstatus='T'"
+params = { 'host': host }
+if level is not None:
+    query += " AND level=%(level)s"
+    params['level'] = level
+else:
+    # From here on level is only used in the status messages.
+    level = 'any'
+
+# Database trouble has to be reported as UNKNOWN explicitly: an uncaught
+# exception makes python exit with status 1, which nagios reads as WARNING.
+try:
+    conn = psycopg2.connect(options.db)
+    try:
+        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
+        cursor.execute(query, params)
+        records = cursor.fetchall()
+    finally:
+        conn.close()
+except psycopg2.Error as e:
+    print("UNKNOWN: Database query failed: %s" % (str(e).strip(),))
+    sys.exit(codes['UNKNOWN'])
+
+# min() over no matching rows yields a single row holding NULL.
+if len(records) == 0 or records[0][0] is None:
+    print("CRITICAL: No backups of %s/%s." % (host, level))
+    sys.exit(codes['CRITICAL'])
+if len(records) > 1:
+    print("UNKNOWN: got too many records back from query.")
+    sys.exit(codes['UNKNOWN'])
+
+age = float(records[0]['age'])
+if age > options.critical:
+    label, status = 'CRITICAL', 'CRITICAL'
+elif age > options.warn:
+    label, status = 'WARN', 'WARNING'
+else:
+    label, status = 'OK', 'OK'
+print("%s: Last backup of %s/%s is %.2f days old." % (label, host, level, age / 3600 / 24))
+sys.exit(codes[status])