#!/usr/bin/python

# queries a bacula database for the last backup of a given host

# Copyright 2010, 2011, 2013 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import optparse
import re
import sys

# Nagios plugin exit statuses, keyed by state name.
codes = {
    'UNKNOWN': 3,
    'CRITICAL': 2,
    'WARNING': 1,
    'OK': 0}

# Seconds per unit suffix accepted by convert_time().
UNIT_SECONDS = {
    's': 1,
    'm': 60,
    'h': 60 * 60,
    'd': 60 * 60 * 24,
    'w': 60 * 60 * 24 * 7}

DEFAULT_WARN = '28h'
DEFAULT_CRITICAL = '72h'
DEFAULT_DBFILE = '/etc/nagios/bacula-database'


def convert_time(s, default_unit='h'):
    """Convert an age such as '28', '90s' or '2d' into seconds.

    s is a non-negative integer optionally followed by one of the unit
    letters s, m, h, d or w.  When the letter is missing, default_unit
    is used instead.

    Raises ValueError if s does not have that form or if the unit
    (including a caller-supplied default_unit) is not recognised.
    """
    m = re.match(r'([0-9]+)([smhdw])?$', s)
    if m is None:
        raise ValueError("invalid age %r (expected e.g. 90s, 30m, 28h, 3d, 1w)" % (s,))
    unit = m.group(2)
    if unit is None:
        unit = default_unit
    if unit not in UNIT_SECONDS:
        raise ValueError("invalid time unit %r" % (unit,))
    return int(m.group(1)) * UNIT_SECONDS[unit]


class NagiosOptionParser(optparse.OptionParser):
    """OptionParser whose usage errors exit with the UNKNOWN status.

    The stock OptionParser.error() exits with status 2, which Nagios
    would interpret as CRITICAL.
    """

    def error(self, msg):
        self.print_usage(sys.stderr)
        self.exit(codes['UNKNOWN'],
                  "%s: error: %s\n" % (self.get_prog_name(), msg))


def build_parser():
    """Return the command line parser for this check."""
    parser = NagiosOptionParser()
    parser.set_usage("%prog [options] <host> [<level>]")
    parser.add_option("-w", "--warn", metavar="AGE", dest="warn",
                      default=DEFAULT_WARN,
                      help="Warn if backup older than <AGE> (default: 28h)")
    parser.add_option("-c", "--critical", metavar="AGE", dest="critical",
                      default=DEFAULT_CRITICAL,
                      help="Critical if backup older than <AGE> (default: 72h)")
    parser.add_option("-d", "--db-connect-string", metavar="connect-string", dest="db",
                      help="Database connect string")
    parser.add_option("-D", "--db-connect-string-file", metavar="FILE", dest="dbfile",
                      default=DEFAULT_DBFILE,
                      help="File to read database connect string from (/etc/nagios/bacula-database)")
    return parser


def build_query(host, level):
    """Build the SQL that fetches the age of a host's last good backup.

    Returns a (query, params, level_label) tuple.  query uses pyformat
    placeholders that are filled from params by the database driver.
    level_label is level itself, or 'any' when level is None and the
    query is therefore not restricted to one backup level.
    """
    query = "SELECT min(extract('epoch' from (CURRENT_TIMESTAMP - realendtime))) AS age FROM job WHERE name=%(host)s AND jobstatus='T'"
    params = {'host': host}
    if level is None:
        return query, params, 'any'
    query += " AND level=%(level)s"
    params['level'] = level
    return query, params, level


def evaluate(host, level, age, warn, critical):
    """Turn a backup age into a Nagios (exit_status, message) pair.

    age is the age of the last backup in seconds, or None if the query
    found no matching backup.  warn and critical are thresholds in
    seconds; an age strictly greater than a threshold triggers it, and
    critical is checked first.
    """
    if age is None:
        return codes['CRITICAL'], "CRITICAL: No backups of %s/%s." % (host, level)

    # The database driver may hand back a float or a Decimal.
    age = float(age)
    if age > critical:
        status, label = codes['CRITICAL'], 'CRITICAL'
    elif age > warn:
        status, label = codes['WARNING'], 'WARN'
    else:
        status, label = codes['OK'], 'OK'
    message = "%s: Last backup of %s/%s is %.2f days old." % (
        label, host, level, age / 3600 / 24)
    return status, message


def read_connect_string(path):
    """Return the contents of path with trailing whitespace removed.

    Raises IOError/OSError if the file cannot be read.
    """
    with open(path) as handle:
        return handle.read().rstrip()


def main(argv=None):
    """Run the check and return the Nagios exit status.

    argv is the list of command line arguments without the program
    name; None makes optparse fall back to sys.argv[1:].  Every failure
    that prevents the check from running is reported as UNKNOWN instead
    of escaping as a traceback, whose exit status 1 Nagios would read
    as WARNING.
    """
    parser = build_parser()
    (options, args) = parser.parse_args(argv)

    if len(args) == 1:
        host, level = args[0], None
    elif len(args) == 2:
        host, level = args
    else:
        parser.print_help()
        return codes['UNKNOWN']

    try:
        warn = convert_time(options.warn)
        critical = convert_time(options.critical)
    except ValueError as err:
        print("UNKNOWN: %s" % (err,))
        return codes['UNKNOWN']

    if options.db is not None:
        connect_string = options.db
    elif options.dbfile is not None:
        try:
            connect_string = read_connect_string(options.dbfile)
        except (IOError, OSError) as err:
            print("UNKNOWN: cannot read database connect string: %s" % (err,))
            return codes['UNKNOWN']
    else:
        sys.stderr.write("Need one of -d or -D.\n")
        return codes['UNKNOWN']

    # Imported here so that a missing driver is reported as UNKNOWN.
    try:
        import psycopg2
        import psycopg2.extras
    except ImportError as err:
        print("UNKNOWN: cannot import psycopg2: %s" % (err,))
        return codes['UNKNOWN']

    query, params, level = build_query(host, level)

    try:
        conn = psycopg2.connect(connect_string)
        try:
            cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute(query, params)
            records = cursor.fetchall()
        finally:
            conn.close()
    except psycopg2.Error as err:
        # Collapse whitespace so the status stays on a single line.
        print("UNKNOWN: database error: %s" % (' '.join(str(err).split()),))
        return codes['UNKNOWN']

    if len(records) > 1:
        print("UNKNOWN: got too many records back from query.")
        return codes['UNKNOWN']

    age = records[0]['age'] if records else None
    status, message = evaluate(host, level, age, warn, critical)
    print(message)
    return status


if __name__ == '__main__':
    sys.exit(main())