2 # ------------------------------------------------------------------------------
3 # File Name: check_raid.pl
4 # Author: Thomas Nilsen - Norway
7 # Description: This script will check to see if any software raid devices have failed.
9 # Email: thomas.nilsen@doc-s.co.uk
10 # WWW: www.doc-s.co.uk
11 # ------------------------------------------------------------------------------
12 # Copyright 2003 (c) Thomas Nilsen
13 # Credits go to Ethan Galstad for coding Nagios
15 # ------------------------------------------------------------------------------
18 # 2008-03-31 Peter Palfrader Return warning on running resync
19 # 2007-11-07 Peter Palfrader Return unknown if /proc/mdstat does not exist
20 # 05/10/2004 Peter Palfrader Make it work without that 'use util (vars)'
21 # 14/06/2003 TN Initial Release
22 # - Format of mdstat assumed to be "2 line" per
23 # device with [??] on the second line.
24 # ------------------------------------------------------------------------------
# Option variables are package globals; the option table below hands
# references to them to Getopt::Long.
29 use vars qw($opt_V $opt_h $opt_t $opt_F $PROGNAME);
30 use lib '/usr/local/nagios/libexec/';
# Nagios plugin exit codes. UNKNOWN has to be 3: exiting with -1 gives the
# process status 255, which lies outside the 0-3 range plugins may return.
32 my %ERRORS = ( OK => 0, WARNING => 1, CRITICAL => 2, UNKNOWN => 3 );
35 $PROGNAME="check_raid";
# NOTE(review): @device is declared here, but the checking code uses the
# scalar $device (and @resyncing) - confirm where those are declared.
43 my ( $line, $stat, $state ,@device, $msg, $status, $timeout);
# Command-line handling.
# 'bundling' lets single-letter switches be combined (e.g. -Vh).
# -F/--filename and -t/--timeout carry a value, so they need the "=s"
# specifier: without it Getopt::Long treats them as boolean flags, stores 1
# in $opt_F / $opt_t and leaves the real argument unparsed in @ARGV.
# "=s" (not "=i") is used for the timeout so that the digit check further
# down stays the single place where the value is validated.
48 Getopt::Long::Configure('bundling');
50 "V" => \$opt_V, "version" => \$opt_V,
51 "h" => \$opt_h, "help" => \$opt_h,
52 "F=s" => \$opt_F, "filename=s" => \$opt_F,
53 "t=s" => \$opt_t, "timeout=s" => \$opt_t);
# Revision banner (presumably the -V/--version output; the enclosing
# condition is not visible in this excerpt).
56 print($PROGNAME,': $Revision: 0.1 $');
# Take the mdstat path from -F. The pattern accepts any single-line string,
# so this copies the value rather than validating it.
67 $stat = $1 if ($opt_F =~ /^(.*)$/);
70 print "Invalid mdstat file: $opt_F\n";
71 exit $ERRORS{'UNKNOWN'};
# Use -t as the timeout only when it is a string of digits (a bare 0 is
# false and therefore ignored).
76 ($opt_t) && ($opt_t =~ /^([0-9]+)$/) && ($timeout = $1);
78 # Just in case of problems, let's not hang Nagios
80 print ("ERROR: No response (alarm)\n");
81 exit $ERRORS{'UNKNOWN'};
85 # Start checking the file...
# Three-argument open: $stat can come from the command line (-F), and the
# two-argument form would let a value such as "cmd |" or ">file" select the
# open mode instead of naming a file to read. The FH handle name is kept
# unchanged for the code that reads from it.
86 open (FH, '<', $stat) or print("UNKNOWN: Cannot open $stat: $!\n"), exit $ERRORS{'UNKNOWN'};
87 $state = $ERRORS{'OK'};
93 # Now check the mdstat file..
# Three kinds of line matter:
#   - a line starting with "md..." followed by a space (captures the name),
#   - a line whose status map contains an underscore next to a bracket or a
#     "U" (e.g. "[U_]"); the code treats this as a failed device,
#   - a line containing " resync ", which is reported as a running resync.
96 if ($line =~ /^(md\S*) /) {
98 } elsif( $line =~ / \[_|_\]|U_|_U /) {
99 $state = $ERRORS{'CRITICAL'};
# .= appends without an "uninitialized value" warning on the first failure.
100 $msg .= $device . ": - ";
102 elsif ( $line =~ / resync /) {
103 # [==>..................] resync = 10.3% (15216320/146994624) finish=2153.2min speed=1018K/sec
104 my ($percent) = ($line =~ m# resync = ([0-9.]+%)#);
105 my ($finish) = ($line =~ m# finish=([0-9.]+min)#);
106 my ($speed) = ($line =~ m# speed=([0-9.]+K/sec)#);
107 push @resyncing, "$device ($percent done, finish in $finish at $speed)";
# Report the result: a failed device outranks a resync, which outranks OK.
112 if ( $state == $ERRORS{'CRITICAL'} ) {
113 print "CRITICAL - Device(s) $msg have failed\n";
114 } elsif ( scalar @resyncing > 0 ) {
115 print "WARNING: Resyncing: ".(join "; ", @resyncing)."\n";
116 $state = $ERRORS{'WARNING'};
117 } elsif ( $state == $ERRORS{'OK'} )
118 { print "OK - All devices are online\n"; }
# One-line usage synopsis.
123 print "Usage: $PROGNAME -t <timeout> -F <filename>\n";
# Help text. The revision is printed directly, in the same form as the
# version banner earlier in the script: print_revision() is a helper from
# Nagios' utils.pm, which this script stopped using (see the 05/10/2004
# changelog entry), so calling it would die at run time.
# NOTE(review): confirm that no local print_revision() definition exists.
127 print $PROGNAME, ': $Revision: 0.1 $', "\n";
128 print "Copyright (c) 2003 Thomas Nilsen/Karl DeBisschop\n";
131 print "Checks the mdstat file for errors on any configured software raid.\n
132 -t ( --timeout=INTEGER)
133 Seconds before script times out (default: 10)\n
134 -F ( --filename=FILE)
135 Full path and name to mdstat file (usually '/proc/mdstat') \n\n";