2 # ------------------------------------------------------------------------------
3 # File Name: check_raid.pl
4 # Author: Thomas Nilsen - Norway
7 # Description: This script will check to see if any software raid
9 # Email: thomas.nilsen@doc-s.co.uk
10 # WWW: www.doc-s.co.uk
11 # ------------------------------------------------------------------------------
12 # Copyright 2003 (c) Thomas Nilsen
13 # Credits go to Ethan Galstad for coding Nagios
15 # ------------------------------------------------------------------------------
18 # 2008-03-31 Peter Palfrader Return warning on running resync
19 # 2007-11-07 Peter Palfrader Return unknown if /proc/mdstat does not exist
20 # 05/10/2004 Peter Palfrader Make it work without that 'use util (vars)'
21 # 14/06/2003 TN Initial Release
22 # - Format of mdstat assumed to be "2 line" per
23 # device with [??] on the second line.
24 # ------------------------------------------------------------------------------
29 use vars qw($opt_V $opt_h $opt_t $opt_F $PROGNAME);
30 use lib '/usr/local/nagios/libexec/';
32 my %ERRORS = ( OK => 0, WARNING => 1, CRITICAL => 2, UNKNOWN => 3 ); # Nagios plugin exit codes; UNKNOWN must be 3 (-1 would exit as 255, which Nagios reports as "out of bounds")
35 $PROGNAME="check_raid";
43 my ( $line, $stat, $state ,@device, $msg, $status, $timeout);
48 Getopt::Long::Configure('bundling');
50 "V" => \$opt_V, "version" => \$opt_V,
51 "h" => \$opt_h, "help" => \$opt_h,
52 "F" => \$opt_F, "filename" => \$opt_F,
53 "t" => \$opt_t, "timeout" => \$opt_t);
56 print($PROGNAME,': $Revision: 0.1 $');
67 $stat = $1 if ($opt_F =~ /^(.*)$/);
70 print "Invalid mdstat file: $opt_F\n";
71 exit $ERRORS{'UNKNOWN'};
76 ($opt_t) && ($opt_t =~ /^([0-9]+)$/) && ($timeout = $1);
78 # Just in case of problems, let's not hang Nagios
80 print ("ERROR: No response (alarm)\n");
81 exit $ERRORS{'UNKNOWN'};
85 # Start checking the file...
86 open (FH, '<', $stat) or print("UNKNOWN: Cannot open $stat: $!\n"), exit $ERRORS{'UNKNOWN'}; # three-arg open: read-only, so a -F value such as ">file" or "cmd |" is treated as a plain path; on failure, report and exit UNKNOWN
92 # Now check the mdstat file..
95 if ($line =~ /^(md\S*) /) {
97 } elsif( $line =~ / \[_|_\]|U_|_U /) {
98 push @failed, $device;
100 elsif ( $line =~ / resync /) {
101 # [==>..................] resync = 10.3% (15216320/146994624) finish=2153.2min speed=1018K/sec
102 my ($percent) = ($line =~ m# resync = ([0-9.]+%)#);
103 my ($finish) = ($line =~ m# finish=([0-9.]+min)#);
104 my ($speed) = ($line =~ m# speed=([0-9.]+K/sec)#);
105 push @resyncing, "$device ($percent done, finish in $finish at $speed)";
110 if (scalar @failed > 0) {
111 if (scalar @failed == 1) {
112 print "CRITICAL - Device $failed[0] has failed.\n";
114 print "CRITICAL - Devices ".join(", ", @failed)." have failed.\n"
116 exit $ERRORS{'CRITICAL'};
117 } elsif (scalar @resyncing > 0 ) {
118 print "WARNING: Resyncing: ".(join "; ", @resyncing)."\n";
119 exit $ERRORS{'WARNING'};
121 print "OK - All devices are online\n";
127 print "Usage: $PROGNAME -t <timeout> -F <filename>\n";
131 print($PROGNAME,': $Revision: 0.1 $', "\n"); # plain print, matching the -V output; print_revision() comes from Nagios' utils.pm, which this script no longer appears to import (NOTE(review): confirm against the use lines)
132 print "Copyright (c) 2003 Thomas Nilsen/Karl DeBisschop\n";
135 print "Checks the mdstat file for errors on any configured software raid.\n
136 -t ( --timeout=INTEGER)
137 Seconds before script times out (default: 10)\n
138 -F ( --filename=FILE)
139 Full path and name to mdstat file (usually '/proc/mdstat') \n\n";