From: Julien Cristau Date: Mon, 7 Dec 2015 10:16:04 +0000 (+0100) Subject: Add dsa-check-hpssacli, replaces hpacucli for new hosts. X-Git-Url: https://git.adam-barratt.org.uk/?a=commitdiff_plain;h=9bf85fcda0dd5c78e6bc51bc09bb5940c5e74383;p=mirror%2Fdsa-nagios.git Add dsa-check-hpssacli, replaces hpacucli for new hosts. --- diff --git a/dsa-nagios-checks/checks/dsa-check-hpssacli b/dsa-nagios-checks/checks/dsa-check-hpssacli new file mode 100755 index 0000000..f9aa262 --- /dev/null +++ b/dsa-nagios-checks/checks/dsa-check-hpssacli @@ -0,0 +1,317 @@ +#!/usr/bin/perl -w + +# check _physical_ disk status of disks on HP smart array controllers +# requires hpssacli +# +# does _not_ check raid status. use arrayprobe for that. + +# Copyright (c) 2008,2009,2010,2011 Peter Palfrader +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
use strict;
use warnings;
use English;
use Getopt::Long;

# nagios exit codes
my %CODE = (
    'OK'       => 0,
    'WARNING'  => 1,
    'CRITICAL' => 2,
    'UNKNOWN'  => 3
);

# Most severe state seen so far.  Only ever raised, and only via record().
my $EXITCODE = 'OK';

# Turn any uncaught die() into a Nagios UNKNOWN: the message is printed on
# STDOUT and the process exits with the UNKNOWN code.
$SIG{'__DIE__'} = sub {
    # The hook is also invoked for exceptions raised inside an eval (for
    # example within a library).  Those will be caught by that eval, so
    # leave them alone instead of terminating the check.
    return if $^S;
    print @_;
    exit $CODE{'UNKNOWN'};
};

# Run "sudo hpssacli <arguments>" and return a reference to an array holding
# the lines it printed (line endings still attached).
#
#   $cmd - the hpssacli arguments as one string, e.g. "controller all show".
#          It is split on whitespace and passed to sudo as an argument
#          list, so no shell is involved.
#
# Dies if the command cannot be started or if it printed nothing at all.
sub runcmd {
    my ($cmd) = @_;
    my @argv = ('sudo', 'hpssacli', split(' ', $cmd));
    $cmd = join(' ', @argv);
    open(my $fh, '-|', @argv) or die ("Cannot run $cmd: $!");
    my @lines = <$fh>;
    # The result of close (and with it the command's exit status) is
    # deliberately not treated as fatal: the callers recognise the
    # "Error: ..." lines in the output themselves.
    close $fh;
    die ("no results from $cmd\n") if (scalar @lines == 0);
    return \@lines;
}

# Raise the overall exit state to $newexit if that is more severe than the
# state recorded so far.  Dies if $newexit is not one of the keys of %CODE.
sub record {
    my ($newexit) = @_;
    die "code $newexit not defined\n" unless defined $CODE{$newexit};

    if ($CODE{$newexit} > $CODE{$EXITCODE}) {
        $EXITCODE = $newexit;
    };
    return;
}

# --ignore-controller is applied as a regular expression to the lines of
# "controller all show"; --ignore-transfer-speed takes a physical drive ID
# and may be given several times.
my $usage = "$PROGRAM_NAME: Usage: $PROGRAM_NAME [--no-battery] [--ignore-controller=<regex>] [--no-controller-ok] [--ignore-transfer-speed=<drive> [--ignore-transfer-speed=<drive> ...]]\n";
my $params = {};
Getopt::Long::Configure('bundling');
if (!GetOptions (
    '--help'                     => \$params->{'help'},
    '--no-battery'               => \$params->{'no-battery'},
    '--no-controller-ok'         => \$params->{'no-controller-ok'},
    '--ignore-controller=s'      => \$params->{'ignore-controller'},
    '--ignore-transfer-speed=s@' => \$params->{'ignore-transfer-speed'},
    )) {
    die ($usage);
};
if ($params->{'help'}) {
    print $usage;
    exit (0);
};
die ($usage) unless (scalar @ARGV == 0);

# Collect the slot identifiers of all controllers that are not ignored.
my $ctrlallshow = runcmd("controller all show");
my @controllers;
for my $line (@$ctrlallshow) {
    chomp $line;
    next if $line =~ /^$/;
    next if ($params->{'ignore-controller'} && $line =~ /$params->{'ignore-controller'}/);
    if ($line =~ /in Slot ([0-9a-z]+)/) {
        push @controllers, $1;
        next;
    };
    die ("Cannot read line '$line' gotten from hpssacli controller all show\n");
};

if (scalar @controllers == 0) {
    if ($params->{'no-controller-ok'}) {
        print "No smartarray controllers found with hpssacli\n";
        exit $CODE{'OK'}
    } else {
        print "UNKNOWN: No smartarray controllers found with hpssacli\n";
        exit $CODE{'UNKNOWN'}
    }
};

# One entry per controller slot (occasionally two); joined with " --- " for
# the final plugin output.
my @resultstr;

for my $slot (sort @controllers) {
    my @drives;          # IDs of all physical drives seen on this controller
    my $nodrives = 0;    # set once hpssacli reports "does not have any ..."
    my %status;          # state name => list of drives/subsystems in that state

    # --- logical drives ---------------------------------------------------
    my $ldallshow = runcmd("controller slot=$slot ld all show");
    my @logicaldrives;
    for my $line (@$ldallshow) {
        chomp $line;
        next if $line =~ /^$/;
        next if ($line =~ /^\S.*in Slot $slot/);
        next if $line =~ /^ *array [A-Z]$/;
        if ($line =~ /logicaldrive ([0-9a-z]+)/) {
            push @logicaldrives, $1;
            next;
        } elsif ($line =~ /^Error: The specified device does not have any logical drives.$/) {
            $nodrives = 1;
        } else {
            die ("Cannot read line '$line' gotten from hpssacli controller slot=$slot ld all show\n");
        }
    };

    # check logicaldrives
    for my $logicaldrive (sort @logicaldrives) {
        my $lds = runcmd("controller slot=$slot ld $logicaldrive show");
        for my $line (@$lds) {
            chomp $line;
            next if $line =~ /^$/;
            if ($line =~ /^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) {
                my $state = $1;
                if ($state eq 'Initialization Completed') {
                    push @{$status{'OK'}}, "Parity LD$logicaldrive";
                } elsif ($state eq 'Rebuilding') {
                    # NOTE(review): a rebuilding parity is listed under
                    # 'Failed' but only raises WARNING - confirm that the
                    # label is intended.
                    push @{$status{'Failed'}}, "Parity LD$logicaldrive";
                    record('WARNING');
                } elsif ($state eq 'Initialization Failed') {
                    push @{$status{'Failed'}}, "Parity LD$logicaldrive";
                    record('CRITICAL');
                } else {
                    # Not reachable with the alternation above; kept as a
                    # safety net should the pattern ever be extended.
                    record('UNKNOWN');
                }
            }
        }
    }

    if (!$nodrives && scalar @logicaldrives == 0) {
        push @resultstr, "Slot $slot: unexpectedly, found no logical drives in list.";
        record('UNKNOWN');
    } elsif ($nodrives && scalar keys %status > 0) {
        push @resultstr, "Slot $slot: have no logical drives but status results?";
        record('UNKNOWN');
        next;
    } elsif ($nodrives) {
        push @resultstr, "Slot $slot: no logical drives";
    };

    # --- physical drives --------------------------------------------------
    # NOTE(review): $nodrives is not reset here, so a controller without
    # logical drives keeps the flag set for the checks after this loop -
    # confirm that this is intended.
    my $pds = runcmd("controller slot=$slot pd all show");
    for my $line (@$pds) {
        chomp $line;
        next if $line =~ /^$/;
        next if ($line =~ /^\S.*in Slot $slot/);
        next if $line =~ /^ *array [A-Z]$/;
        next if $line =~ /^ *unassigned/;
        if ($line =~ /^ *(array [A-Z]) \(Failed\)$/) {
            record('CRITICAL');
            push @{$status{'Failed'}}, $1;
        } elsif ($line =~ /^Error: The specified controller does not have any physical drives on it.$/) {
            $nodrives = 1;
        } elsif ($line =~ /^ *physicaldrive (\S+) .* (OK|Predictive Failure|Failed|Rebuilding)(?:, (?:active )?spare)?\)$/) {
            my $drive = $1;
            my $state = $2;
            push @{$status{$state}}, $drive;
            if ($state eq 'OK') {
                # nothing to record
            } elsif ($state eq 'Predictive Failure' ||
                     $state eq 'Rebuilding') {
                record('WARNING');
            } elsif ($state eq 'Failed') {
                record('CRITICAL');
            } else {
                record('UNKNOWN');
            };
            push @drives, $drive;
        } else {
            die ("Cannot read line '$line' gotten from hpssacli controller slot=$slot pd all show\n");
        };
    };

    # Check that all drives have the proper transfer speed.
    # sometimes stuff breaks and they fall back to 10mb/sec.
    for my $drive (@drives) {
        # skip drives that are known to have failed
        next if (exists $status{'Failed'} && grep {$drive eq $_} @{$status{'Failed'}});
        my $type;
        if ($drive =~ /^[0-9]+:[0-9]+$/) { # scsi drives
            $type = 'SCSI';
        } elsif ($drive =~ /^[0-9]+[EI]:[0-9]+:[0-9]+$/) { # SAS
            $type = 'SAS';
        } elsif ($drive =~ /^[0-9]+[C]:[0-9]+:[0-9]+$/) { # New 6GBPS SAS
            $type = 'SAS+';
        } else {
            # Do not pass drive IDs of an unknown form on to hpssacli.
            warn ("Unknown diskdrive ID $drive\n");
            next;
        }

        # Drop everything up to and including the "physicaldrive" header
        # line, then read the remaining "Key: Value" lines into %value.
        my $pd = runcmd("controller slot=$slot pd $drive show");
        while (defined $pd->[0] && !($pd->[0] =~ /physicaldrive/)) {
            shift @$pd;
        };
        shift @$pd;
        my %value;
        for my $line (@$pd) {
            if ($line =~ m/^\s*(.*?):\s*(.*?)\s*$/) {
                $value{$1} = $2;
            }
        }

        my $key;         # name of the field that holds the actual speed
        my $expected;    # acceptable value, or array ref of acceptable values
        if ($type eq 'SCSI') {
            $key = 'Transfer Speed';
            if (!defined $value{'Transfer Mode'}) {
                record('WARNING');
                push @{$status{'unknown transfer mode'}}, $drive;
                next;
            } elsif ($value{'Transfer Mode'} eq 'Ultra 3 Wide') {
                $expected = '160 MB/Sec';
            } elsif ($value{'Transfer Mode'} eq 'Ultra 320 Wide') {
                $expected = '320 MB/Sec';
            } else {
                record('WARNING');
                push @{$status{'unknown transfer mode'}}, $drive."(".$value{'Transfer Mode'}.")";
                next;
            };
        } elsif ($type eq 'SAS' || $type eq 'SAS+') {
            $key = 'PHY Transfer Rate';
            # Either field may be absent from the output; treat that as
            # "no match" rather than comparing undef.
            my $interface = defined $value{'Interface Type'} ? $value{'Interface Type'} : '';
            my $phycount  = defined $value{'PHY Count'}      ? $value{'PHY Count'}      : '';
            if ($interface eq 'SATA') {
                $expected = [ '1.5Gbps', '3.0Gbps' ];
            } elsif ($phycount eq '2') {
                if (defined($value{'Redundant Path(s)'})) {
                    $expected = [ '3.0GBPS, 3.0GBPS', '6.0GBPS, 6.0GBPS' ];
                } else {
                    $expected = [ '3.0GBPS, Unknown', 'Unknown, 3.0GBPS',
                                  '6.0GBPS, Unknown', 'Unknown, 6.0GBPS' ];
                }
            } else {
                $expected = [ '3.0GBPS', '6.0GBPS' ];
            }
        } else {
            warn "Should not be here. Do not know what to do with type '$type'\n";
            next;
        }

        my $speed = $value{$key};
        if ($params->{'ignore-transfer-speed'}) {
            if (grep { $drive eq $_ } @{$params->{'ignore-transfer-speed'}}) {
                # The speed may be missing from the output altogether.
                my $shown = defined $speed ? $speed : 'unknown';
                push @{$status{'ignored transfer speed'}}, $drive."(".$shown.")";
                next;
            };
        };
        if (!defined $speed) {
            record('WARNING');
            push @{$status{'unknown transfer speed'}}, $drive;
        } elsif (ref($expected) eq 'ARRAY') {
            # Comparison is case-insensitive.
            if (scalar(grep { uc($speed) eq uc($_) } @$expected) == 0) {
                record('WARNING');
                push @{$status{'bad transfer speed'}}, $drive."(".$speed.")";
            };
        } elsif (uc($speed) ne uc($expected)) {
            record('WARNING');
            push @{$status{'bad transfer speed'}}, $drive."(".$speed.")";
        };
    };

    if ($nodrives && scalar keys %status > 0) {
        push @resultstr, "Slot $slot: have no drives but status results?";
        record('UNKNOWN');
        next;
    } elsif ($nodrives) {
        push @resultstr, "Slot $slot: no drives";
        next;
    };

    # --- controller, cache and battery status -----------------------------
    my $cst = runcmd("controller slot=$slot show status");
    for my $line (@$cst) {
        chomp $line;
        next if $line =~ /^$/;
        next if ($line =~ /^\S.*in Slot $slot/);
        if ($line =~ /^ *(.*) Status: (.*)$/) {
            my $system = $1;
            my $state = $2;
            push @{$status{$state}}, $system;
            if ($state ne 'OK') {
                # With --no-battery a bad cache or battery state is still
                # listed in the output but does not raise the exit code.
                next if ($params->{'no-battery'} && $system eq 'Cache');
                next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor');
                record('WARNING');
            };
        } else {
            die ("Cannot read line '$line' gotten from hpssacli controller slot=$slot show status\n");
        };
    };

    # Sort the state names so that the output is the same on every run.
    my $summary = join(" - ", (map { $_.": ".join(", ", @{$status{$_}}) } sort keys %status));
    push @resultstr, "Slot $slot: $summary";
};

print "$EXITCODE: ", join(" --- ", @resultstr), "\n";
exit $CODE{$EXITCODE};

__END__

Remainder of the commit this script was taken from (start of the
debian/changelog hunk), kept verbatim; it is not part of the script.

diff --git a/dsa-nagios-checks/debian/changelog b/dsa-nagios-checks/debian/changelog
index 680bb13..cd50bd5 100644
--- a/dsa-nagios-checks/debian/changelog
+++ b/dsa-nagios-checks/debian/changelog
@@ -19,6 +19,9 @@ dsa-nagios-checks (108+XXXX) UNRELEASED; urgency=medium
   *
dsa-update-unowned-file-status: avoid printing the apex of pruned directory trees. + [ Julien Cristau ] + * Add dsa-check-hpssacli, replaces hpacucli for new hosts. + -- Peter Palfrader Fri, 11 Sep 2015 10:21:15 +0200 dsa-nagios-checks (108) unstable; urgency=medium