From d12a96228342f968b0bbd470d5cbdb71cc94081c Mon Sep 17 00:00:00 2001 From: Faidon Liambotis Date: Mon, 29 May 2017 20:17:26 +0300 Subject: [PATCH] raid/hpssacli: check for cable errors/no batteries We had a case in the wild where the following "show status" resulted in an OK: Controller Status: OK Cache Status: Permanently Disabled We already fixed the check to emit a WARNING when Cache Status is not "OK" or "Not Configured" in a previous commit, but it seems there's another thing we could check: "controller slot=N show detail". This had a few more values we could check, and specifically: Cache Status Details: Cable Error Battery/Capacitor Count: 0 Emit CRITICAL for the former, and CRITICAL for the latter if the count is 0 and the argument --no-battery hasn't been passed to us. This is untested on systems that have no battery by design -- hopefully, Cable Error won't be reported on these. Signed-off-by: Faidon Liambotis --- dsa-nagios-checks/checks/dsa-check-hpssacli | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dsa-nagios-checks/checks/dsa-check-hpssacli b/dsa-nagios-checks/checks/dsa-check-hpssacli index d10874b..8ae7121 100755 --- a/dsa-nagios-checks/checks/dsa-check-hpssacli +++ b/dsa-nagios-checks/checks/dsa-check-hpssacli @@ -111,6 +111,7 @@ for my $slot (sort @controllers) { my @drives; my $nodrives = 0; my %status; + my @freetext; my $ldallshow = runcmd("controller slot=$slot ld all show"); my @logicaldrives; @@ -300,12 +301,12 @@ for my $slot (sort @controllers) { next; }; - my $cst = runcmd("controller slot=$slot show status"); + my $cst = runcmd("controller slot=$slot show detail"); for (@$cst) { chomp; next if /^$/; next if (/^\S.*in Slot $slot/); - if (/^ *(.*) Status: (.*)$/) { + if (/^ *(Controller|Cache|Battery\/Capacitor) Status: (.*)$/) { my $system = $1; my $status = $2; @@ -318,17 +319,23 @@ for my $slot (sort @controllers) { next if $status =~ /^(OK|Not Configured)$/; } - push @{$status{$status}}, $system; + push @freetext, "$system: $status"; if ($status ne 'OK') { next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor'); record('WARNING'); }; - } else { - die ("Cannot read line '$_' gotten from hpssacli controller slot=$slot show status\n"); + } elsif (/^ *(Cache Status Details): (Cable Error)/) { + push @freetext, $2; + record('CRITICAL'); + } elsif (/^ *(Battery\/Capacitor Count): (.*)/) { + next if $params->{'no-battery'} || int($2) > 0; + push @freetext, "Battery count: $2"; + record('CRITICAL'); }; }; - my $status = join(" - ", (map { $_.": ".join(", ", @{$status{$_}}) } keys %status)); + my $status = join(" - ", ((map { $_.": ".join(", ", @{$status{$_}}) } keys %status), @freetext)); + push @resultstr, "Slot $slot: $status"; }; -- 2.20.1