From: Faidon Liambotis Date: Wed, 26 Jun 2019 15:32:45 +0000 (+0000) Subject: dsa-check-hpssacli: refactor for speed/efficiency X-Git-Url: https://git.adam-barratt.org.uk/?p=mirror%2Fdsa-nagios.git;a=commitdiff_plain;h=85df8fa4003a665d815a5e32f0e7ad0dc7be6e1d dsa-check-hpssacli: refactor for speed/efficiency Currently dsa-check-hpssacli executes: hpssacli controller all show hpssacli controller slot=3 ld all show hpssacli controller slot=3 ld 1 show hpssacli controller slot=3 ld 2 show ... hpssacli controller slot=3 pd all show hpssacli controller slot=3 pd 1I:1:1 show hpssacli controller slot=3 pd 1I:1:2 show ... hpssacli controller slot=3 show detail On systems with e.g. 14 PDs/14 LDs, this results into 32 invocations, just to get the status of all these disks. On systems with more disks or controllers, even more. Refactor the code to simply do: hpssacli controller all show detail hpssacli controller slot=3 ld all show detail hpssacli controller slot=3 pd all show detail This is lightly tested, but seems to have no adverse effects and currently achieves a ~10x speed-up. Signed-off-by: Julien Cristau --- diff --git a/dsa-nagios-checks/checks/dsa-check-hpssacli b/dsa-nagios-checks/checks/dsa-check-hpssacli index 2982600..905435d 100755 --- a/dsa-nagios-checks/checks/dsa-check-hpssacli +++ b/dsa-nagios-checks/checks/dsa-check-hpssacli @@ -83,20 +83,49 @@ if ($params->{'help'}) { }; die ($usage) unless (scalar @ARGV == 0); -my $ctrlallshow = runcmd("controller all show"); -my @controllers; +my $ctrlallshow = runcmd("controller all show detail"); +my $slot; +my %controllers; for (@$ctrlallshow) { chomp; next if /^$/; next if ($params->{'ignore-controller'} && /$params->{'ignore-controller'}/); if (/in Slot ([0-9a-z]+)/) { - push @controllers, $1; - next; + $slot = $1; + $controllers{$slot} = (); + } elsif (/^ *(Controller|Cache|Battery\/Capacitor) Status: (.*)$/) { + my $system = $1; + my $status = $2; + + if ($system eq 'Cache') { + # Can be: + # - 'OK' + # - 'Not Configured' (for e.g. HP SSD Smart Path) + # - 'Permanently Disabled' + # - ...? + next if $status =~ /^(OK|Not Configured)$/; + if ($params->{'ignore-cache'}) { + push @{$controllers{$slot}}, "$system: $status (ignored)"; + next; + } + } + + push @{$controllers{$slot}}, "$system: $status"; + if ($status ne 'OK') { + next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor'); + record('WARNING'); + }; + } elsif (/^ *(Cache Status Details): (Cable Error)/) { + push @{$controllers{$slot}}, $2; + record('CRITICAL'); + } elsif (/^ *(Battery\/Capacitor Count): (.*)/) { + next if $params->{'no-battery'} || int($2) > 0; + push @{$controllers{$slot}}, "Battery count: $2"; + record('CRITICAL'); }; - die ("Cannot read line '$_' gotten from hpssacli controller all show\n"); }; -if (scalar @controllers == 0) { +if (scalar keys %controllers == 0) { if ($params->{'no-controller-ok'}) { print "No smartarray controllers found with hpssacli\n"; exit $CODE{'OK'} @@ -108,56 +137,41 @@ if (scalar @controllers == 0) { my @resultstr; -for my $slot (sort @controllers) { - my @drives; +for my $slot (sort keys %controllers) { my $nodrives = 0; my %status; - my @freetext; - my $ldallshow = runcmd("controller slot=$slot ld all show"); + # check logicaldrives + my $logicaldrive; my @logicaldrives; - for (@$ldallshow) { + my $lds = runcmd("controller slot=$slot ld all show detail"); + for (@$lds) { chomp; next if /^$/; - next if (/^\S.*in Slot $slot/); - next if /^ *array [A-Z]$/; - if (/logicaldrive ([0-9a-z]+)/) { - push @logicaldrives, $1; - next; + if (/Logical Drive: ([0-9a-z]+)/) { + $logicaldrive = $1; + push @logicaldrives, $logicaldrive; } elsif (/^Error: The specified device does not have any logical drives.$/) { $nodrives = 1; - } else { - die ("Cannot read line '$_' gotten from hpssacli controller slot = $slot logicaldrive all show\n"); - } - }; - - # check logicaldrives - for my $logicaldrive (sort @logicaldrives) { - my $lds = runcmd("controller slot=$slot ld $logicaldrive show"); - for (@$lds) { - chomp; - next if /^$/; - if (/^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) { - my $status = $1; - if ($status eq 'Initialization Completed') { - push @{$status{'OK'}}, "Parity LD$logicaldrive"; - } elsif ($status eq 'Rebuilding') { - push @{$status{'Failed'}}, "Parity LD$logicaldrive"; - record('WARNING'); - } elsif ($status eq 'Initialization Failed') { - push @{$status{'Failed'}}, "Parity LD$logicaldrive"; - record('CRITICAL'); - } else { - record('UNKNOWN'); - } + } elsif (/^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) { + my $status = $1; + if ($status eq 'Initialization Completed') { + push @{$status{'OK'}}, "Parity LD$logicaldrive"; + } elsif ($status eq 'Rebuilding') { + push @{$status{'Failed'}}, "Parity LD$logicaldrive"; + record('WARNING'); + } elsif ($status eq 'Initialization Failed') { + push @{$status{'Failed'}}, "Parity LD$logicaldrive"; + record('CRITICAL'); + } else { + record('UNKNOWN'); } - if (/^ *LD Acceleration Method: (.*)$/) { - my $status = $1; - # can at least be "Controller Cache" or HP SSD Smart Path", both OK - if ($status eq 'All disabled') { - push @{$status{'Acceleration method'}}, "LD$logicaldrive disabled"; - record('WARNING'); - } + } elsif (/^ *LD Acceleration Method: (.*)$/) { + my $status = $1; + # can at least be "Controller Cache" or HP SSD Smart Path", both OK + if ($status eq 'All disabled') { + push @{$status{'Acceleration method'}}, "LD$logicaldrive disabled"; + record('WARNING'); } } } @@ -173,8 +187,9 @@ for my $slot (sort @controllers) { push @resultstr, "Slot $slot: no logical drives"; }; - - my $pds = runcmd("controller slot=$slot pd all show"); + my $pds = runcmd("controller slot=$slot pd all show detail"); + my $drive; + my %drives; for (@$pds) { chomp; next if /^$/; @@ -192,30 +207,34 @@ for my $slot (sort @controllers) { push @{$status{'Failed'}}, $1; } elsif (/^Error: The specified controller does not have any physical drives on it.$/) { $nodrives = 1; - } elsif (/^ *physicaldrive (\S+) .* (OK|Predictive Failure|Failed|Rebuilding)(?:, (?:active )?spare.*)?\)$/) { - my $drive = $1; - my $status = $2; - push @{$status{$status}}, $drive; - if ($status eq 'OK') { - } elsif ($status eq 'Predictive Failure' || - $status eq 'Rebuilding') { - record('WARNING'); - } elsif ($status eq 'Failed') { - record('CRITICAL'); - } else { - record('UNKNOWN'); - }; - push @drives, $drive; + } elsif (/^ *physicaldrive (\S+)/) { + $drive = $1; + $drives{$drive} = {}; + } elsif (defined $drive && m/^\s*(.*?):\s*(.*?)\s*$/) { + $drives{$drive}{$1} = $2; } else { die ("Cannot read line '$_' gotten from hpssacli controller slot=$slot pd all show\n"); - }; + } }; # Check that all drives have the proper transfer speed. # sometimes stuff breaks and they fall back to 10mb/sec. - for my $drive (@drives) { - # skip drives that are known to have failed - next if (exists $status{'Failed'} && grep {$drive eq $_} @{$status{'Failed'}}); + for my $drive (sort keys %drives) { + my $value = $drives{$drive}; + my $status = $value->{'Status'}; + push @{$status{$status}}, $drive; + if ($status eq 'OK') { + } elsif ($status eq 'Predictive Failure' || + $status eq 'Rebuilding') { + record('WARNING'); + } elsif ($status eq 'Failed') { + record('CRITICAL'); + # skip drives that are known to have failed + next; + } else { + record('UNKNOWN'); + } + my $type; if ($drive =~ /^[0-9]+:[0-9]+$/) { # scsi drives $type = 'SCSI'; @@ -224,46 +243,33 @@ for my $slot (sort @controllers) { } elsif ($drive =~ /^[0-9]+[C]:[0-9]+:[0-9]+$/) { # New 6GBPS SAS $type = 'SAS+'; } else { - # I'm not going to run pass arguments of unknown form to the shell.. warn ("Unknown diskdrive ID $drive\n"); next; } - my $pd = runcmd("controller slot=$slot pd $drive show"); - while (defined $pd->[0] && !($pd->[0] =~ /physicaldrive/)) { - shift @$pd; - }; - shift @$pd; - my %value; - for (@$pd) { - if (m/^\s*(.*?):\s*(.*?)\s*$/) { - $value{$1} = $2; - } - } - my $key; my $expected; if ($type eq 'SCSI') { $key = 'Transfer Speed'; - if (!defined $value{'Transfer Mode'}) { + if (!defined $value->{'Transfer Mode'}) { record('WARNING'); push @{$status{'unknown transfer mode'}}, $drive; next; - } elsif ($value{'Transfer Mode'} eq 'Ultra 3 Wide') { + } elsif ($value->{'Transfer Mode'} eq 'Ultra 3 Wide') { $expected = '160 MB/Sec'; - } elsif ($value{'Transfer Mode'} eq 'Ultra 320 Wide') { + } elsif ($value->{'Transfer Mode'} eq 'Ultra 320 Wide') { $expected = '320 MB/Sec'; } else { record('WARNING'); - push @{$status{'unknown transfer mode'}}, $drive."(".$value{'Transfer Mode'}.")"; + push @{$status{'unknown transfer mode'}}, $drive."(".$value->{'Transfer Mode'}.")"; next; }; } elsif ($type eq 'SAS' || $type eq 'SAS+') { $key = 'PHY Transfer Rate'; - if ($value{'Interface Type'} eq 'SATA') { + if ($value->{'Interface Type'} eq 'SATA') { $expected = [ '1.5Gbps', '3.0Gbps', '6.0Gbps' ]; - } elsif ($value{'PHY Count'} eq '2') { - if (defined($value{'Redundant Path(s)'})) { + } elsif ($value->{'PHY Count'} eq '2') { + if (defined($value->{'Redundant Path(s)'})) { $expected = [ '3.0GBPS, 3.0GBPS', '6.0GBPS, 6.0GBPS', '12.0GBPS, 12.0GBPS' ]; } else { @@ -281,21 +287,21 @@ for my $slot (sort @controllers) { if ($params->{'ignore-transfer-speed'}) { if (grep { $drive eq $_ } @{$params->{'ignore-transfer-speed'}}) { - push @{$status{'ignored transfer speed'}}, $drive."(".$value{$key}.")"; + push @{$status{'ignored transfer speed'}}, $drive."(".$value->{$key}.")"; next; }; }; - if (!defined $value{$key}) { + if (!defined $value->{$key}) { record('WARNING'); push @{$status{'unknown transfer speed'}}, $drive; } elsif (ref($expected) eq 'ARRAY') { - if (scalar(grep { uc($value{$key}) eq uc($_) } @$expected) == 0) { + if (scalar(grep { uc($value->{$key}) eq uc($_) } @$expected) == 0) { record('WARNING'); - push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")"; + push @{$status{'bad transfer speed'}}, $drive."(".$value->{$key}.")"; }; - } elsif (uc($value{$key}) ne uc($expected)) { + } elsif (uc($value->{$key}) ne uc($expected)) { record('WARNING'); - push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")"; + push @{$status{'bad transfer speed'}}, $drive."(".$value->{$key}.")"; }; }; @@ -308,44 +314,7 @@ for my $slot (sort @controllers) { next; }; - my $cst = runcmd("controller slot=$slot show detail"); - for (@$cst) { - chomp; - next if /^$/; - next if (/^\S.*in Slot $slot/); - if (/^ *(Controller|Cache|Battery\/Capacitor) Status: (.*)$/) { - my $system = $1; - my $status = $2; - - if ($system eq 'Cache') { - # Can be: - # - 'OK' - # - 'Not Configured' (for e.g. HP SSD Smart Path) - # - 'Permanently Disabled' - # - ...? - next if $status =~ /^(OK|Not Configured)$/; - if ($params->{'ignore-cache'}) { - push @freetext, "$system: $status (ignored)"; - next; - } - } - - push @freetext, "$system: $status"; - if ($status ne 'OK') { - next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor'); - record('WARNING'); - }; - } elsif (/^ *(Cache Status Details): (Cable Error)/) { - push @freetext, $2; - record('CRITICAL'); - } elsif (/^ *(Battery\/Capacitor Count): (.*)/) { - next if $params->{'no-battery'} || int($2) > 0; - push @freetext, "Battery count: $2"; - record('CRITICAL'); - }; - }; - - my $status = join(" - ", ((map { $_.": ".join(", ", @{$status{$_}}) } keys %status), @freetext)); + my $status = join(" - ", ((map { $_.": ".join(", ", @{$status{$_}}) } keys %status), @{$controllers{$slot}})); push @resultstr, "Slot $slot: $status"; };