dsa-check-soas: fix error when 0 (or more than 1) records returned
[mirror/dsa-nagios.git] / dsa-nagios-checks / checks / dsa-check-hpssacli
1 #!/usr/bin/perl -w
2
3 # check _physical_ disk status of disks on HP smart array controllers
4 # requires hpssacli
5 #
6 # does _not_ check raid status.  use arrayprobe for that.
7
8 # Copyright (c) 2008,2009,2010,2011 Peter Palfrader <peter@palfrader.org>
9 #
10 # Permission is hereby granted, free of charge, to any person obtaining
11 # a copy of this software and associated documentation files (the
12 # "Software"), to deal in the Software without restriction, including
13 # without limitation the rights to use, copy, modify, merge, publish,
14 # distribute, sublicense, and/or sell copies of the Software, and to
15 # permit persons to whom the Software is furnished to do so, subject to
16 # the following conditions:
17 #
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
20 #
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 use strict;
30 use English;
31 use Getopt::Long;
32
# Nagios plugin exit statuses, indexed by state name.  The numeric value is
# also what record() compares to decide which of two states wins.
my %CODE;
@CODE{qw(OK WARNING CRITICAL UNKNOWN)} = (0 .. 3);

# Name of the state the check will finally report; starts out as OK and is
# only changed through record().
my $EXITCODE = 'OK';
42
# Turn a fatal error into an UNKNOWN check result: the die message is
# printed to STDOUT (like the regular result line) and the process exits
# with the UNKNOWN code.
$SIG{'__DIE__'} = sub {
        # A __DIE__ hook is also invoked for exceptions raised inside an
        # eval; $^S is true in that case.  Rethrow there so that code which
        # traps its own errors is not cut short by the exit below.
        die @_ if $^S;
        print @_;
        exit $CODE{'UNKNOWN'};
};
47
# Run "sudo hpssacli <cmd>" and return a reference to an array holding its
# output lines (line endings still attached).
#
#   $cmd - the hpssacli arguments as one string, e.g. "controller all show";
#          it is split on whitespace into separate arguments.
#
# Dies if the command cannot be started or if it prints nothing at all.
# The exit status of hpssacli is not checked here; the callers in this file
# recognise the "Error: ..." lines in the output themselves.
sub runcmd($) {
        my ($cmd) = @_;
        my @argv = ('sudo', 'hpssacli', split(' ', $cmd));
        $cmd = "sudo hpssacli $cmd";
        # List-form pipe open executes the program directly, so no part of
        # the command is ever handed to a shell.  A lexical handle avoids
        # sharing the global bareword FH between calls.
        open(my $fh, '-|', @argv) or die ("Cannot run $cmd: $!");
        my @lines = <$fh>;
        close $fh;
        die ("no results from $cmd\n") if (scalar @lines == 0);
        return \@lines;
}
57
# Note a state for the overall result.  $EXITCODE is replaced by $newexit
# only when $newexit has a higher number in %CODE than the current state,
# so the result can never be lowered again.
# Dies if $newexit is not one of the names in %CODE.
sub record($) {
        my ($newexit) = @_;
        my $rank = $CODE{$newexit};
        die "code $newexit not defined\n" unless defined $rank;

        $EXITCODE = $newexit if $rank > $CODE{$EXITCODE};
}
66
# Command line handling.  The value of every switch is stored in the hash
# that $params refers to, under the option name without leading dashes.
my $usage = "$PROGRAM_NAME: Usage: $PROGRAM_NAME [--no-battery] [--ignore-cache] [--ignore-controller=<regex>] [--no-controller-ok] [--ignore-transfer-speed=<pd> [--ignore-transfer-speed=<pd> ...]]\n";
my $params;
Getopt::Long::Configure('bundling');
my $parsed_ok = GetOptions(
        '--help'                      => \$params->{'help'},
        '--no-battery'                => \$params->{'no-battery'},
        '--no-controller-ok'          => \$params->{'no-controller-ok'},
        '--ignore-cache'              => \$params->{'ignore-cache'},
        '--ignore-controller=s'       => \$params->{'ignore-controller'},
        '--ignore-transfer-speed=s@'  => \$params->{'ignore-transfer-speed'},
);

# GetOptions rejected the command line: die with the usage text.
die ($usage) unless $parsed_ok;

# --help prints the usage text and exits with status 0.
if ($params->{'help'}) {
        print $usage;
        exit (0);
}

# Anything left in @ARGV is an unexpected positional argument.
die ($usage) if (scalar @ARGV != 0);
85
# Collect the slot ids of all controllers listed by
# "hpssacli controller all show".
my $ctrlallshow = runcmd("controller all show");
my @controllers;
for my $line (@$ctrlallshow) {
        chomp $line;
        next if $line =~ /^$/;

        # Lines matching the --ignore-controller regex are dropped before
        # they are looked at any further.
        my $ignore = $params->{'ignore-controller'};
        next if ($ignore && $line =~ /$ignore/);

        # Any remaining non-empty line has to name a slot.
        unless ($line =~ /in Slot ([0-9a-z]+)/) {
                die ("Cannot read line '$line' gotten from hpssacli controller all show\n");
        }
        push @controllers, $1;
};

# Without any controller there is nothing to check; whether that counts as
# OK or UNKNOWN depends on --no-controller-ok.
unless (@controllers) {
        my $none = "No smartarray controllers found with hpssacli\n";
        if ($params->{'no-controller-ok'}) {
                print $none;
                exit $CODE{'OK'};
        }
        print "UNKNOWN: $none";
        exit $CODE{'UNKNOWN'};
};
108
# One human readable summary string per controller slot; joined into the
# final output line after the loop.
my @resultstr;

# Per controller slot:
#   1. list the logical drives and check their parity initialization
#      status and acceleration method,
#   2. list the physical drives and record their state,
#   3. check the negotiated transfer speed of every non-failed drive,
#   4. check controller, cache and battery/capacitor status.
# Problems are reported through record(); the details are collected in
# %status (category => list of items) and @freetext.
for my $slot (sort @controllers) {
        my @drives;
        my $nodrives = 0;
        my %status;
        my @freetext;

        my $ldallshow = runcmd("controller slot=$slot ld all show");
        my @logicaldrives;
        for (@$ldallshow) {
                chomp;
                next if /^$/;
                next if (/^\S.*in Slot $slot/);
                next if /^ *array [A-Z]$/;
                if (/logicaldrive ([0-9a-z]+)/) {
                        push @logicaldrives, $1;
                        next;
                } elsif (/^Error: The specified device does not have any logical drives.$/) {
                        $nodrives = 1;
                } else {
                        die ("Cannot read line '$_' gotten from hpssacli controller slot = $slot logicaldrive all show\n");
                }
        };

        # check logicaldrives
        for my $logicaldrive (sort @logicaldrives) {
                my $lds = runcmd("controller slot=$slot ld $logicaldrive show");
                for (@$lds) {
                        chomp;
                        next if /^$/;
                        if (/^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) {
                                my $state = $1;
                                if ($state eq 'Initialization Completed') {
                                        push @{$status{'OK'}}, "Parity LD$logicaldrive";
                                } elsif ($state eq 'Rebuilding') {
                                        push @{$status{'Failed'}}, "Parity LD$logicaldrive";
                                        record('WARNING');
                                } elsif ($state eq 'Initialization Failed') {
                                        push @{$status{'Failed'}}, "Parity LD$logicaldrive";
                                        record('CRITICAL');
                                } else {
                                        # Not reachable with the alternation above; kept as a
                                        # safety net should the regex be extended.
                                        record('UNKNOWN');
                                }
                        }
                        if (/^ *LD Acceleration Method: (.*)$/) {
                                my $method = $1;
                                # can at least be "Controller Cache" or "HP SSD Smart Path", both OK
                                if ($method eq 'All disabled') {
                                        push @{$status{'Acceleration method'}}, "LD$logicaldrive disabled";
                                        record('WARNING');
                                }
                        }
                }
        }

        if (!$nodrives && scalar @logicaldrives == 0) {
                # Neither a logical drive nor the "no logical drives" error was seen.
                push @resultstr, "Slot $slot: unexpectedly, found no logical drives in list.";
                record('UNKNOWN');
        } elsif ($nodrives && scalar keys %status > 0) {
                push @resultstr, "Slot $slot: have no logical drives but status results?";
                record('UNKNOWN');
                next;
        } elsif ($nodrives) {
                push @resultstr, "Slot $slot: no logical drives";
        };


        my $pds = runcmd("controller slot=$slot pd all show");
        for (@$pds) {
                chomp;
                next if /^$/;
                next if (/^\S.*in Slot $slot/);
                next if /^ *array [A-Z]$/;
                next if /^ *unassigned/;
                if (/^ *HBA Drives/) {
                        # HBA mode implies no logical drives, thus reset the "drives found" check and proceed with
                        # checking physical drives.
                        $nodrives = 0;
                        next;
                }
                if (/^ *(array [A-Z]) \(Failed\)$/) {
                        record('CRITICAL');
                        push @{$status{'Failed'}}, $1;
                } elsif (/^Error: The specified controller does not have any physical drives on it.$/) {
                        $nodrives = 1;
                } elsif (/^ *physicaldrive (\S+) .* (OK|Predictive Failure|Failed|Rebuilding)(?:, (?:active )?spare.*)?\)$/) {
                        my $drive = $1;
                        my $state = $2;
                        push @{$status{$state}}, $drive;
                        if ($state eq 'OK') {
                        } elsif ($state eq 'Predictive Failure' ||
                                 $state eq 'Rebuilding') {
                                record('WARNING');
                        } elsif ($state eq 'Failed') {
                                record('CRITICAL');
                        } else {
                                record('UNKNOWN');
                        };
                        push @drives, $drive;
                } else {
                        die ("Cannot read line '$_' gotten from hpssacli controller slot=$slot pd all show\n");
                };
        };

        # Check that all drives have the proper transfer speed.
        # sometimes stuff breaks and they fall back to 10mb/sec.
        for my $drive (@drives) {
                # skip drives that are known to have failed
                next if (exists $status{'Failed'} && grep {$drive eq $_} @{$status{'Failed'}});
                my $type;
                if ($drive =~ /^[0-9]+:[0-9]+$/) { # scsi drives
                        $type = 'SCSI';
                } elsif ($drive =~ /^[0-9]+[EI]:[0-9]+:[0-9]+$/) { # SAS
                        $type = 'SAS';
                } elsif ($drive =~ /^[0-9]+[C]:[0-9]+:[0-9]+$/) { # New 6GBPS SAS
                        $type = 'SAS+';
                } else {
                        # I'm not going to run pass arguments of unknown form to the shell..
                        warn ("Unknown diskdrive ID $drive\n");
                        next;
                }

                # Drop everything up to and including the "physicaldrive" header
                # line, then read the remaining "Name: value" lines into %value.
                my $pd = runcmd("controller slot=$slot pd $drive show");
                while (defined $pd->[0] && !($pd->[0] =~ /physicaldrive/)) {
                        shift @$pd;
                };
                shift @$pd;
                my %value;
                for (@$pd) {
                        if (m/^\s*(.*?):\s*(.*?)\s*$/) {
                                $value{$1} = $2;
                        }
                }

                # $key names the field holding the actual speed; $expected is
                # either a single acceptable value or a reference to a list of
                # acceptable values.
                my $key;
                my $expected;
                if ($type eq 'SCSI') {
                        $key = 'Transfer Speed';
                        if (!defined $value{'Transfer Mode'}) {
                                record('WARNING');
                                push @{$status{'unknown transfer mode'}}, $drive;
                                next;
                        } elsif ($value{'Transfer Mode'} eq 'Ultra 3 Wide') {
                                $expected = '160 MB/Sec';
                        } elsif ($value{'Transfer Mode'} eq 'Ultra 320 Wide') {
                                $expected = '320 MB/Sec';
                        } else {
                                record('WARNING');
                                push @{$status{'unknown transfer mode'}}, $drive."(".$value{'Transfer Mode'}.")";
                                next;
                        };
                } elsif ($type eq 'SAS' || $type eq 'SAS+') {
                        $key = 'PHY Transfer Rate';
                        # Either field may be absent from the output.  Compare
                        # against an empty string in that case: the same branch
                        # is taken as before, without an "uninitialized value"
                        # warning under -w.
                        my $interface = defined $value{'Interface Type'} ? $value{'Interface Type'} : '';
                        my $phycount  = defined $value{'PHY Count'}      ? $value{'PHY Count'}      : '';
                        if ($interface eq 'SATA') {
                                $expected = [ '1.5Gbps', '3.0Gbps', '6.0Gbps' ];
                        } elsif ($phycount eq '2') {
                                if (defined($value{'Redundant Path(s)'})) {
                                        $expected = [ '3.0GBPS, 3.0GBPS', '6.0GBPS, 6.0GBPS',
                                                      '12.0GBPS, 12.0GBPS' ];
                                } else {
                                        $expected = [ '3.0GBPS, Unknown', 'Unknown, 3.0GBPS',
                                                      '6.0GBPS, Unknown', 'Unknown, 6.0GBPS',
                                                      '12.0GBPS, Unknown', 'Unknown, 12.0GBPS' ];
                                }
                        } else {
                                $expected = [ '3.0GBPS', '6.0GBPS', '12.0GBPS' ];
                        }
                } else {
                        warn "Should not be here.  Do not know what to do with type '$type'\n";
                        next;
                }

                if ($params->{'ignore-transfer-speed'}) {
                        if (grep { $drive eq $_ } @{$params->{'ignore-transfer-speed'}}) {
                                # The speed field may be missing; show it as empty
                                # rather than concatenating undef.
                                my $shown = defined $value{$key} ? $value{$key} : '';
                                push @{$status{'ignored transfer speed'}}, $drive."(".$shown.")";
                                next;
                        };
                };
                if (!defined $value{$key}) {
                        record('WARNING');
                        push @{$status{'unknown transfer speed'}}, $drive;
                } elsif (ref($expected) eq 'ARRAY') {
                        # Case-insensitive match against any of the acceptable values.
                        if (scalar(grep { uc($value{$key}) eq uc($_) } @$expected) == 0) {
                                record('WARNING');
                                push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")";
                        };
                } elsif (uc($value{$key}) ne uc($expected)) {
                        record('WARNING');
                        push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")";
                };
        };

        if ($nodrives && scalar keys %status > 0) {
                push @resultstr, "Slot $slot: have no drives but status results?";
                record('UNKNOWN');
                next;
        } elsif ($nodrives) {
                push @resultstr, "Slot $slot: no drives";
                next;
        };

        my $cst = runcmd("controller slot=$slot show detail");
        for (@$cst) {
                chomp;
                next if /^$/;
                next if (/^\S.*in Slot $slot/);
                if (/^ *(Controller|Cache|Battery\/Capacitor) Status: (.*)$/) {
                        my $system = $1;
                        my $state = $2;

                        if ($system eq 'Cache') {
                                # Can be:
                                # - 'OK'
                                # - 'Not Configured' (for e.g. HP SSD Smart Path)
                                # - 'Permanently Disabled'
                                # - ...?
                                next if $state =~ /^(OK|Not Configured)$/;
                                if ($params->{'ignore-cache'}) {
                                        push @freetext, "$system: $state (ignored)";
                                        next;
                                }
                        }

                        push @freetext, "$system: $state";
                        if ($state ne 'OK') {
                                # With --no-battery a bad battery/capacitor status is
                                # still shown but does not raise the result.
                                next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor');
                                record('WARNING');
                        };
                } elsif (/^ *(Cache Status Details): (Cable Error)/) {
                        push @freetext, $2;
                        record('CRITICAL');
                } elsif (/^ *(Battery\/Capacitor Count): (.*)/) {
                        next if $params->{'no-battery'} || int($2) > 0;
                        push @freetext, "Battery count: $2";
                        record('CRITICAL');
                };
        };

        # Categories are emitted in sorted order so that the text for an
        # unchanged state is the same on every run (plain hash key order is
        # not stable between runs).
        my $summary = join(" - ", ((map { $_.": ".join(", ", @{$status{$_}}) } sort keys %status), @freetext));

        push @resultstr, "Slot $slot: $summary";
};
352
# Print the result line: the final state name followed by the per-slot
# summaries, then exit with the numeric code for that state.
my $summary_line = join(" --- ", @resultstr);
print "$EXITCODE: $summary_line\n";
exit $CODE{$EXITCODE};