Merge remote-tracking branch 'adsb/fordsa'
[mirror/dsa-nagios.git] / dsa-nagios-checks / checks / dsa-check-hpssacli
1 #!/usr/bin/perl -w
2
3 # check _physical_ disk status of disks on Smart Array controllers
4 # requires hpssacli or ssacli
5 #
6 # does _not_ check raid status.  use arrayprobe for that.
7
8 # Copyright (c) 2008,2009,2010,2011 Peter Palfrader <peter@palfrader.org>
9 #
10 # Permission is hereby granted, free of charge, to any person obtaining
11 # a copy of this software and associated documentation files (the
12 # "Software"), to deal in the Software without restriction, including
13 # without limitation the rights to use, copy, modify, merge, publish,
14 # distribute, sublicense, and/or sell copies of the Software, and to
15 # permit persons to whom the Software is furnished to do so, subject to
16 # the following conditions:
17 #
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
20 #
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 use strict;
30 use English;
31 use Getopt::Long;
32
# Nagios plugin exit codes, keyed by state name.
my %CODE = (
        'OK'            => 0,
        'WARNING'       => 1,
        'CRITICAL'      => 2,
        'UNKNOWN'       => 3
);

# Worst state recorded so far; starts out as OK.
my $EXITCODE = 'OK';

# Any uncaught die() becomes a Nagios UNKNOWN: the message is printed on
# stdout and the process exits with the UNKNOWN code.
$SIG{'__DIE__'} = sub {
        # The __DIE__ hook is also invoked for die() inside eval blocks
        # (including evals in loaded modules).  $^S is true there; returning
        # lets the exception propagate to the enclosing eval as usual
        # instead of terminating the whole check.
        return if $^S;
        print @_;
        exit $CODE{'UNKNOWN'};
};
47
# Pick the CLI tool from the name this script was invoked under: anything
# containing "hpssacli" selects the older hpssacli, everything else the
# newer ssacli.
my $BIN = ($0 =~ /hpssacli/) ? "hpssacli" : "ssacli";
55
# Run "sudo $BIN <cmd>" and return its output.
#
# $cmd is a whitespace-separated argument string for $BIN (for example
# "controller all show detail").
# Returns a reference to an array holding the raw output lines, newlines
# included.
# Dies if the command cannot be started or produces no output at all.
sub runcmd($) {
        my ($cmd) = @_;
        # Build an argument vector so the command is executed directly,
        # without a shell ever seeing the interpolated string.
        my @argv = ('sudo', $BIN, split(' ', $cmd));
        $cmd = "sudo $BIN $cmd";
        open(my $fh, '-|', @argv) or die ("Cannot run $cmd: $!");
        my @lines = <$fh>;
        # The result of close (and thus the tool's exit status) is ignored
        # on purpose: callers recognise "Error: ..." lines in the output
        # themselves.  Presumably the tool exits non-zero in those cases --
        # confirm before tightening this.
        close $fh;
        die ("no results from $cmd\n") if (scalar @lines == 0);
        return \@lines;
}
65
# Raise the overall result to $candidate if it is worse (numerically
# higher in %CODE) than what has been recorded so far.
# Dies when the given state name has no entry in %CODE.
sub record($) {
        my $candidate = shift;
        defined $CODE{$candidate}
                or die "code $candidate not defined\n";

        $EXITCODE = $candidate
                if $CODE{$candidate} > $CODE{$EXITCODE};
        return;
}
74
# Command line handling.  Parsed options end up in the $params hash ref,
# keyed by option name; --ignore-transfer-speed may be given repeatedly
# and is collected into an array ref.
my $usage = "$PROGRAM_NAME: Usage: $PROGRAM_NAME [--no-battery] [--ignore-cache] [--ignore-controller=<regex>] [--no-controller-ok] [--ignore-transfer-speed=<pd> [--ignore-transfer-speed=<pd> ...]]\n";
my $params;
Getopt::Long::Configure('bundling');
GetOptions(
        '--help'                      => \$params->{'help'},
        '--no-battery'                => \$params->{'no-battery'},
        '--no-controller-ok'          => \$params->{'no-controller-ok'},
        '--ignore-cache'              => \$params->{'ignore-cache'},
        '--ignore-controller=s'       => \$params->{'ignore-controller'},
        '--ignore-transfer-speed=s@'  => \$params->{'ignore-transfer-speed'},
) or die($usage);

# --help prints the usage text and exits successfully.
if ($params->{'help'}) {
        print $usage;
        exit(0);
}

# Positional arguments are not accepted.
die($usage) if scalar(@ARGV) != 0;
93
# Parse "controller all show detail" into %controllers, which maps each
# slot id to an array ref of status strings for the final report.
# Unhealthy controller, cache or battery states are recorded as
# WARNING/CRITICAL along the way.
my $ctrlallshow = runcmd("controller all show detail");
my $slot;
my %controllers;
for (@$ctrlallshow) {
        chomp;
        next if /^$/;
        my $ignored = ($params->{'ignore-controller'} && /$params->{'ignore-controller'}/) ? 1 : 0;
        if (/in Slot ([0-9a-z]+)/) {
                if ($ignored) {
                        # An ignored controller header must also hide the
                        # detail lines that follow it; otherwise they would
                        # be attributed to the previous slot (or to no slot
                        # at all).
                        $slot = undef;
                } else {
                        $slot = $1;
                        # Start with an empty list so the slot can always be
                        # dereferenced later, even if no status line follows.
                        $controllers{$slot} = [];
                }
                next;
        }
        # Any other line matching the ignore pattern is skipped as before.
        next if $ignored;
        # Detail lines outside of a (non-ignored) controller section.
        next unless defined $slot;

        if (/^ *(Controller|Cache|Battery\/Capacitor) Status: (.*)$/) {
                my $system = $1;
                my $status = $2;

                if ($system eq 'Cache') {
                        # Can be:
                        # - 'OK'
                        # - 'Not Configured' (for e.g. HP SSD Smart Path)
                        # - 'Permanently Disabled'
                        # - ...?
                        next if $status =~ /^(OK|Not Configured)$/;
                        if ($params->{'ignore-cache'}) {
                                push @{$controllers{$slot}}, "$system: $status (ignored)";
                                next;
                        }
                }

                push @{$controllers{$slot}}, "$system: $status";
                if ($status ne 'OK') {
                        # --no-battery: still report the battery state, but
                        # do not let it affect the exit code.
                        next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor');
                        record('WARNING');
                };
        } elsif (/^ *(Cache Status Details): (Cable Error)/) {
                push @{$controllers{$slot}}, $2;
                record('CRITICAL');
        } elsif (/^ *(Battery\/Capacitor Count): (.*)/) {
                # A battery count of zero is critical unless --no-battery.
                next if $params->{'no-battery'} || int($2) > 0;
                push @{$controllers{$slot}}, "Battery count: $2";
                record('CRITICAL');
        };
};
135
# Nothing to check when no controller was found.  That is an UNKNOWN
# result unless the caller stated via --no-controller-ok that it is fine.
unless (%controllers) {
        my $msg = "No Smart Array controllers found with $BIN\n";
        if ($params->{'no-controller-ok'}) {
                print $msg;
                exit $CODE{'OK'};
        }
        print "UNKNOWN: $msg";
        exit $CODE{'UNKNOWN'};
}
145
# One report fragment per controller slot; joined for the final output.
my @resultstr;

# For every controller: inspect its logical drives, then its physical
# drives (status and negotiated transfer speed), and build a summary.
for my $slot (sort keys %controllers) {
        my $nodrives = 0;
        # Maps a status label (e.g. 'OK', 'Failed', 'bad transfer speed')
        # to the list of drives/items carrying it.
        my %status;

        # check logicaldrives
        my $logicaldrive;
        my @logicaldrives;
        my $lds = runcmd("controller slot=$slot ld all show detail");
        for (@$lds) {
                chomp;
                next if /^$/;
                if (/Logical Drive: ([0-9a-z]+)/) {
                        $logicaldrive = $1;
                        push @logicaldrives, $logicaldrive;
                } elsif (/^Error: The specified device does not have any logical drives.$/) {
                        $nodrives = 1;
                } elsif (/^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) {
                        my $status = $1;
                        if ($status eq 'Initialization Completed') {
                                push @{$status{'OK'}}, "Parity LD$logicaldrive";
                        } elsif ($status eq 'Rebuilding') {
                                push @{$status{'Failed'}}, "Parity LD$logicaldrive";
                                record('WARNING');
                        } elsif ($status eq 'Initialization Failed') {
                                push @{$status{'Failed'}}, "Parity LD$logicaldrive";
                                record('CRITICAL');
                        } else {
                                record('UNKNOWN');
                        }
                } elsif (/^ *LD Acceleration Method: (.*)$/) {
                        my $status = $1;
                        # can at least be "Controller Cache" or HP SSD Smart Path", both OK
                        if ($status eq 'All disabled') {
                                push @{$status{'Acceleration method'}}, "LD$logicaldrive disabled";
                                record('WARNING');
                        }
                }
        }

        if (!$nodrives && scalar @logicaldrives == 0) {
                push @resultstr, "Slot $slot: unexpectedly, found no logical drives in list.";
                record('UNKNOWN');
        } elsif ($nodrives && scalar keys %status > 0) {
                push @resultstr, "Slot $slot: have no logical drives but status results?";
                record('UNKNOWN');
                next;
        } elsif ($nodrives) {
                push @resultstr, "Slot $slot: no logical drives";
        };

        # Parse the physical drive listing into %drives:
        # drive id => { attribute name => value }.
        my $pds = runcmd("controller slot=$slot pd all show detail");
        my $drive;
        my %drives;
        for (@$pds) {
                chomp;
                next if /^$/;
                next if (/^\S.*in Slot $slot/);
                next if /^ *Array [A-Z]$/i;
                next if /^ *unassigned/;
                if (/^ *HBA Drives/) {
                        # HBA mode implies no logical drives, thus reset the "drives found" check and proceed with
                        # checking physical drives.
                        $nodrives = 0;
                        next;
                }
                if (/^ *(Array [A-Z]) \(Failed\)$/i) {
                        record('CRITICAL');
                        push @{$status{'Failed'}}, $1;
                } elsif (/^Error: The specified controller does not have any physical drives on it.$/) {
                        $nodrives = 1;
                } elsif (/^ *physicaldrive (\S+)/) {
                        $drive = $1;
                        $drives{$drive} = {};
                } elsif (defined $drive && m/^\s*(.*?):\s*(.*?)\s*$/) {
                        $drives{$drive}{$1} = $2;
                } else {
                        die ("Cannot read line '$_' gotten from $BIN controller slot=$slot pd all show\n");
                }
        };

        # Evaluate each drive: first its health status, then whether it
        # negotiated a proper transfer speed.
        # sometimes stuff breaks and they fall back to 10mb/sec.
        for my $drive (sort keys %drives) {
                my $value = $drives{$drive};
                # A drive without a Status line gets an explicit label instead
                # of an empty hash key; it ends up as UNKNOWN below.
                my $status = defined $value->{'Status'} ? $value->{'Status'} : 'unknown status';
                push @{$status{$status}}, $drive;
                if ($status eq 'OK') {
                } elsif ($status eq 'Predictive Failure' ||
                         $status eq 'Rebuilding') {
                        record('WARNING');
                } elsif ($status eq 'Failed') {
                        record('CRITICAL');
                        # skip drives that are known to have failed
                        next;
                } else {
                        record('UNKNOWN');
                }

                # The shape of the drive id tells the bus type.
                my $type;
                if ($drive =~ /^[0-9]+:[0-9]+$/) { # scsi drives
                        $type = 'SCSI';
                } elsif ($drive =~ /^[0-9]+[EI]:[0-9]+:[0-9]+$/) { # SAS
                        $type = 'SAS';
                } elsif ($drive =~ /^[0-9]+[C]:[0-9]+:[0-9]+$/) { # New 6GBPS SAS
                        $type = 'SAS+';
                } else {
                        warn ("Unknown diskdrive ID $drive\n");
                        next;
                }

                # $key names the attribute holding the negotiated speed;
                # $expected is either one acceptable value or an array ref of
                # acceptable values.
                my $key;
                my $expected;
                if ($type eq 'SCSI') {
                        $key = 'Transfer Speed';
                        if (!defined $value->{'Transfer Mode'}) {
                                record('WARNING');
                                push @{$status{'unknown transfer mode'}}, $drive;
                                next;
                        } elsif ($value->{'Transfer Mode'} eq 'Ultra 3 Wide') {
                                $expected = '160 MB/Sec';
                        } elsif ($value->{'Transfer Mode'} eq 'Ultra 320 Wide') {
                                $expected = '320 MB/Sec';
                        } else {
                                record('WARNING');
                                push @{$status{'unknown transfer mode'}}, $drive."(".$value->{'Transfer Mode'}.")";
                                next;
                        };
                } elsif ($type eq 'SAS' || $type eq 'SAS+') {
                        $key = 'PHY Transfer Rate';
                        # Treat missing attributes as empty strings so the
                        # comparisons below never operate on undef.
                        my $iface    = defined $value->{'Interface Type'} ? $value->{'Interface Type'} : '';
                        my $phycount = defined $value->{'PHY Count'}      ? $value->{'PHY Count'}      : '';
                        if ($iface eq 'SATA') {
                                $expected = [ '1.5Gbps', '3.0Gbps', '6.0Gbps' ];
                        } elsif ($phycount eq '2') {
                                if (defined($value->{'Redundant Path(s)'})) {
                                        $expected = [ '3.0GBPS, 3.0GBPS', '6.0GBPS, 6.0GBPS',
                                                      '12.0GBPS, 12.0GBPS' ];
                                } else {
                                        $expected = [ '3.0GBPS, Unknown', 'Unknown, 3.0GBPS',
                                                      '6.0GBPS, Unknown', 'Unknown, 6.0GBPS',
                                                      '12.0GBPS, Unknown', 'Unknown, 12.0GBPS' ];
                                }
                        } else {
                                $expected = [ '3.0GBPS', '6.0GBPS', '12.0GBPS' ];
                        }
                } else {
                        warn "Should not be here.  Do not know what to do with type '$type'\n";
                        next;
                }

                if ($params->{'ignore-transfer-speed'}) {
                        if (grep { $drive eq $_ } @{$params->{'ignore-transfer-speed'}}) {
                                my $shown = defined $value->{$key} ? $value->{$key} : 'unknown';
                                push @{$status{'ignored transfer speed'}}, $drive."(".$shown.")";
                                next;
                        };
                };
                if (!defined $value->{$key}) {
                        record('WARNING');
                        push @{$status{'unknown transfer speed'}}, $drive;
                } elsif (ref($expected) eq 'ARRAY') {
                        # Speeds are compared case-insensitively.
                        if (scalar(grep { uc($value->{$key}) eq uc($_) } @$expected) == 0) {
                                record('WARNING');
                                push @{$status{'bad transfer speed'}}, $drive."(".$value->{$key}.")";
                        };
                } elsif (uc($value->{$key}) ne uc($expected)) {
                        record('WARNING');
                        push @{$status{'bad transfer speed'}}, $drive."(".$value->{$key}.")";
                };
        };

        if ($nodrives && scalar keys %status > 0) {
                push @resultstr, "Slot $slot: have no drives but status results?";
                record('UNKNOWN');
                next;
        } elsif ($nodrives) {
                push @resultstr, "Slot $slot: no drives";
                next;
        };

        # Status labels are sorted so the report text is stable between
        # runs; a slot without collected controller strings contributes
        # nothing rather than triggering a dereference of undef.
        my @ctrlstatus = @{ $controllers{$slot} || [] };
        my $status = join(" - ", ((map { $_.": ".join(", ", @{$status{$_}}) } sort keys %status), @ctrlstatus));

        push @resultstr, "Slot $slot: $status";
};
329
# Emit the single-line Nagios report, prefixed with the overall state,
# and exit with the matching plugin exit code.
my $summary = join(" --- ", @resultstr);
print "$EXITCODE: $summary\n";
exit $CODE{$EXITCODE};