Merge branch 'master' of ssh://db.debian.org/git/dsa-nagios
authorStephen Gran <steve@lobefin.net>
Tue, 15 Apr 2014 15:54:52 +0000 (16:54 +0100)
committerStephen Gran <steve@lobefin.net>
Tue, 15 Apr 2014 15:54:52 +0000 (16:54 +0100)
config/nagios-master.cfg
dsa-nagios-checks/checks/dsa-check-edac [new file with mode: 0755]
dsa-nagios-checks/checks/dsa-check-hpacucli
dsa-nagios-checks/checks/dsa-check-running-kernel
dsa-nagios-checks/checks/dsa-check-zone-rrsig-expiration-many
dsa-nagios-checks/debian/changelog

index 1ecdaff..e70bdce 100644 (file)
@@ -90,6 +90,10 @@ servers:
     address: 129.143.57.177
     parents: gw-ubcece
     hostgroups: layer3-infrastructure
+  gw-leaseweb:
+    address: 185.17.185.190
+    parents: gw-ubcece
+    hostgroups: layer3-infrastructure
   gw-man-da:
     address: 82.195.75.126
     parents: gw-ubcece
@@ -202,10 +206,6 @@ servers:
     address: 217.140.96.56
     parents: gw-arm
     hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, deadslow
-  alain:
-    address: 217.140.96.58
-    parents: gw-arm
-    hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow
   alwyn:
     address: 217.140.96.59
     parents: gw-arm
@@ -483,6 +483,32 @@ servers:
     parents: gw-isc
     hostgroups: computers, service, apache2-hosts, rsyncd-hosts, acpid-hosts, dl360, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula
   # }}}
+  # {{{ gw-leaseweb
+  lw01:
+    address: 185.17.185.177
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw02:
+    address: 185.17.185.178
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw03:
+    address: 185.17.185.179
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw04:
+    address: 185.17.185.180
+    parents: gw-leaseweb
+    hostgroups: computers,  service, acpid-hosts, wheezy, dl180
+  lw05:
+    address: 185.17.185.181
+    parents: gw-leaseweb
+    hostgroups: computers,  service, acpid-hosts, wheezy, dl120, sw-raid
+  lw06:
+    address: 185.17.185.182
+    parents: gw-leaseweb
+    hostgroups: computers,  service, acpid-hosts, wheezy, dl120, sw-raid
+  # }}}
   # {{{ gw-karlsruhe
   zemlinsky:
     address: 129.143.160.6
@@ -926,10 +952,6 @@ servers:
     hostgroups: secondary-IPs
   # }}}
   # {{{ gw-ynic
-  hildegard:
-    address: 144.32.168.74
-    parents: gw-ynic
-    hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, deadslow, buildd
   howells:
     address: 144.32.168.75
     parents: gw-ynic
@@ -1032,6 +1054,12 @@ hostgroups:
   dl585:
     alias: HP DL385 hosts
     private: 1
+  dl180:
+    alias: HP DL180
+    private: 1
+  dl120:
+    alias: HP DL120
+    private: 1
   sw-raid:
     alias: Hosts with Linux software raid
     private: 1
@@ -2075,7 +2103,7 @@ services:
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli"
     normal_check_interval: 120
-    hostgroups: dl385, dl380, dl360, bl460
+    hostgroups: dl385, dl380, dl360, bl460, dl180
     excludehosts: schein, rietz
   -
     name: HW - hpacucli status
@@ -2102,6 +2130,13 @@ services:
     normal_check_interval: 120
     hostgroups: dl585
  ###
+  -
+    name: HW - edac status
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-edac"
+    normal_check_interval: 120
+    hosts: lw05, lw06
+    #hostgroups: computers
+    #excludehosts: villa, lobos, senfl, schein
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm"
diff --git a/dsa-nagios-checks/checks/dsa-check-edac b/dsa-nagios-checks/checks/dsa-check-edac
new file mode 100755 (executable)
index 0000000..e74795a
--- /dev/null
@@ -0,0 +1,49 @@
+#! /bin/bash
+#
+# Nagios/NRPE check: report errors found by edac-util.
+#
+# Runs "edac-util -q" and maps the outcome to a plugin exit code:
+#   0 (OK)      - edac-util succeeded and printed nothing
+#   1 (WARNING) - edac-util succeeded and printed something
+#   3 (UNKNOWN) - edac-util itself exited non-zero
+#
+# Copyright 2014 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Capture stderr as well, so a failure message shows up in the plugin output.
+out=$(edac-util -q 2>&1)
+rc=$?
+
+if [ "$rc" != 0 ]; then
+       echo "UNKNOWN: edac-util failed: $out"
+       # 3 is the documented plugin exit code for UNKNOWN; "exit -1" would
+       # become status 255, which is outside the valid 0-3 range.
+       exit 3
+fi
+# Any output at all from the quiet run is treated as an error report.
+if [ -z "$out" ]; then
+       echo "OK: edac-util reported no errors"
+       exit 0
+else
+       echo "WARNING: edac-util reported errors."
+       echo "$out"
+       exit 1
+fi
index 8bbb7eb..e9a299e 100755 (executable)
@@ -108,10 +108,63 @@ if (scalar @controllers == 0) {
 my @resultstr;
 
 for my $slot (sort @controllers) {
-       my $pds = runcmd("controller slot=$slot pd all show");
        my @drives;
        my $nodrives = 0;
        my %status;
+
+       my $ldallshow = runcmd("controller slot=$slot ld all show");
+       my @logicaldrives;
+       for (@$ldallshow) {
+               chomp;
+               next if /^$/;
+               next if (/^\S.*in Slot $slot/);
+               next if /^ *array [A-Z]$/;
+               if (/logicaldrive ([0-9a-z]+)/) {
+                       push @logicaldrives, $1;
+                       next;
+               } elsif (/^Error: The specified device does not have any logical drives.$/) {
+                       $nodrives = 1;
+               } else {
+                       die ("Cannot read line '$_' gotten from hpacucli controller slot = $slot logicaldrive all show\n");
+               }
+       };
+
+       # check logicaldrives
+       for my $logicaldrive (sort @logicaldrives) {
+               my $lds = runcmd("controller slot=$slot ld $logicaldrive show");
+               for (@$lds) {
+                       chomp;
+                       next if /^$/;
+                       if (/^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) {
+                               my $status = $1;
+                               if ($status eq 'Initialization Completed') {
+                                       push @{$status{'OK'}}, "Parity LD$logicaldrive";
+                               } elsif ($status eq 'Rebuilding') {
+                                       push @{$status{'Failed'}}, "Parity LD$logicaldrive";
+                                       record('WARNING');
+                               } elsif ($status eq 'Initialization Failed') {
+                                       push @{$status{'Failed'}}, "Parity LD$logicaldrive";
+                                       record('CRITICAL');
+                               } else {
+                                       record('UNKNOWN');
+                               }
+                       }
+               }
+       }
+
+       if (!$nodrives && scalar @logicaldrives == 0) {
+               push @resultstr, "Slot $slot: unexpectedly, found no logical drives in list.";
+               record('UNKNOWN');
+       } elsif ($nodrives && scalar keys %status > 0) {
+               push @resultstr, "Slot $slot: have no logical drives but status results?";
+               record('UNKNOWN');
+               next;
+       } elsif ($nodrives) {
+               push @resultstr, "Slot $slot: no logical drives";
+       };
+
+
+       my $pds = runcmd("controller slot=$slot pd all show");
        for (@$pds) {
                chomp;
                next if /^$/;
@@ -191,7 +244,9 @@ for my $slot (sort @controllers) {
                        };
                } elsif ($type eq 'SAS' || $type eq 'SAS+') {
                        $key = 'PHY Transfer Rate';
-                       if ($value{'PHY Count'} eq '2') {
+                       if ($value{'Interface Type'} eq 'SATA') {
+                               $expected = [ '1.5Gbps' ];
+                       } elsif ($value{'PHY Count'} eq '2') {
                                if (defined($value{'Redundant Path(s)'})) {
                                        $expected = [ '3.0GBPS, 3.0GBPS', '6.0GBPS, 6.0GBPS' ];
                                } else {
index ccdfd80..ca4170e 100755 (executable)
@@ -3,7 +3,7 @@
 # Check if the running kernel has the same version string as the on-disk
 # kernel image.
 
-# Copyright 2008,2009,2011 Peter Palfrader
+# Copyright 2008,2009,2011,2012,2013,2014 Peter Palfrader
 # Copyright 2009 Stephen Gran
 # Copyright 2010,2012,2013 Uli Martens
 # Copyright 2011 Alexander Reichle-Schmehl
@@ -37,9 +37,11 @@ get_offset() {
 
        file="$1"
        needle="$2"
+       pos="$3"
+
        perl -e '
                undef $/;
-               $i = index(<>, "'"$needle"'");
+               $i = index(<>, "'"$needle"'", '"$pos"');
                if ($i < 0) {
                        exit 1;
                };
@@ -128,18 +130,22 @@ cat_vmlinux() {
        header="$2"
        filter="$3"
        hdroff="$4"
+       nextoff=0
+
+       while : ; do
+               off=`get_offset "$image" $header $nextoff`
+               local ret="$?"
+               if [ "$ret" != 0 ]; then
+                       # not found, exit
+                       return 1
+               fi
 
-       off=`get_offset "$image" $header`
-       local ret="$?"
-       if [ "$ret" != 0 ]; then
-               # not found, exit
-               return 1
-       fi
-
-       (if [ "$off" != 0 ]; then
-          dd ibs="$((off+hdroff))" skip=1 count=0
-        fi &&
-        dd bs=512k) < "$image"  2>/dev/null | $filter 2>/dev/null
+               (if [ "$off" != 0 ]; then
+                  dd ibs="$((off+hdroff))" skip=1 count=0
+                fi &&
+                dd bs=512k) < "$image"  2>/dev/null | $filter 2>/dev/null
+               nextoff=$((off + 1))
+       done
        return 0
 }
 
@@ -149,13 +155,13 @@ get_image_linux() {
        image="$1"
 
        # gzip compressed image
-       if cat_vmlinux "$image" "\x1f\x8b\x08\x00"      "zcat"   0; then return; fi
-       if cat_vmlinux "$image" "\x1f\x8b\x08\x08"      "zcat"   0; then return; fi
+       cat_vmlinux "$image" "\x1f\x8b\x08\x00"      "zcat"   0
+       cat_vmlinux "$image" "\x1f\x8b\x08\x08"      "zcat"   0
        # lzma compressed image
-       if cat_vmlinux "$image" "\x00\x00\x00\x02\xff"  "xzcat" -1; then return; fi
-       if cat_vmlinux "$image" "\x00\x00\x00\x04\xff"  "xzcat" -1; then return; fi
+       cat_vmlinux "$image" "\x00\x00\x00\x02\xff"  "xzcat" -1
+       cat_vmlinux "$image" "\x00\x00\x00\x04\xff"  "xzcat" -1
        # xz compressed image
-       if cat_vmlinux "$image" "\xfd\x37\x7a\x58\x5a " "xzcat"  0; then return; fi
+       cat_vmlinux "$image" "\xfd\x37\x7a\x58\x5a " "xzcat"  0
 
        echo "ERROR: Unable to extract kernel image." 2>&1
        exit 1
index 397bb83..ec97557 100755 (executable)
@@ -187,7 +187,7 @@ for my $k (keys %$count) {
        @{$count->{$k}} = sort {$a cmp $b} @{$count->{$k}};
 }
 
-my $exit;
+my $exit = 0;
 my %state_mapping = (
        'unknown' => 255,
        'critical' => 2,
index c6b718b..ad83be0 100644 (file)
@@ -1,8 +1,21 @@
-dsa-nagios-checks (99) UNRELEASED; urgency=low
+dsa-nagios-checks (100) UNRELEASED; urgency=low
 
-  * 
+  [ Peter Palfrader ]
+  * dsa-check-running-kernel: Check from all compression tokens.
+  * dsa-check-zone-rrsig-expiration-many: Initialize exit properly, in case
+    it does not get set later.
+
+  [ George Kargiotakis ]
+  * dsa-check-hpacucli: Add Parity check for logical drives.
+
+ -- Peter Palfrader <weasel@debian.org>  Fri, 04 Apr 2014 18:55:33 +0200
+
+dsa-nagios-checks (99) unstable; urgency=low
+
+  * dsa-check-hpacucli: SATA drives have different transfer speed.
+  * dsa-check-edac: add.
 
- -- Peter Palfrader <weasel@debian.org>  Thu, 13 Mar 2014 09:49:55 +0100
+ -- Peter Palfrader <weasel@debian.org>  Mon, 31 Mar 2014 10:45:57 +0200
 
 dsa-nagios-checks (98) unstable; urgency=low