Remove hildegard and alain (arm* buildds)
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index 8710917..e70bdce 100644 (file)
@@ -90,6 +90,10 @@ servers:
     address: 129.143.57.177
     parents: gw-ubcece
     hostgroups: layer3-infrastructure
+  gw-leaseweb:
+    address: 185.17.185.190
+    parents: gw-ubcece
+    hostgroups: layer3-infrastructure
   gw-man-da:
     address: 82.195.75.126
     parents: gw-ubcece
@@ -202,10 +206,6 @@ servers:
     address: 217.140.96.56
     parents: gw-arm
     hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, deadslow
-  alain:
-    address: 217.140.96.58
-    parents: gw-arm
-    hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow
   alwyn:
     address: 217.140.96.59
     parents: gw-arm
@@ -365,6 +365,12 @@ servers:
     address: 5.153.231.20
     parents: ganeti-bytemark
     hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs
+  moszumanska:
+    address: 5.153.231.21
+    parents: ganeti-bytemark
+    contact_groups: alioth-admins
+    hostgroups: computers, general, wheezy, postgres91-hosts, apache2-hosts, acpid-hosts, apache-https, brokensamhain, no-bacula, bind9-hosts, xinetd-hosts, alioth, heavy-exim, spamd
+    no-servicegroups: true
   dillon:
     address: 5.153.231.22
     parents: ganeti-bytemark
@@ -477,6 +483,32 @@ servers:
     parents: gw-isc
     hostgroups: computers, service, apache2-hosts, rsyncd-hosts, acpid-hosts, dl360, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula
   # }}}
+  # {{{ gw-leaseweb
+  lw01:
+    address: 185.17.185.177
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw02:
+    address: 185.17.185.178
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw03:
+    address: 185.17.185.179
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw04:
+    address: 185.17.185.180
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl180
+  lw05:
+    address: 185.17.185.181
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid
+  lw06:
+    address: 185.17.185.182
+    parents: gw-leaseweb
+    hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid
+  # }}}
   # {{{ gw-karlsruhe
   zemlinsky:
     address: 129.143.160.6
@@ -581,7 +613,7 @@ servers:
   zani:
     address: 148.100.88.22
     parents: gw-marist
-    hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed
+    hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed, ping-suckers
   # }}}
   # {{{ gw-osuosl
   busoni:
@@ -920,10 +952,6 @@ servers:
     hostgroups: secondary-IPs
   # }}}
   # {{{ gw-ynic
-  hildegard:
-    address: 144.32.168.74
-    parents: gw-ynic
-    hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, deadslow, buildd
   howells:
     address: 144.32.168.75
     parents: gw-ynic
@@ -1026,6 +1054,12 @@ hostgroups:
   dl585:
     alias: HP DL385 hosts
     private: 1
+  dl180:
+    alias: HP DL180
+    private: 1
+  dl120:
+    alias: HP DL120
+    private: 1
   sw-raid:
     alias: Hosts with Linux software raid
     private: 1
@@ -1177,15 +1211,17 @@ hostgroups:
     # i.e. no port 25
     private: 1
 
-  ntpsuckers:
-    alias: "hosts who's ntp offset is often unknown"
-    private: 1
-
   brokensamhain:
     alias: machines that can not run samhain
     private: 1
   high-RTT:
-    alias: machines with hight round trip times
+    alias: machines with high round trip times
+    private: 1
+  ping-suckers:
+    alias: machines that just suck at icmp
+    private: 1
+  alioth:
+    alias: machines that are just awkward
     private: 1
 
   security_mirror:
@@ -1223,7 +1259,7 @@ services:
     name: PING
     check: "check_ping!350.0,20%!600.0,40%"
     hostgroups: pingable
-    excludehostgroups: layer3-infrastructure, high-RTT
+    excludehostgroups: layer3-infrastructure, high-RTT, ping-suckers
     normal_check_interval: 5
     max_check_attempts: 4
     retry_check_interval: 1
@@ -1234,6 +1270,13 @@ services:
     normal_check_interval: 5
     max_check_attempts: 4
     retry_check_interval: 1
+  -
+    name: PING
+    check: "check_ping!600.0,90%!900.0,95%"
+    hostgroups: ping-suckers
+    normal_check_interval: 5
+    max_check_attempts: 4
+    retry_check_interval: 1
   -
     name: PING
     check: "check_ping!2000.0,60%!3000.0,80%"
@@ -1432,7 +1475,7 @@ services:
     servicegroups: backup
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'"
     hostgroups: computers
-    excludehostgroups: freebsd
+    excludehostgroups: freebsd, alioth
   -
     name: process - bacula-fd
     servicegroups: backup
@@ -1627,7 +1670,7 @@ services:
     hostgroups: computers
     depends: process - ntpd
     excludehosts: ancina
-    excludehostgroups: ntpsuckers, deadslow
+    excludehostgroups: deadslow
     servicegroups: time
   #
   -
@@ -1665,11 +1708,13 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: lotti
     hostgroups: computers
+    excludehostgroups: alioth
   -
     name: remote logging on lully
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: lully
     hostgroups: computers
+    excludehostgroups: alioth
   -
     name: MQ connection on rainier
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
@@ -1677,6 +1722,7 @@ services:
     hostgroups: computers
     normal_check_interval:  60
     retry_check_interval: 15
+    excludehostgroups: alioth
   -
     name: MQ connection on rapoport
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
@@ -1684,6 +1730,7 @@ services:
     hostgroups: computers
     normal_check_interval:  60
     retry_check_interval: 15
+    excludehostgroups: alioth
  ### MAIL STUFF
  ###
   -
@@ -1819,23 +1866,23 @@ services:
   -
     name: process - weightd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'"
-    hostgroups: heavy-postfix
+    hostgroups: heavy-postfix, alioth
   -
     name: process - weightd - cache
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'"
-    hostgroups: heavy-postfix
+    hostgroups: heavy-postfix, alioth
     depends: process - weightd - master
   -
     name: process - weightd - child
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'"
-    hostgroups: heavy-postfix
+    hostgroups: heavy-postfix, alioth
     depends: process - weightd - master
  ###
   -
     name: unwanted process - policyd-weight
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight"
     hostgroups: computers
-    excludehostgroups: heavy-postfix, deadslow
+    excludehostgroups: heavy-postfix, deadslow, alioth
 
 
  ###
@@ -1926,6 +1973,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
     hostgroups: computers
     normal_check_interval: 60
+    excludehostgroups: alioth
   -
     name: setup - local hostname etc-hosts
     nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
@@ -1953,6 +2001,7 @@ services:
     name: process - unbound
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
     hostgroups: unbound-hosts, squeeze, wheezy
+    excludehostgroups: alioth
  ###
   -
     name: process - uptimed
@@ -2035,7 +2084,7 @@ services:
     name: process - ud-replicated
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'"
     hostgroups: computers
-    excludehostgroups: squeeze,freebsd
+    excludehostgroups: squeeze, freebsd, alioth
   -
     name: process - ud-replicated
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'"
@@ -2048,13 +2097,13 @@ services:
     name: process - monit
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
     hostgroups: computers
-    excludehostgroups: squeeze
+    excludehostgroups: squeeze, alioth
   -
     name: HW - hpacucli status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli"
     normal_check_interval: 120
-    hostgroups: dl385, dl380, dl360, bl460
+    hostgroups: dl385, dl380, dl360, bl460, dl180
     excludehosts: schein, rietz
   -
     name: HW - hpacucli status
@@ -2081,6 +2130,13 @@ services:
     normal_check_interval: 120
     hostgroups: dl585
  ###
+  -
+    name: HW - edac status
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-edac"
+    normal_check_interval: 120
+    hosts: lw05, lw06
+    #hostgroups: computers
+    #excludehosts: villa, lobos, senfl, schein
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm"
@@ -2411,7 +2467,7 @@ services:
     name: process - stunnel4 - puppet-ekeyd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
     hostgroups: squeeze, wheezy
-    excludehostgroups: freebsd
+    excludehostgroups: freebsd, alioth
  ####
   -
     name: process - UPS - nut usbhid-ups - ups1