Add current chroots check
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index 7389cd2..3729b3d 100644 (file)
@@ -20,15 +20,13 @@ servers:
     pingable: false
     check_command: dsa_check_always_ok
   gw-1und1:
-    address: 195.20.247.50
     parents: gw-ubcece
-    hostgroups: layer3-infrastructure
+    hostgroups: notacomputer
     pingable: false
     check_command: dsa_check_always_ok
   gw-1und1-sec:
-    address: 212.227.121.194
     parents: gw-ubcece
-    hostgroups: layer3-infrastructure
+    hostgroups: notacomputer
     pingable: false
     check_command: dsa_check_always_ok
   gw-accumu:
@@ -161,7 +159,7 @@ servers:
   pkgmirror-1and1:
     address: 213.165.95.4
     parents: powell
-    hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, no-bacula
+    hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, no-bacula, apache-https
   babin:
     address: 213.165.95.6
     parents: powell
@@ -292,7 +290,7 @@ servers:
   picconi:
     address: 5.153.231.3
     parents: gw-bytemark
-    hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs, heavy-exim, spamd
+    hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs, heavy-exim, spamd, apache-https
   senfter:
     address: 5.153.231.4
     parents: gw-bytemark
@@ -370,7 +368,7 @@ servers:
   dillon:
     address: 5.153.231.22
     parents: ganeti-bytemark
-    hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs
+    hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs, hassrvfs
   ticharich:
     address: 5.153.231.23
     parents: ganeti-bytemark
@@ -430,18 +428,18 @@ servers:
     hostgroups: computers, sw-raid, hassrvfs, wheezy
   # }}}
   # {{{ gw-ftcollins
-  alkman:
-    address: 192.25.206.63
-    parents: gw-ftcollins
-    hostgroups: computers, buildd, acpid-hosts, wheezy
-  merulo:
-    address: 192.25.206.58
-    parents: gw-ftcollins
-    hostgroups: computers, porterbox, hasusrfs, wheezy
-  mundy:
-    address: 192.25.206.62
-    parents: gw-ftcollins
-    hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy
+  #alkman:
+  #  address: 192.25.206.63
+  #  parents: gw-ftcollins
+  #  hostgroups: computers, buildd, acpid-hosts, wheezy
+  #merulo:
+  #  address: 192.25.206.58
+  #  parents: gw-ftcollins
+  #  hostgroups: computers, porterbox, hasusrfs, wheezy
+  #mundy:
+  #  address: 192.25.206.62
+  #  parents: gw-ftcollins
+  #  hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy
   spohr:
     address: 192.25.206.33
     parents: gw-ftcollins
@@ -597,7 +595,7 @@ servers:
   buxtehude:
     address: 140.211.166.26
     parents: byrd
-    hostgroups: computers, service, hassrvfs, acpid-hosts, apache2-hosts, heavy-exim, postgres91-hosts, wheezy, hasvarlogfs
+    hostgroups: computers, service, hassrvfs, acpid-hosts, apache2-hosts, heavy-exim, postgres91-hosts, wheezy, hasvarlogfs, apache-https
   # malo TODO
   mayer:
     address: 140.211.166.78
@@ -667,6 +665,10 @@ servers:
     address: 86.59.118.152
     parents: gw-sil
     hostgroups: computers, buildd, wheezy
+  eberlin:
+    address: 86.59.118.155
+    parents: gw-sil
+    hostgroups: computers, buildd, wheezy
   # }}}
   # {{{ gw-ubcece
   sw-ubcece:
@@ -743,10 +745,6 @@ servers:
     address: 206.12.19.13
     parents: sw-ubcece-kais
     hostgroups: computers, hashomefs, sw-raid, rsyncd-hosts, apache2-hosts, xinetd-hosts, service, nfs-server, squeeze, hassrvfs
-  paganini:
-    address: 206.12.19.10
-    parents: sw-ubcece-kais
-    hostgroups: computers, hasbootfs, aacraid, hassrvfs, nfs-client, service, squeeze, autofs
   respighi:
     address: 206.12.19.11
     parents: sw-ubcece-kais
@@ -872,7 +870,7 @@ servers:
   beach:
     address: 206.12.19.140
     parents: ganeti2
-    hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, xinetd-hosts, hassrvfs, nfs-server, rsyncd-hosts, no-bacula
+    hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, xinetd-hosts, hassrvfs, nfs-server, rsyncd-hosts, no-bacula, apache-https
   ullmann:
     address: 206.12.19.141
     parents: ganeti2
@@ -885,10 +883,6 @@ servers:
     address: 206.12.19.143
     parents: ganeti2
     hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, apache-https
-  stanley:
-    address: 206.12.19.145
-    parents: ganeti2
-    hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, no-bacula
   muffat:
     address: 206.12.19.146
     parents: ganeti2
@@ -1250,6 +1244,7 @@ services:
 
  ############ Disk Usage ############
  ####
+
   -
     name: disk usage - all
     servicegroups: diskspace
@@ -1349,9 +1344,9 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-misc"
     hosts: sibelius
   -
-    name: disk usage on /var/lib/postgresql/9.1/dak
+    name: disk usage on /var/lib/postgresql/9.1
     servicegroups: diskspace
-    nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /var/lib/postgresql/9.1/dak"
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /var/lib/postgresql/9.1"
     hosts: franck
   -
     name: disk usage on /srv/ftp-master.debian.org
@@ -1541,6 +1536,12 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-uptime"
     hostgroups: computers
  ####
+  -
+    name: processes - samhain zombies
+    nrpe: "/usr/lib/nagios/plugins/check_procs 3 6 -s Z -u root -a samhain"
+    event_handler: dsa_event_handler_restart_samhain
+    hostgroups: computers
+    excludehostgroups: brokensamhain
   -
     name: processes - zombies
     nrpe: "/usr/lib/nagios/plugins/check_procs 5 10 -s Z"
@@ -1666,6 +1667,20 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: lully
     hostgroups: computers
+  -
+    name: MQ connection on rainier
+    remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
+    runfrom: rainier
+    hostgroups: computers
+    normal_check_interval: 60
+    retry_check_interval: 15
+  -
+    name: MQ connection on rapoport
+    remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
+    runfrom: rapoport
+    hostgroups: computers
+    normal_check_interval: 60
+    retry_check_interval: 15
  ### MAIL STUFF
  ###
   -
@@ -1913,10 +1928,6 @@ services:
     nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
     hostgroups: computers
     normal_check_interval: 60
-  -
-    name: setup - ud-ldap freshness
-    nrpe: "/usr/lib/nagios/plugins/dsa-check-udldap-freshness"
-    hostgroups: computers
   -
     name: system - available entropy
     nrpe: "/usr/lib/nagios/plugins/dsa-check-entropy"
@@ -1988,10 +1999,6 @@ services:
     name: "host SSL cert"
     nrpe: "if [ -e /etc/ssl/certs/thishost.pem ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/certs/thishost.pem; else echo 'No thishost.pem on this host.'; fi"
     hostgroups: computers
-  -
-    name: "pg SSL cert"
-    nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/certs/pg-ubcece.debian.org-chained.pem"
-    hosts: danzi
 
  ############ Processes/Services that only run on some computers ############
  ####
@@ -2021,6 +2028,15 @@ services:
     hostgroups: sw-raid
 
  ###
+  -
+    name: process - ud-replicated
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'"
+    hostgroups: computers
+    excludehostgroups: squeeze,freebsd
+  -
+    name: process - ud-replicated
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'"
+    hostgroups: freebsd
   -
     name: process - monit
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/sbin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
@@ -2372,7 +2388,7 @@ services:
     hostgroups: computers
   -
     name: process - postgresql91 - master
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:4 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'"
     hostgroups: postgres91-hosts
   -
     name: postgresql backups
@@ -2485,6 +2501,13 @@ services:
 
  ############ MISC OTHER Stuff ############
  #####
+  -
+    name: puppetmaster cert
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem"
+    hosts: handel
+    normal_check_interval: 60
+    max_check_attempts: 2
+    retry_check_interval: 5
   -
     name: mirror sync - bugs
     check: "dsa_check_mirrorsync_skew!bugs.debian.org!project/trace/bugs-master.debian.org!120:600"
@@ -2571,5 +2594,12 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=7778 $HOSTADDRESS$ public"
     runfrom: dijkstra
     hosts: giustini
+ ############
+  -
+    name: current chroots
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current"
+    hostgroups: porterbox
+    normal_check_interval: 60
+    retry_check_interval: 15
 
 # vim: set ts=2 sw=2 et ai si fdm=marker: