[project @ peter@palfrader.org-20080406164112-dl81rh2h3iecdveq]
[mirror/dsa-nagios.git] / nagios-master.cfg
index 0d48fe6..5817e32 100644 (file)
 #  - puccini: mailgraph
 #  -
 #  - agnesi
-#  - casals
-#  - crest
-#  - elara
-#  - europa
 #  - kassia
-#  - kullervo
 #  - lebrun
 #  - murphy
 #  - piatti
 #  - tartini
 #sarge:
-#  - caballero
 #  - spontini
 
 # down:
@@ -115,6 +109,18 @@ servers:
     address: 195.20.247.53
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+  gw-blackcat:
+    address: 193.201.200.129
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-xandros:
+    address: 142.46.212.33
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-nmmn:
+    address: 217.114.76.81
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
 
   samosa:
     address: 192.25.206.57
@@ -126,7 +132,7 @@ servers:
   gluck:
     address: 192.25.206.10
     parents: samosa
-    hostgroups: computers, general, dl380, apache1-hosts, bind9-hosts, rsyncd-hosts, heavy-exim
+    hostgroups: computers, general, dl380, apache1-hosts, bind9-hosts, rsyncd-hosts, heavy-exim, highload
   merkel:
     address: 192.25.206.16
     parents: samosa
@@ -198,7 +204,7 @@ servers:
   rietz:
     address: 140.211.166.43
     parents: gw-osuosl
-    hostgroups: computers, service, apache2-hosts, bind9-hosts, rsyncd-hosts, dl385, heavy-exim
+    hostgroups: computers, service, apache2-hosts, bind9-hosts, rsyncd-hosts, dl385, heavy-exim, highload
   rietz2:
     address: 140.211.166.44
     parents: rietz
@@ -217,6 +223,10 @@ servers:
     address: 128.31.0.36
     parents: gw-mit-csail
     hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, sw-raid
+  casals:
+    address: 128.31.0.16
+    parents: gw-mit-csail
+    hostgroups: deadslow
 
   klecker:
     address: 194.109.137.218
@@ -274,7 +284,31 @@ servers:
   puccini:
     address: 87.106.4.56
     parents: gw-frost
-    hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix
+    hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, amavis-hosts
+
+  caballero:
+    address: 193.201.200.200
+    parents: gw-blackcat
+    hostgroups: computers, buildd, sw-raid
+
+  elara:
+    address: 142.46.212.46
+    parents: gw-xandros
+    hostgroups: deadslow
+  europa:
+    address: 142.46.212.46  # NOTE(review): identical to elara's address just above - confirm this is not a copy-paste slip
+    parents: gw-xandros
+    hostgroups: deadslow
+
+  kullervo:
+    address: 217.114.76.82
+    parents: gw-nmmn
+    hostgroups: deadslow
+  crest:
+    address: 217.114.76.83
+    parents: gw-nmmn
+    hostgroups: deadslow
+
 
 #############################
 # host groups
@@ -290,6 +324,8 @@ hostgroups:
     alias: Internet routers and friends
     extinfo-icon_image: base/switch40.png
     extinfo-icon_image_alt: router
+  deadslow:
+    alias: Systems too slow to run any real checks
 
   porterbox:
     alias: developer accessible porter machines
@@ -359,6 +395,10 @@ hostgroups:
     alias: hosts running postgres81
     private: 1
 
+  highload:
+    alias: "hosts on which high load is normal"
+    private: 1
+
   secondary-IPs:
     alias: secondary IP addresses
     private: 1
@@ -383,7 +423,7 @@ services:
     hostgroups: computers
   -
     name: disk usage on /
-    nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /"
+    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /"
     hostgroups: computers
   -
     name: disk usage on /boot
@@ -391,7 +431,7 @@ services:
     hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer
   -
     name: disk usage on /var
-    nrpe: "/usr/lib/nagios/plugins/check_disk 50 75 /var"
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var"
     hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini
   -
     name: disk usage on /org
@@ -429,6 +469,10 @@ services:
     name: disk usage on /mnt/hdc
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/hdc"
     hosts: voltaire
+  -
+    name: disk usage on /x
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /x"
+    hosts: voltaire
 
  ############ All Computers ############
  ####
@@ -450,6 +494,11 @@ services:
     name: load
     nrpe: "/usr/lib/nagios/plugins/check_load -w 12,10,8 -c 22,18,14"
     hostgroups: computers
+    excludehostgroups: highload
+  -
+    name: load
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 100,80,60 -c 200,160,140"
+    hostgroups: highload
  ####
   -
     name: processes - zombies
@@ -487,6 +536,13 @@ services:
     depends: process - sshd
     normal_check_interval:  60
     notification_interval:  60
+
+  -
+    name: "network service - sshd"
+    check: check_ssh
+    hostgroups: deadslow
+    normal_check_interval:  180
+    notification_interval:  180
  ####
   -
     name: network service - nrpe
@@ -561,7 +617,7 @@ services:
  ###
   -
     name: process - exim
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'"
     hostgroups: computers
     excludehostgroups: postfix-hosts
     excludehosts: master, rietz, merkel
@@ -604,16 +660,19 @@ services:
   -
     name: process - spamd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
-    hosts: liszt
     hostgroups: heavy-exim
-    excludehosts: rietz
+    excludehosts: rietz, merkel, raff
+  -
+    name: process - spamd - master
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 10 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
+    hosts: liszt
   -
     name: process - spamd - child
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -C spamd -a 'spamd child'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:11 -c 1: -C spamd -a 'spamd child'"
     hosts: liszt
     hostgroups: heavy-exim
     depends: process - spamd - master
-    excludehosts: rietz
+    excludehosts: rietz, merkel, raff
   #
   -
     name: process - spamd - master
@@ -629,7 +688,11 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd"
     hostgroups: computers
     excludehostgroups: heavy-exim
-    excludehosts: listz
+    excludehosts: liszt
+  -
+    name: unwanted process - spamd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd"
+    hosts: merkel, raff
 
  ###
   -
@@ -659,12 +722,10 @@ services:
     name: process - amavis - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u amavis -C amavisd-new -a 'amavisd (master)'"
     hostgroups: amavis-hosts
-    excludehosts: liszt
   -
     name: process - amavis - all
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1:10 -u amavis -C amavisd-new -a 'amavisd '"
     hostgroups: amavis-hosts
-    excludehosts: liszt
     depends: process - amavis - master
   #
   -
@@ -684,8 +745,14 @@ services:
     depends: process - weightd - master
   -
     name: process - weightd - child
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:30 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:10 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
     hostgroups: heavy-postfix
+    excludehosts: liszt
+    depends: process - weightd - master
+  -
+    name: process - weightd - child
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:40 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
+    hosts: liszt
     depends: process - weightd - master
   #
   -
@@ -763,19 +830,19 @@ services:
     depends: process - postfix - master
   -
     name: process - postfix - smtp
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:250 -c 0:500 -u postfix -C smtp -a 'smtp -t unix -u -c'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:350 -c 0:500 -u postfix -C smtp -a 'smtp -t unix -u -c'"
     hosts: liszt
     depends: process - postfix - master
 
  ###
   -
     name: network service - smtp
-    check: check_smtp
+    check: dsa_check_smtp
     hostgroups: postfix-hosts
     depends: process - postfix - master
   -
     name: network service - smtp
-    check: check_smtp
+    check: dsa_check_smtp
     hostgroups: computers
     excludehostgroups: postfix-hosts
     depends: process - exim
@@ -820,6 +887,11 @@ services:
     name: unwanted process - inetd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C inetd"
     hostgroups: computers
+  -
+    name: unwanted process - snmpd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C snmpd"
+    hostgroups: computers
+    excludehosts: lobos, villa
 
 
  ############ Processes/Services that only run on some computers ############
@@ -909,8 +981,8 @@ services:
     hosts: samosa
   -
     name: process - nagios3
-    # it forks one instance to check, so make it -w 1:2
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u nagios -C nagios3 -a '/usr/sbin/nagios3 -d /etc/nagios3/nagios.cfg'"
+    # there is always one extra nagios3 process per check currently running, hence the wide 1:30 warning range
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:30 -c 1: -u nagios -C nagios3 -a '/usr/sbin/nagios3 -d /etc/nagios3/nagios.cfg'"
     hosts: samosa
 
  ###
@@ -920,7 +992,7 @@ services:
     hostgroups: apache2-hosts
   -
     name: process - apache2 - worker
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1:100 -u www-data -C apache2 -a /usr/sbin/apache2"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1:100 -u www-data -C apache2 -a /usr/sbin/apache2"
     hostgroups: apache2-hosts
     depends: process - apache2 - master
   -