[project @ peter@palfrader.org-20080424195804-xh6bwo5xec2w4ada]
[mirror/dsa-nagios.git] / nagios-master.cfg
index 3642c47..d7a1965 100644 (file)
@@ -8,52 +8,35 @@
 #  - raff:  identd, hpasmd
 #  - spohr: hpasmd
 #  - *: mailq
+#  - *: check munin stats collection works
+#  - *: check backups are successful
+#  - *: unwanted: network: auth, discard, daytime, time (on some), cvs-pserver, rsync (on some), ftp (on some), http (on some)
+#  - goetz: gmond/gmetad
+#  - verdi: pg upgrade, openvpn
+#  - mundy: salinfo_decode
+#  - puccini: mailgraph
+#  - lebrun: ippl
 #  -
-#  - agnesi
-#  - albeniz
-#  - bruckner
-#  - caballero
-#  - casals
-#  - crest
-#  - elara
-#  - escher
-#  - europa
-#  - goedel
-#  - goetz
-#  - kassia
-#  - kullervo
-#  - lebrun
-#  - leisner
-#  - liszt
-#  - malo
-#  - mayer
-#  - mayr
-#  - mundy
-#  - murphy
-#  - paer
-#  - penalosa
-#  - pergolesi
-#  - peri
 #  - piatti
-#  - puccini
-#  - raptopr
+#  - tartini
+
+# down:
 #  - ravel
 #  - sarti
+#  - goedel
+#  - leisner
 #  - schulz
-#  - spontini
-#  - tartini
-#  - verdi
-#  - voltaire
 
 ---
 #############################
 # hosts
 #############################
 servers:
-  gw-MAN-DA-debian:
+  gw-man-da:
     address: 82.195.75.126
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+    contacts: joerg, bzed
   gw-HP-ftc:
     address: 192.25.206.1
     parents: samosa
@@ -94,58 +77,168 @@ servers:
     address: 195.49.152.190
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+    contacts: bzed
+  gw-freenet:
+    address: 62.104.23.249
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-topalis:
+    address: 195.243.109.254
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-sanger:
+    address: 193.62.202.18
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+    contacts: tjrc1
+  gw-cst:
+    address: 213.188.99.215
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-lrz:
+    address: 129.187.0.150
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-frost:
+    address: 130.81.242.195
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-1und1:
+    address: 195.20.247.54
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+    contacts: joerg
+  gw-blackcat:
+    address: 193.201.200.129
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-xandros:
+    address: 142.46.212.33
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-nmmn:
+    address: 217.114.76.81
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-utwente:
+    address: 130.89.160.1
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  #gw-ughent:
+  #  address: 157.193.39.254
+  #  parents: gw-HP-ftc
+  #  hostgroups: routing-infrastructure
+  gw-agnesi:
+    address: 65.173.90.18
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-ubc:
+    address: 137.82.84.41
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-carnet:
+    address: 161.53.160.1
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
 
   samosa:
     address: 192.25.206.57
-    hostgroups: computers, service, dl380, apache2-hosts, bind9-hosts, heavy-email
+    hostgroups: computers, service, dl380, apache2-hosts, bind9-hosts, heavy-exim
   raff:
     address: 192.25.206.59
     parents: samosa
-    hostgroups: computers, service, dl380, apache1-hosts, bind9-hosts, rsyncd-hosts, heavy-email
+    hostgroups: computers, service, dl380, apache1-hosts, bind9-hosts, rsyncd-hosts, heavy-exim
   gluck:
     address: 192.25.206.10
     parents: samosa
-    hostgroups: computers, general, dl380, apache1-hosts, bind9-hosts, rsyncd-hosts, heavy-email
+    hostgroups: computers, general, dl380, apache1-hosts, bind9-hosts, rsyncd-hosts, heavy-exim, highload
   merkel:
     address: 192.25.206.16
     parents: samosa
-    hostgroups: computers, general, apache1-hosts, bind9-hosts, rsyncd-hosts, sw-raid, postgres81-hosts, heavy-email
+    hostgroups: computers, general, apache1-hosts, bind9-hosts, rsyncd-hosts, sw-raid, postgres81-hosts, heavy-exim
   spohr:
     address: 192.25.206.33
     parents: samosa
     hostgroups: computers, service, dl380, apache2-hosts, postgres81-hosts
+  peri:
+    address: 192.25.206.15
+    parents: samosa
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: dannf
+  penalosa:
+    address: 192.25.206.68
+    parents: samosa
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: dannf
+  mundy:
+    address: 192.25.206.62
+    parents: samosa
+    hostgroups: computers, buildd
+  paer:
+    address: 192.25.206.11
+    parents: samosa
+    hostgroups: computers, porterbox, bind9-hosts
+  merulo:
+    address: 192.25.206.58
+    parents: samosa
+    hostgroups: computers, porterbox
 
   bartok:
     address: 82.195.75.91
-    parents: gw-MAN-DA-debian
-    hostgroups: computers, service, syslog-ng-hosts, postfix-hosts, dl385, heavy-email
+    parents: gw-man-da
+    hostgroups: computers, service, syslog-ng-hosts, postfix-hosts, dl385
+    contacts: joerg, bzed
   sperger:
     address: 82.195.75.98
-    parents: gw-MAN-DA-debian
+    parents: gw-man-da
     hostgroups: computers, porterbox, sw-raid
+    contacts: bzed
   agricola:
     address: 82.195.75.86
-    parents: gw-MAN-DA-debian
-    hostgroups: computers, porterbox, sw-raid
+    parents: gw-man-da
+    hostgroups: computers, porterbox, sw-raid, single-cpu
+    contacts: bzed
   arcadelt:
     address: 82.195.75.87
-    parents: gw-MAN-DA-debian
-    hostgroups: computers, buildd, sw-raid
+    parents: gw-man-da
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: bzed
+  liszt:
+    address: 82.195.75.100
+    parents: gw-man-da
+    hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, dl385
+    contacts: bzed
 
   master:
     address: 70.103.162.29
     parents: gw-brainfood
-    hostgroups: computers, general, apache2-hosts, bind9-hosts, heavy-email
+    hostgroups: computers, general, apache2-hosts, bind9-hosts, heavy-exim, highload
+  murphy:
+    address: 70.103.162.31
+    parents: gw-brainfood
+    hostgroups: computers, general, apache2-hosts, bind9-hosts, postfix-hosts
 
   ries:
     address: 128.148.34.103
-    parents: gw-osuosl
-    hostgroups: computers, service, apache2-hosts, bind9-hosts, ftpd-hosts, dl385, rsyncd-hosts, postgres81-hosts, heavy-email
+    parents: gw-brown.edu
+    hostgroups: computers, service, apache2-hosts, bind9-hosts, ftpd-hosts, dl385, rsyncd-hosts, postgres81-hosts, heavy-exim
 
+  mayer:
+    address: 140.211.166.78
+    parents: gw-osuosl
+    hostgroups: computers, buildd
+  mayr:
+    address: 140.211.166.58
+    parents: gw-osuosl
+    hostgroups: computers, buildd
+  malo:
+    address: 140.211.166.27
+    parents: gw-osuosl
+    hostgroups: computers, buildd, tftpd-hosts
   rietz:
     address: 140.211.166.43
-    parents: gw-brown.edu
-    hostgroups: computers, service, apache2-hosts, bind9-hosts, rsyncd-hosts, dl385, heavy-email
+    parents: gw-osuosl
+    hostgroups: computers, service, apache2-hosts, bind9-hosts, rsyncd-hosts, dl385, heavy-exim, highload
   rietz2:
     address: 140.211.166.44
     parents: rietz
@@ -164,22 +257,125 @@ servers:
     address: 128.31.0.36
     parents: gw-mit-csail
     hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, sw-raid
+  casals:
+    address: 128.31.0.16
+    parents: gw-mit-csail
+    hostgroups: deadslow
 
   klecker:
     address: 194.109.137.218
     parents: gw-xs4all
-    hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, bind9-hosts, dl385, postgres81-hosts, heavy-email
+    hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, bind9-hosts, dl385, postgres81-hosts, heavy-exim
 
   saens:
     address: 128.101.240.212
     parents: gw-umn.edu
-    hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, dl380, heavy-email
+    hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, dl380, heavy-exim
 
   argento:
     address: 195.49.152.174
     parents: gw-dg-i.net
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: bzed
+
+  pergolesi:
+    address: 62.104.23.252
+    parents: gw-freenet
+    hostgroups: computers, porterbox, sw-raid
+  bruckner:
+    address: 62.104.23.253
+    parents: gw-freenet
+    hostgroups: computers, porterbox, single-cpu
+
+  raptor:
+    address: 195.243.109.162
+    parents: gw-topalis
+    hostgroups: computers, porterbox
+
+  albeniz:
+    address: 193.62.202.27
+    parents: gw-sanger
+    hostgroups: computers, porterbox, sw-raid
+    contacts: tjrc1
+  goetz:
+    address: 193.62.202.26
+    parents: gw-sanger
+    hostgroups: computers, buildd, sw-raid
+    contacts: tjrc1
+
+  escher:
+    address: 213.188.99.215
+    parents: gw-cst
+    hostgroups: computers, porterbox, single-cpu
+
+  verdi:
+    address: 192.54.42.193
+    parents: gw-lrz
+    hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, postgres81-hosts, postfix-hosts
+
+  voltaire:
+    address: 72.66.115.54
+    parents: gw-frost
+    hostgroups: computers, buildd
+    contacts: sfrost
+
+  puccini:
+    address: 87.106.4.56
+    parents: gw-1und1
+    hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, amavis-hosts
+    contacts: joerg
+
+  caballero:
+    address: 193.201.200.200
+    parents: gw-blackcat
     hostgroups: computers, buildd, sw-raid
 
+  elara:
+    address: 142.46.212.46
+    parents: gw-xandros
+    hostgroups: deadslow
+  europa:
+    address: 142.46.212.46
+    parents: gw-xandros
+    hostgroups: deadslow
+
+  kullervo:
+    address: 217.114.76.82
+    parents: gw-nmmn
+    hostgroups: deadslow
+    contacts: luk
+  crest:
+    address: 217.114.76.83
+    parents: gw-nmmn
+    hostgroups: deadslow
+    contacts: luk
+
+  kassia:
+    address: 130.89.175.54
+    parents: gw-utwente
+    hostgroups: computers, service, postfix-hosts, apache2-hosts, ftpd-hosts, rsyncd-hosts, dl360
+
+  allegri:
+    address: 157.193.39.233
+    parents: gw-HP-ftc
+    hostgroups: computers, buildd, postfix-hosts, sw-raid, single-cpu
+    contacts: luk
+
+  agnesi:
+    address: 65.173.90.83
+    parents: gw-agnesi
+    hostgroups: deadslow
+
+  spontini:
+    address: 137.82.84.42
+    parents: gw-ubc
+    hostgroups: computers, buildd
+
+  lebrun:
+    address: 161.53.160.165
+    parents: gw-carnet
+    hostgroups: computers, buildd
+
 #############################
 # host groups
 #
@@ -194,6 +390,8 @@ hostgroups:
     alias: Internet routers and friends
     extinfo-icon_image: base/switch40.png
     extinfo-icon_image_alt: router
+  deadslow:
+    alias: Systems too slow to run any real checks
 
   porterbox:
     alias: developer accessible porter machines
@@ -222,9 +420,15 @@ hostgroups:
   dl385:
     alias: HP DL385 hosts
     private: 1
+  dl360:
+    alias: HP DL360 hosts
+    private: 1
   sw-raid:
     alias: Hosts with Linux software raid
     private: 1
+  single-cpu:
+    alias: Hosts with only one CPU
+    private: 1
 
   syslog-ng-hosts:
     alias: hosts running syslog-ng instead of sysklogd
@@ -232,9 +436,12 @@ hostgroups:
   postfix-hosts:
     alias: hosts running postfix instead of exim
     private: 1
-  heavy-email:
+  heavy-exim:
     alias: "hosts running the full mail stuff, including clamav, SA, and greylistd"
     private: 1
+  heavy-postfix:
+    alias: "postfix hosts running the full mail stuff, including clamav, SA, postgrey, amavis, policyd-weight"
+    private: 1
   apache2-hosts:
     alias: hosts running apache2
     private: 1
@@ -244,9 +451,15 @@ hostgroups:
   bind9-hosts:
     alias: hosts running bind9
     private: 1
+  amavis-hosts:
+    alias: hosts running amavis
+    private: 1
   ftpd-hosts:
     alias: hosts running vsftpd
     private: 1
+  tftpd-hosts:
+    alias: hosts running a tftpd (tftpd-hpa as a daemon)
+    private: 1
   rsyncd-hosts:
     alias: hosts providing rsync services via xinetd
     private: 1
@@ -254,10 +467,26 @@ hostgroups:
     alias: hosts running postgres81
     private: 1
 
+  highload:
+    alias: "hosts on which high load is normal"
+    private: 1
+
   secondary-IPs:
     alias: secondary IP addresses
     private: 1
 
+
+#############################
+# servicegroups
+#############################
+servicegroups:
+  diskspace:
+    alias: diskusage checks
+  buildd:
+    alias: buildd checks
+  raid:
+    alias: raid checks
+
 #############################
 # services
 #############################
@@ -266,6 +495,17 @@ services:
     name: PING
     check: "check_ping!300.0,20%!600.0,40%"
     hostgroups: all
+    excludehostgroups: routing-infrastructure
+    normal_check_interval: 5
+    max_check_attempts: 4
+    retry_check_interval: 1
+  -
+    name: PING
+    check: "check_ping!2000.0,60%!3000.0,80%"
+    hostgroups: routing-infrastructure
+    normal_check_interval: 5
+    max_check_attempts: 4
+    retry_check_interval: 1
 
  ############ Services ############
  ###
@@ -274,48 +514,84 @@ services:
  ####
   -
     name: disk usage - all
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 90 95"
     hostgroups: computers
   -
     name: disk usage on /
-    nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /"
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /"
     hostgroups: computers
   -
     name: disk usage on /boot
-    nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /boot"
-    hosts: sperger, rietz, steffani
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /boot"
+    hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer, spontini
   -
     name: disk usage on /var
-    nrpe: "/usr/lib/nagios/plugins/check_disk 50 75 /var"
-    hosts: bartok, samosa, raff, lobos, villa, gluck, saens
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var"
+    hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini, lebrun
   -
     name: disk usage on /org
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org"
-    hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, merkel, saens
+    hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, saens, pergolesi, verdi, puccini, spontini
+  -
+    name: disk usage on /org
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /org"
+    hosts: merkel
   -
     name: disk usage on /srv
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /srv"
-    hosts: agricola, arcadelt, argento
-  -
-    name: disk usage on /org/scratch2
-    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch2"
-    hosts: merkel
+    hosts: agricola, arcadelt, argento, allegri
   -
-    name: disk usage on /oldorg
-    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /oldorg"
+    name: disk usage on /org/scratch
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch"
     hosts: merkel
   -
     name: disk usage on /tmp
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /tmp"
-    hosts: samosa, raff, gluck, saens
+    hosts: samosa, raff, gluck, saens, escher, puccini, merkel
   -
     name: disk usage on /usr
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /usr"
-    hosts: samosa, raff, lobos, villa, gluck, saens
+    hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini, merulo
   -
     name: disk usage on /home
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /home"
+    hosts: raptor, escher, voltaire, lebrun
+  -
+    name: disk usage on /home
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /home"
     hosts: gluck
+  -
+    name: disk usage on /chroot
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /chroot"
+    hosts: raptor
+  -
+    name: disk usage on /mnt/hdc
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/hdc"
+    hosts: voltaire
+  -
+    name: disk usage on /mnt/sdb1
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/sdb1"
+    hosts: spontini
+  -
+    name: disk usage on /x
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /x"
+    hosts: caballero
 
  ############ All Computers ############
  ####
@@ -327,6 +603,15 @@ services:
   #  notification_interval: 480
   #  max_check_attempts: 4
   #  retry_check_interval: 12
+ ####
+  -
+    name: backup
+    nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup"
+    hostgroups: computers
+    normal_check_interval: 180
+    max_check_attempts: 2
+    retry_check_interval: 5
+
  ####
   -
     name: users
@@ -335,8 +620,18 @@ services:
  ####
   -
     name: load
-    nrpe: "/usr/lib/nagios/plugins/check_load -w 12,10,8 -c 22,18,14"
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 30,28,26 -c 50,45,40"
     hostgroups: computers
+    excludehostgroups: highload
+  -
+    name: load
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 140,120,100 -c 240,220,200"
+    hostgroups: highload
+    excludehosts: rietz
+  -
+    name: load
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 200,200,200 -c 350,350,350"
+    hosts: rietz
  ####
   -
     name: processes - zombies
@@ -369,11 +664,22 @@ services:
     hostgroups: computers
   -
     name: "network service - sshd"
-    check: check_ssh
+    check: dsa_check_ssh
     hostgroups: computers
     depends: process - sshd
     normal_check_interval:  60
-    notification_interval:  60
+
+  -
+    name: "network service - sshd"
+    check: dsa_check_ssh
+    hostgroups: deadslow
+    excludehosts: agnesi
+    normal_check_interval:  180
+  -
+    name: "network service - sshd - 2260"
+    check: dsa_check_ssh_port!2260
+    hosts: agnesi
+    normal_check_interval:  180
  ####
   -
     name: network service - nrpe
@@ -402,11 +708,20 @@ services:
     # etch: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ntp -C ntpd -a '/usr/sbin/ntpd -p /var/run/ntpd.pid'"
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -C ntpd -a '/usr/sbin/ntpd -p /var/run/ntpd.pid'"
     hostgroups: computers
+    excludehosts: raptor
   -
     name: network service - ntp
     check: check_ntp
     hostgroups: computers
     depends: process - ntpd
+    excludehosts: raptor, allegri
+  #
+  -
+    name: network service - time
+    check: dsa_check_time
+    hosts: raptor, allegri
+    depends: process - xinetd
+
  ###
   -
     name: process - atd
@@ -415,7 +730,7 @@ services:
  ###
   -
     name: process - cron
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C cron -a /usr/sbin/cron"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u root -C cron -a /usr/sbin/cron"
     hostgroups: computers
 
  ###
@@ -439,7 +754,7 @@ services:
  ###
   -
     name: process - exim
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'"
     hostgroups: computers
     excludehostgroups: postfix-hosts
     excludehosts: master, rietz, merkel
@@ -457,40 +772,44 @@ services:
   -
     name: process - clamav - clamd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u clamav -C clamd -a '/usr/sbin/clamd'"
-    hostgroups: heavy-email
+    hostgroups: heavy-exim, heavy-postfix
   -
     name: service - clamav
     nrpe: "/usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl"
-    hostgroups: heavy-email
+    hostgroups: heavy-exim, heavy-postfix
     depends: process - clamav - clamd
   -
     name: process - clamav - freshclam
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'"
-    hostgroups: heavy-email
+    hostgroups: heavy-exim, heavy-postfix
   #
   -
     name: unwanted process - clamav
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C clamd"
     hostgroups: computers
-    excludehostgroups: heavy-email
+    excludehostgroups: heavy-exim, heavy-postfix
   -
     name: unwanted process - freshclam
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C freshclam"
     hostgroups: computers
-    excludehostgroups: heavy-email
+    excludehostgroups: heavy-exim, heavy-postfix
  ###
   -
     name: process - spamd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
-    #hosts: samosa
-    hostgroups: heavy-email
-    excludehosts: rietz
+    hostgroups: heavy-exim
+    excludehosts: rietz, merkel, raff
+  -
+    name: process - spamd - master
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 10 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
+    hosts: liszt
   -
     name: process - spamd - child
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -C spamd -a 'spamd child'"
-    hostgroups: heavy-email
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:11 -c 1: -C spamd -a 'spamd child'"
+    hosts: liszt
+    hostgroups: heavy-exim
     depends: process - spamd - master
-    excludehosts: rietz
+    excludehosts: rietz, merkel, raff
   #
   -
     name: process - spamd - master
@@ -505,19 +824,74 @@ services:
     name: unwanted process - spamd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd"
     hostgroups: computers
-    excludehostgroups: heavy-email
+    excludehostgroups: heavy-exim
+    excludehosts: liszt
+  -
+    name: unwanted process - spamd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd"
+    hosts: merkel, raff
 
  ###
   -
     name: process - greylistd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u greylist -C greylistd -a '/usr/bin/python /usr/sbin/greylistd'"
-    hostgroups: heavy-email
+    hostgroups: heavy-exim
   #
   -
     name: unwanted process - greylistd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C greylistd"
     hostgroups: computers
-    excludehostgroups: heavy-email
+    excludehostgroups: heavy-exim
+
+ ###
+  -
+    name: process - postgrey
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -C postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --inet=127.0.0.1:60000'"
+    hostgroups: heavy-postfix
+  #
+  -
+    name: unwanted process - postgrey
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C postgrey"
+    hostgroups: computers
+    excludehostgroups: heavy-postfix
+ ###
+  -
+    name: process - amavis - master
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u amavis -C amavisd-new -a 'amavisd (master)'"
+    hostgroups: amavis-hosts
+  -
+    name: process - amavis - all
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1:10 -u amavis -C amavisd-new -a 'amavisd '"
+    hostgroups: amavis-hosts
+    depends: process - amavis - master
+  #
+  -
+    name: unwanted process - amavis
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C amavisd-new"
+    hostgroups: computers
+    excludehostgroups: amavis-hosts
+ ###
+  -
+    name: process - weightd - master
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -C policyd-weight -a 'policyd-weight (master)'"
+    hostgroups: heavy-postfix
+  -
+    name: process - weightd - cache
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -C policyd-weight -a 'policyd-weight (cache)'"
+    hostgroups: heavy-postfix
+    depends: process - weightd - master
+  -
+    name: process - weightd - child
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
+    hostgroups: heavy-postfix
+    depends: process - weightd - master
+  #
+  -
+    name: unwanted process - policyd-weight
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight"
+    hostgroups: computers
+    excludehostgroups: heavy-postfix
+
 
 
  ###
@@ -542,7 +916,7 @@ services:
     depends: process - postfix - master
   -
     name: process - postfix - anvil
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C anvil -a 'anvil -l -t unix -u'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:1 -c 0: -u postfix -C anvil -a 'anvil -l -t unix -u'"
     hostgroups: postfix-hosts
     depends: process - postfix - master
 
@@ -556,11 +930,6 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C proxymap -a 'proxymap -t unix -u'"
     hostgroups: postfix-hosts
     depends: process - postfix - master
-  -
-    name: process - postfix - smtpd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
-    hostgroups: postfix-hosts
-    depends: process - postfix - master
   -
     name: process - postfix - cleanup
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:30 -c 0:50 -u postfix -C cleanup -a 'cleanup -z -t unix -u -c'"
@@ -572,21 +941,67 @@ services:
     hostgroups: postfix-hosts
     depends: process - postfix - master
 
-
- ###
   -
-    name: network service - smtp
-    check: check_smtp
+    name: process - postfix - smtpd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:40 -c 0:90 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
+    hostgroups: postfix-hosts
+    excludehosts: liszt
+    depends: process - postfix - master
+  -
+    name: process - postfix - smtp
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C smtp -a 'smtp -t unix -u -c'"
     hostgroups: postfix-hosts
+    excludehosts: liszt
     depends: process - postfix - master
+
+  -
+    name: process - postfix - smtpd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:150 -c 0:200 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
+    hosts: liszt
+    depends: process - postfix - master
+  -
+    name: process - postfix - smtp
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:350 -c 0:500 -u postfix -C smtp -a 'smtp -t unix -u -c'"
+    hosts: liszt
+    depends: process - postfix - master
+
+ ###
   -
     name: network service - smtp
-    check: check_smtp
+    check: dsa_check_smtp
     hostgroups: computers
     excludehostgroups: postfix-hosts
     depends: process - exim
 
+  -
+    name: network service - smtp
+    check: dsa_check_smtp
+    hostgroups: postfix-hosts
+    excludehosts: verdi, kassia, allegri
+    depends: process - postfix - master
+  -
+    name: network service - smtp - port 2025
+    check: dsa_check_smtp_port!2025
+    hosts: verdi, kassia, murphy, allegri
+    depends: process - postfix - master
 
+  -
+    name: network service local - smtps cert
+    nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45"
+    hostgroups: postfix-hosts
+    depends: process - postfix - master
+    normal_check_interval: 120
+
+
+  -
+    name: setup - debian-admin in etc aliases
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-da-in-aliases"
+    hostgroups: computers
+    normal_check_interval: 120
+  -
+    name: setup - ud-ldap freshness
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-udldap-freshness"
+    hostgroups: computers
  ###
   -
     name: process - uptimed
@@ -597,8 +1012,43 @@ services:
     name: process - irqbalance
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'"
     hostgroups: computers
-    excludehosts: arcadelt, agricola, argento
+    excludehostgroups: single-cpu
+  -
+    name: unwanted process - irqbalance
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance"
+    hostgroups: single-cpu
 
+ ####
+ ###
+  #-
+  #  name: unwanted process - system-tools-backends
+  #  nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C system-tools-ba"
+  #  hostgroups: computers
+  #-
+  #  name: unwanted process - dbus-daemon
+  #  nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C dbus-daemon"
+  #  hostgroups: computers
+  -
+    name: unwanted process - gkrellmd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd"
+    hostgroups: computers
+  -
+    name: unwanted process - portmap
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C portmap"
+    hostgroups: computers
+  -
+    name: unwanted process - rpc.statd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C rpc.statd"
+    hostgroups: computers
+  -
+    name: unwanted process - inetd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C inetd"
+    hostgroups: computers
+  -
+    name: unwanted process - snmpd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C snmpd"
+    hostgroups: computers
+    excludehosts: lobos, villa
 
 
  ############ Processes/Services that only run on some computers ############
@@ -617,22 +1067,38 @@ services:
  ###
   -
     name: process - mdadm monitor
+    servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /var/run/mdadm/monitor.pid --daemonise --scan'"
     hostgroups: sw-raid
   -
     name: RAID - sw raid
+    servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-sw"
     hostgroups: sw-raid
 
  ###
   -
     name: process - cpqarrayd
+    servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C cpqarrayd -a '/usr/sbin/cpqarrayd'"
-    hostgroups: dl385, dl380
+    hostgroups: dl385, dl380, dl360
   -
     name: RAID - arrayprobe
+    servicegroups: raid
     nrpe: "sudo /usr/bin/arrayprobe"
-    hostgroups: dl385, dl380
+    hostgroups: dl385, dl380, dl360
+ ###
+  -
+    name: RAID - DAC960
+    servicegroups: raid
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-dac960"
+    hosts: verdi
+ ###
+  -
+    name: RAID - 3ware
+    servicegroups: raid
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-3ware"
+    hosts: puccini
 
  ###
   -
@@ -643,19 +1109,25 @@ services:
   -
     name: process - udevd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C udevd -a 'udevd'"
-    hosts: sperger, ries, steffani, merkel, spohr
+    hosts: sperger, ries, steffani, merkel, spohr, peri, penalosa, albeniz, escher, verdi, liszt, kassia, agricola, arcadelt, argento, allegri
  ###
   -
     name: process - acpid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid -c /etc/acpi/events -s /var/run/acpid.socket'"
-    hosts: ries, steffani
+    hosts: ries, steffani, liszt, kassia
 
  ###
   -
     name: process - xinetd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive'"
-    hosts: samosa
+    hosts: samosa, raptor, allegri
     hostgroups: rsyncd-hosts
+  -
+    name: unwanted process - xinetd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C xinetd"
+    hostgroups: computers
+    excludehosts: samosa, raptor, allegri
+    excludehostgroups: rsyncd-hosts
  ###
   -
     name: network service - finger
@@ -676,14 +1148,10 @@ services:
     depends: rietz:process - xinetd
 
  ###
-  -
-    name: process - nagios1
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nagios -C nagios -a '/usr/sbin/nagios -d /etc/nagios/nagios.cfg'"
-    hosts: samosa
   -
     name: process - nagios3
-    # it forks one instance to check, so make it -w 1:2
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u nagios -C nagios3 -a '/usr/sbin/nagios3 -d /etc/nagios3/nagios.cfg'"
+    # there is always one extra process per check currently running..
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:30 -c 1: -u nagios -C nagios3 -a '/usr/sbin/nagios3 -d /etc/nagios3/nagios.cfg'"
     hosts: samosa
 
  ###
@@ -693,7 +1161,7 @@ services:
     hostgroups: apache2-hosts
   -
     name: process - apache2 - worker
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1:100 -u www-data -C apache2 -a /usr/sbin/apache2"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1:100 -u www-data -C apache2 -a /usr/sbin/apache2"
     hostgroups: apache2-hosts
     depends: process - apache2 - master
   -
@@ -731,14 +1199,12 @@ services:
     hosts: samosa
     depends: "process - apache2 - master"
     normal_check_interval: 120
-    notification_interval: 120
   -
     name: network service - https cert
     check: dsa_check_cert!443
     hosts: samosa
     depends: network service - https
     normal_check_interval: 240
-    notification_interval: 240
  ####
   -
     name: process - named
@@ -749,6 +1215,11 @@ services:
     check: check_dns
     hostgroups: bind9-hosts
     depends: process - named
+  -
+    name: unwanted process - named
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C named"
+    hostgroups: computers
+    excludehostgroups: bind9-hosts
 
  ####
   -
@@ -757,7 +1228,7 @@ services:
     hostgroups: ftpd-hosts
   -
     name: process - vsftp - instance
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:30 -u ftp -C vsftpd -a 'vsftpd: '"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:80 -c 0:100 -u ftp -C vsftpd -a 'vsftpd: '"
     hostgroups: ftpd-hosts
   -
     name: network service - ftp
@@ -802,5 +1273,28 @@ services:
  ###
   -
     name: process - buildd
+    servicegroups: buildd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u buildd -C buildd '/usr/bin/perl /usr/bin/buildd'"
     hostgroups: buildd
+
+ ###
+  -
+    name: process - iscsid
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 1: -u root -C iscsid -a '/usr/sbin/iscsid'"
+    hosts: raptor
+
+ ###
+  -
+    name: process - tftpd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C in.tftpd -a '/usr/sbin/in.tftpd -l -B 1450 -s /var/lib/tftpboot'"
+    hostgroups: tftpd-hosts
+ ###
+  -
+    name: process - dhcpd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C dhcpd3 -a '/usr/sbin/dhcpd3 -q eth0'"
+    hosts: paer
+ ###
+  -
+    name: process - monit
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C monit -a '/usr/sbin/monit -d 30 -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
+    hosts: villa, lobos, steffani, kassia, master