[project @ peter@palfrader.org-20080421154509-fl846h0d4az2vr6t]
[mirror/dsa-nagios.git] / nagios-master.cfg
index 3f5d97e..fd0718e 100644 (file)
 #  - verdi: pg upgrade, openvpn
 #  - mundy: salinfo_decode
 #  - puccini: mailgraph
+#  - lebrun: ippl
 #  -
-#  - agnesi
-#  - kassia
-#  - lebrun
-#  - murphy
 #  - piatti
 #  - tartini
-#sarge:
-#  - spontini
 
 # down:
 #  - ravel
@@ -106,7 +101,7 @@ servers:
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
   gw-1und1:
-    address: 195.20.247.53
+    address: 195.20.247.54
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
   gw-blackcat:
@@ -125,6 +120,22 @@ servers:
     address: 130.89.160.1
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+  #gw-ughent:
+  #  address: 157.193.39.254
+  #  parents: gw-HP-ftc
+  #  hostgroups: routing-infrastructure
+  gw-agnesi:
+    address: 65.173.90.18
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-ubc:
+    address: 137.82.84.41
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-carnet:
+    address: 161.53.160.1
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
 
   samosa:
     address: 192.25.206.57
@@ -148,11 +159,11 @@ servers:
   peri:
     address: 192.25.206.15
     parents: samosa
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
   penalosa:
     address: 192.25.206.68
     parents: samosa
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
   mundy:
     address: 192.25.206.62
     parents: samosa
@@ -161,6 +172,10 @@ servers:
     address: 192.25.206.11
     parents: samosa
     hostgroups: computers, porterbox, bind9-hosts
+  merulo:
+    address: 192.25.206.58
+    parents: samosa
+    hostgroups: computers, porterbox
 
   bartok:
     address: 82.195.75.91
@@ -173,11 +188,11 @@ servers:
   agricola:
     address: 82.195.75.86
     parents: gw-man-da
-    hostgroups: computers, porterbox, sw-raid
+    hostgroups: computers, porterbox, sw-raid, single-cpu
   arcadelt:
     address: 82.195.75.87
     parents: gw-man-da
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
   liszt:
     address: 82.195.75.100
     parents: gw-man-da
@@ -186,7 +201,11 @@ servers:
   master:
     address: 70.103.162.29
     parents: gw-brainfood
-    hostgroups: computers, general, apache2-hosts, bind9-hosts, heavy-exim
+    hostgroups: computers, general, apache2-hosts, bind9-hosts, heavy-exim, highload
+  murphy:
+    address: 70.103.162.31
+    parents: gw-brainfood
+    hostgroups: computers, general, apache2-hosts, bind9-hosts, postfix-hosts
 
   ries:
     address: 128.148.34.103
@@ -245,7 +264,7 @@ servers:
   argento:
     address: 195.49.152.174
     parents: gw-dg-i.net
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
 
   pergolesi:
     address: 62.104.23.252
@@ -254,7 +273,7 @@ servers:
   bruckner:
     address: 62.104.23.253
     parents: gw-freenet
-    hostgroups: computers, porterbox
+    hostgroups: computers, porterbox, single-cpu
 
   raptor:
     address: 195.243.109.162
@@ -273,7 +292,7 @@ servers:
   escher:
     address: 213.188.99.215
     parents: gw-cst
-    hostgroups: computers, porterbox
+    hostgroups: computers, porterbox, single-cpu
 
   verdi:
     address: 192.54.42.193
@@ -289,6 +308,7 @@ servers:
     address: 87.106.4.56
     parents: gw-1und1
     hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, amavis-hosts
+    contacts: joerg
 
   caballero:
     address: 193.201.200.200
@@ -316,7 +336,27 @@ servers:
   kassia:
     address: 130.89.175.54
     parents: gw-utwente
-    hostgroups: computers, service, apache2-hosts, ftpd-hosts, dl360
+    hostgroups: computers, service, postfix-hosts, apache2-hosts, ftpd-hosts, rsyncd-hosts, dl360
+
+  allegri:
+    address: 157.193.39.233
+    parents: gw-HP-ftc
+    hostgroups: computers, buildd, postfix-hosts, sw-raid, single-cpu
+
+  agnesi:
+    address: 65.173.90.83
+    parents: gw-agnesi
+    hostgroups: deadslow
+
+  spontini:
+    address: 137.82.84.42
+    parents: gw-ubc
+    hostgroups: computers, buildd
+
+  lebrun:
+    address: 161.53.160.165
+    parents: gw-carnet
+    hostgroups: computers, buildd
 
 #############################
 # host groups
@@ -368,6 +408,9 @@ hostgroups:
   sw-raid:
     alias: Hosts with Linux software raid
     private: 1
+  single-cpu:
+    alias: Hosts with only one CPU
+    private: 1
 
   syslog-ng-hosts:
     alias: hosts running syslog-ng instead of sysklogd
@@ -422,6 +465,17 @@ services:
     name: PING
     check: "check_ping!300.0,20%!600.0,40%"
     hostgroups: all
+    excludehostgroups: routing-infrastructure
+    normal_check_interval: 5
+    max_check_attempts: 4
+    retry_check_interval: 1
+  -
+    name: PING
+    check: "check_ping!2000.0,60%!3000.0,80%"
+    hostgroups: routing-infrastructure
+    normal_check_interval: 5
+    max_check_attempts: 4
+    retry_check_interval: 1
 
  ############ Services ############
  ###
@@ -439,39 +493,43 @@ services:
   -
     name: disk usage on /boot
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /boot"
-    hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer
+    hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer, spontini
   -
     name: disk usage on /var
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var"
-    hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini
+    hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini, lebrun
   -
     name: disk usage on /org
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org"
-    hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, merkel, saens, pergolesi, verdi, puccini
+    hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, saens, pergolesi, verdi, puccini, spontini
+  -
+    name: disk usage on /org
+    nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /org"
+    hosts: merkel
   -
     name: disk usage on /srv
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /srv"
-    hosts: agricola, arcadelt, argento
-  -
-    name: disk usage on /org/scratch2
-    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch2"
-    hosts: merkel
+    hosts: agricola, arcadelt, argento, allegri
   -
-    name: disk usage on /oldorg
-    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /oldorg"
+    name: disk usage on /org/scratch
+    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch"
     hosts: merkel
   -
     name: disk usage on /tmp
     nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /tmp"
-    hosts: samosa, raff, gluck, saens, escher, puccini
+    hosts: samosa, raff, gluck, saens, escher, puccini, merkel
   -
     name: disk usage on /usr
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /usr"
-    hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini
+    hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini, merulo
   -
     name: disk usage on /home
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /home"
-    hosts: gluck, raptor, escher, voltaire
+    hosts: raptor, escher, voltaire, lebrun
+  -
+    name: disk usage on /home
+    nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /home"
+    hosts: gluck
   -
     name: disk usage on /chroot
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /chroot"
@@ -480,6 +538,10 @@ services:
     name: disk usage on /mnt/hdc
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/hdc"
     hosts: voltaire
+  -
+    name: disk usage on /mnt/sdb1
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/sdb1"
+    hosts: spontini
   -
     name: disk usage on /x
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /x"
@@ -495,6 +557,15 @@ services:
   #  notification_interval: 480
   #  max_check_attempts: 4
   #  retry_check_interval: 12
+ ####
+  -
+    name: backup
+    nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup"
+    hostgroups: computers
+    normal_check_interval: 180
+    max_check_attempts: 2
+    retry_check_interval: 5
+
  ####
   -
     name: users
@@ -503,13 +574,18 @@ services:
  ####
   -
     name: load
-    nrpe: "/usr/lib/nagios/plugins/check_load -w 12,10,8 -c 22,18,14"
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 30,28,26 -c 50,45,50"
     hostgroups: computers
     excludehostgroups: highload
   -
     name: load
-    nrpe: "/usr/lib/nagios/plugins/check_load -w 100,80,60 -c 200,160,140"
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 140,120,100 -c 240,220,200"
     hostgroups: highload
+    excludehosts: rietz
+  -
+    name: load
+    nrpe: "/usr/lib/nagios/plugins/check_load -w 200,200,200 -c 350,350,350"
+    hosts: rietz
  ####
   -
     name: processes - zombies
@@ -542,18 +618,22 @@ services:
     hostgroups: computers
   -
     name: "network service - sshd"
-    check: check_ssh
+    check: dsa_check_ssh
     hostgroups: computers
     depends: process - sshd
     normal_check_interval:  60
-    notification_interval:  60
 
   -
     name: "network service - sshd"
-    check: check_ssh
+    check: dsa_check_ssh
     hostgroups: deadslow
+    excludehosts: agnesi
+    normal_check_interval:  180
+  -
+    name: "network service - sshd - 2260"
+    check: dsa_check_ssh_port!2260
+    hosts: agnesi
     normal_check_interval:  180
-    notification_interval:  180
  ####
   -
     name: network service - nrpe
@@ -588,12 +668,12 @@ services:
     check: check_ntp
     hostgroups: computers
     depends: process - ntpd
-    excludehosts: raptor
+    excludehosts: raptor, allegri
   #
   -
     name: network service - time
-    check: check_time
-    hosts: raptor
+    check: dsa_check_time
+    hosts: raptor, allegri
     depends: process - xinetd
 
  ###
@@ -756,14 +836,8 @@ services:
     depends: process - weightd - master
   -
     name: process - weightd - child
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:10 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
     hostgroups: heavy-postfix
-    excludehosts: liszt
-    depends: process - weightd - master
-  -
-    name: process - weightd - child
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:40 -c 1: -u polw -C policyd-weight -a 'policyd-weight (child)'"
-    hosts: liszt
     depends: process - weightd - master
   #
   -
@@ -823,7 +897,7 @@ services:
 
   -
     name: process - postfix - smtpd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:40 -c 0:90 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
     hostgroups: postfix-hosts
     excludehosts: liszt
     depends: process - postfix - master
@@ -836,7 +910,7 @@ services:
 
   -
     name: process - postfix - smtpd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:100 -c 0:150 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:150 -c 0:200 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
     hosts: liszt
     depends: process - postfix - master
   -
@@ -846,11 +920,6 @@ services:
     depends: process - postfix - master
 
  ###
-  -
-    name: network service - smtp
-    check: dsa_check_smtp
-    hostgroups: postfix-hosts
-    depends: process - postfix - master
   -
     name: network service - smtp
     check: dsa_check_smtp
@@ -858,7 +927,35 @@ services:
     excludehostgroups: postfix-hosts
     depends: process - exim
 
+  -
+    name: network service - smtp
+    check: dsa_check_smtp
+    hostgroups: postfix-hosts
+    excludehosts: verdi, kassia, allegri
+    depends: process - postfix - master
+  -
+    name: network service - smtp - port 2025
+    check: dsa_check_smtp_port!2025
+    hosts: verdi, kassia, murphy, allegri
+    depends: process - postfix - master
+
+  -
+    name: network service local - smtps cert
+    nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45"
+    hostgroups: postfix-hosts
+    depends: process - postfix - master
+    normal_check_interval: 120
+
 
+  -
+    name: setup - debian-admin in etc aliases
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-da-in-aliases"
+    hostgroups: computers
+    normal_check_interval: 120
+  -
+    name: setup - ud-ldap freshness
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-udldap-freshness"
+    hostgroups: computers
  ###
   -
     name: process - uptimed
@@ -869,19 +966,22 @@ services:
     name: process - irqbalance
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'"
     hostgroups: computers
-    excludehosts: arcadelt, agricola, argento, penalosa, peri, escher, bruckner
-
+    excludehostgroups: single-cpu
+  -
+    name: unwanted process - irqbalance
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance"
+    hostgroups: single-cpu  # the group excluded from "process - irqbalance"
 
  ####
  ###
-  -
-    name: unwanted process - system-tools-backends
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C system-tools-ba"
-    hostgroups: computers
-  -
-    name: unwanted process - dbus-daemon
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C dbus-daemon"
-    hostgroups: computers
+  #-
+  #  name: unwanted process - system-tools-backends
+  #  nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C system-tools-ba"
+  #  hostgroups: computers
+  #-
+  #  name: unwanted process - dbus-daemon
+  #  nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C dbus-daemon"
+  #  hostgroups: computers
   -
     name: unwanted process - gkrellmd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd"
@@ -947,7 +1047,7 @@ services:
   -
     name: process - udevd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C udevd -a 'udevd'"
-    hosts: sperger, ries, steffani, merkel, spohr, peri, penalosa, albeniz, escher, verdi, liszt, kassia
+    hosts: sperger, ries, steffani, merkel, spohr, peri, penalosa, albeniz, escher, verdi, liszt, kassia, agricola, arcadelt, argento, allegri
  ###
   -
     name: process - acpid
@@ -958,13 +1058,13 @@ services:
   -
     name: process - xinetd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive'"
-    hosts: samosa, raptor
+    hosts: samosa, raptor, allegri
     hostgroups: rsyncd-hosts
   -
     name: unwanted process - xinetd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C xinetd"
     hostgroups: computers
-    excludehosts: samosa, raptor
+    excludehosts: samosa, raptor, allegri
     excludehostgroups: rsyncd-hosts
  ###
   -
@@ -1041,14 +1141,12 @@ services:
     hosts: samosa
     depends: "process - apache2 - master"
     normal_check_interval: 120
-    notification_interval: 120
   -
     name: network service - https cert
     check: dsa_check_cert!443
     hosts: samosa
     depends: network service - https
     normal_check_interval: 240
-    notification_interval: 240
  ####
   -
     name: process - named
@@ -1072,7 +1170,7 @@ services:
     hostgroups: ftpd-hosts
   -
     name: process - vsftp - instance
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:30 -c 0:50 -u ftp -C vsftpd -a 'vsftpd: '"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:80 -c 0:100 -u ftp -C vsftpd -a 'vsftpd: '"
     hostgroups: ftpd-hosts
   -
     name: network service - ftp
@@ -1136,3 +1234,8 @@ services:
     name: process - dhcpd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C dhcpd3 -a '/usr/sbin/dhcpd3 -q eth0'"
     hosts: paer
+ ###
+  -
+    name: process - monit
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C monit -a '/usr/sbin/monit -d 30 -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
+    hosts: villa, lobos, steffani, kassia, master