[project @ peter@palfrader.org-20080424195804-xh6bwo5xec2w4ada]
[mirror/dsa-nagios.git] / nagios-master.cfg
index 40b20fd..d7a1965 100644 (file)
 #  - verdi: pg upgrade, openvpn
 #  - mundy: salinfo_decode
 #  - puccini: mailgraph
+#  - lebrun: ippl
 #  -
-#  - agnesi
-#  - lebrun
-#  - murphy
 #  - piatti
 #  - tartini
-#sarge:
-#  - spontini
 
 # down:
 #  - ravel
@@ -40,6 +36,7 @@ servers:
     address: 82.195.75.126
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+    contacts: joerg, bzed
   gw-HP-ftc:
     address: 192.25.206.1
     parents: samosa
@@ -80,6 +77,7 @@ servers:
     address: 195.49.152.190
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+    contacts: bzed
   gw-freenet:
     address: 62.104.23.249
     parents: gw-HP-ftc
@@ -92,6 +90,7 @@ servers:
     address: 193.62.202.18
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+    contacts: tjrc1
   gw-cst:
     address: 213.188.99.215
     parents: gw-HP-ftc
@@ -105,9 +104,10 @@ servers:
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
   gw-1und1:
-    address: 195.20.247.53
+    address: 195.20.247.54
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
+    contacts: joerg
   gw-blackcat:
     address: 193.201.200.129
     parents: gw-HP-ftc
@@ -124,8 +124,20 @@ servers:
     address: 130.89.160.1
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
-  gw-ughent:
-    address: 157.193.39.254
+  #gw-ughent:
+  #  address: 157.193.39.254
+  #  parents: gw-HP-ftc
+  #  hostgroups: routing-infrastructure
+  gw-agnesi:
+    address: 65.173.90.18
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-ubc:
+    address: 137.82.84.41
+    parents: gw-HP-ftc
+    hostgroups: routing-infrastructure
+  gw-carnet:
+    address: 161.53.160.1
     parents: gw-HP-ftc
     hostgroups: routing-infrastructure
 
@@ -151,11 +163,13 @@ servers:
   peri:
     address: 192.25.206.15
     parents: samosa
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: dannf
   penalosa:
     address: 192.25.206.68
     parents: samosa
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: dannf
   mundy:
     address: 192.25.206.62
     parents: samosa
@@ -164,27 +178,36 @@ servers:
     address: 192.25.206.11
     parents: samosa
     hostgroups: computers, porterbox, bind9-hosts
+  merulo:
+    address: 192.25.206.58
+    parents: samosa
+    hostgroups: computers, porterbox
 
   bartok:
     address: 82.195.75.91
     parents: gw-man-da
     hostgroups: computers, service, syslog-ng-hosts, postfix-hosts, dl385
+    contacts: joerg, bzed
   sperger:
     address: 82.195.75.98
     parents: gw-man-da
     hostgroups: computers, porterbox, sw-raid
+    contacts: bzed
   agricola:
     address: 82.195.75.86
     parents: gw-man-da
-    hostgroups: computers, porterbox, sw-raid
+    hostgroups: computers, porterbox, sw-raid, single-cpu
+    contacts: bzed
   arcadelt:
     address: 82.195.75.87
     parents: gw-man-da
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: bzed
   liszt:
     address: 82.195.75.100
     parents: gw-man-da
     hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, dl385
+    contacts: bzed
 
   master:
     address: 70.103.162.29
@@ -252,7 +275,8 @@ servers:
   argento:
     address: 195.49.152.174
     parents: gw-dg-i.net
-    hostgroups: computers, buildd, sw-raid
+    hostgroups: computers, buildd, sw-raid, single-cpu
+    contacts: bzed
 
   pergolesi:
     address: 62.104.23.252
@@ -261,7 +285,7 @@ servers:
   bruckner:
     address: 62.104.23.253
     parents: gw-freenet
-    hostgroups: computers, porterbox
+    hostgroups: computers, porterbox, single-cpu
 
   raptor:
     address: 195.243.109.162
@@ -272,15 +296,17 @@ servers:
     address: 193.62.202.27
     parents: gw-sanger
     hostgroups: computers, porterbox, sw-raid
+    contacts: tjrc1
   goetz:
     address: 193.62.202.26
     parents: gw-sanger
     hostgroups: computers, buildd, sw-raid
+    contacts: tjrc1
 
   escher:
     address: 213.188.99.215
     parents: gw-cst
-    hostgroups: computers, porterbox
+    hostgroups: computers, porterbox, single-cpu
 
   verdi:
     address: 192.54.42.193
@@ -291,11 +317,13 @@ servers:
     address: 72.66.115.54
     parents: gw-frost
     hostgroups: computers, buildd
+    contacts: sfrost
 
   puccini:
     address: 87.106.4.56
     parents: gw-1und1
     hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, amavis-hosts
+    contacts: joerg
 
   caballero:
     address: 193.201.200.200
@@ -315,10 +343,12 @@ servers:
     address: 217.114.76.82
     parents: gw-nmmn
     hostgroups: deadslow
+    contacts: luk
   crest:
     address: 217.114.76.83
     parents: gw-nmmn
     hostgroups: deadslow
+    contacts: luk
 
   kassia:
     address: 130.89.175.54
@@ -327,8 +357,24 @@ servers:
 
   allegri:
     address: 157.193.39.233
-    parents: gw-ughent
-    hostgroups: computers, buildd, postfix-hosts, sw-raid
+    parents: gw-HP-ftc
+    hostgroups: computers, buildd, postfix-hosts, sw-raid, single-cpu
+    contacts: luk
+
+  agnesi:
+    address: 65.173.90.83
+    parents: gw-agnesi
+    hostgroups: deadslow
+
+  spontini:
+    address: 137.82.84.42
+    parents: gw-ubc
+    hostgroups: computers, buildd
+
+  lebrun:
+    address: 161.53.160.165
+    parents: gw-carnet
+    hostgroups: computers, buildd
 
 #############################
 # host groups
@@ -380,6 +426,9 @@ hostgroups:
   sw-raid:
     alias: Hosts with Linux software raid
     private: 1
+  single-cpu:
+    alias: Hosts with only one CPU
+    private: 1
 
   syslog-ng-hosts:
     alias: hosts running syslog-ng instead of sysklogd
@@ -426,6 +475,18 @@ hostgroups:
     alias: secondary IP addresses
     private: 1
 
+
+#############################
+# servicegroups
+#############################
+servicegroups:
+  diskspace:
+    alias: diskusage checks
+  buildd:
+    alias: buildd checks
+  raid:
+    alias: raid checks
+
 #############################
 # services
 #############################
@@ -440,7 +501,7 @@ services:
     retry_check_interval: 1
   -
     name: PING
-    check: "check_ping!900.0,60%!1500.0,80%"
+    check: "check_ping!2000.0,60%!3000.0,80%"
     hostgroups: routing-infrastructure
     normal_check_interval: 5
     max_check_attempts: 4
@@ -453,66 +514,82 @@ services:
  ####
   -
     name: disk usage - all
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 90 95"
     hostgroups: computers
   -
     name: disk usage on /
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /"
     hostgroups: computers
   -
     name: disk usage on /boot
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /boot"
-    hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer
+    hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer, spontini
   -
     name: disk usage on /var
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var"
-    hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini
+    hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini, lebrun
   -
     name: disk usage on /org
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org"
-    hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, saens, pergolesi, verdi, puccini
+    hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, saens, pergolesi, verdi, puccini, spontini
   -
     name: disk usage on /org
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /org"
     hosts: merkel
   -
     name: disk usage on /srv
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /srv"
     hosts: agricola, arcadelt, argento, allegri
   -
-    name: disk usage on /org/scratch2
-    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch2"
-    hosts: merkel
-  -
-    name: disk usage on /oldorg
-    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /oldorg"
+    name: disk usage on /org/scratch
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch"
     hosts: merkel
   -
     name: disk usage on /tmp
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /tmp"
-    hosts: samosa, raff, gluck, saens, escher, puccini
+    hosts: samosa, raff, gluck, saens, escher, puccini, merkel
   -
     name: disk usage on /usr
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /usr"
-    hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini
+    hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini, merulo
   -
     name: disk usage on /home
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /home"
-    hosts: raptor, escher, voltaire
+    hosts: raptor, escher, voltaire, lebrun
   -
     name: disk usage on /home
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /home"
     hosts: gluck
   -
     name: disk usage on /chroot
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /chroot"
     hosts: raptor
   -
     name: disk usage on /mnt/hdc
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/hdc"
     hosts: voltaire
+  -
+    name: disk usage on /mnt/sdb1
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/sdb1"
+    hosts: spontini
   -
     name: disk usage on /x
+    servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /x"
     hosts: caballero
 
@@ -526,6 +603,15 @@ services:
   #  notification_interval: 480
   #  max_check_attempts: 4
   #  retry_check_interval: 12
+ ####
+  -
+    name: backup
+    nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup"
+    hostgroups: computers
+    normal_check_interval: 180
+    max_check_attempts: 2
+    retry_check_interval: 5
+
  ####
   -
     name: users
@@ -578,18 +664,22 @@ services:
     hostgroups: computers
   -
     name: "network service - sshd"
-    check: check_ssh
+    check: dsa_check_ssh
     hostgroups: computers
     depends: process - sshd
     normal_check_interval:  60
-    notification_interval:  60
 
   -
     name: "network service - sshd"
-    check: check_ssh
+    check: dsa_check_ssh
     hostgroups: deadslow
+    excludehosts: agnesi
+    normal_check_interval:  180
+  -
+    name: "network service - sshd - 2260"
+    check: dsa_check_ssh_port!2260
+    hosts: agnesi
     normal_check_interval:  180
-    notification_interval:  180
  ####
   -
     name: network service - nrpe
@@ -624,12 +714,12 @@ services:
     check: check_ntp
     hostgroups: computers
     depends: process - ntpd
-    excludehosts: raptor
+    excludehosts: raptor, allegri
   #
   -
     name: network service - time
-    check: check_time
-    hosts: raptor
+    check: dsa_check_time
+    hosts: raptor, allegri
     depends: process - xinetd
 
  ###
@@ -640,7 +730,7 @@ services:
  ###
   -
     name: process - cron
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C cron -a /usr/sbin/cron"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u root -C cron -a /usr/sbin/cron"
     hostgroups: computers
 
  ###
@@ -826,7 +916,7 @@ services:
     depends: process - postfix - master
   -
     name: process - postfix - anvil
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C anvil -a 'anvil -l -t unix -u'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:1 -c 0: -u postfix -C anvil -a 'anvil -l -t unix -u'"
     hostgroups: postfix-hosts
     depends: process - postfix - master
 
@@ -853,7 +943,7 @@ services:
 
   -
     name: process - postfix - smtpd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:20 -c 0:50 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:40 -c 0:90 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'"
     hostgroups: postfix-hosts
     excludehosts: liszt
     depends: process - postfix - master
@@ -892,21 +982,26 @@ services:
   -
     name: network service - smtp - port 2025
     check: dsa_check_smtp_port!2025
-    hosts: verdi, kassia, murphy
-    depends: process - postfix - master
-  -
-    name: network service - smtp - port 8080
-    check: dsa_check_smtp_port!8080
-    hosts: allegri
+    hosts: verdi, kassia, murphy, allegri
     depends: process - postfix - master
 
   -
     name: network service local - smtps cert
     nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45"
     hostgroups: postfix-hosts
-    depends: network service local - smtp - smtps
-    normal_check_interval: 1440
+    depends: process - postfix - master
+    normal_check_interval: 120
+
 
+  -
+    name: setup - debian-admin in etc aliases
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-da-in-aliases"
+    hostgroups: computers
+    normal_check_interval: 120
+  -
+    name: setup - ud-ldap freshness
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-udldap-freshness"
+    hostgroups: computers
  ###
   -
     name: process - uptimed
@@ -917,8 +1012,11 @@ services:
     name: process - irqbalance
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'"
     hostgroups: computers
-    excludehosts: arcadelt, agricola, argento, penalosa, peri, escher, bruckner, allegri
-
+    excludehostgroups: single-cpu
+  -
+    name: unwanted process - irqbalance
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance"
+    hostgroups: single-cpu
 
  ####
  ###
@@ -969,22 +1067,38 @@ services:
  ###
   -
     name: process - mdadm monitor
+    servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /var/run/mdadm/monitor.pid --daemonise --scan'"
     hostgroups: sw-raid
   -
     name: RAID - sw raid
+    servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-sw"
     hostgroups: sw-raid
 
  ###
   -
     name: process - cpqarrayd
+    servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C cpqarrayd -a '/usr/sbin/cpqarrayd'"
     hostgroups: dl385, dl380, dl360
   -
     name: RAID - arrayprobe
+    servicegroups: raid
     nrpe: "sudo /usr/bin/arrayprobe"
     hostgroups: dl385, dl380, dl360
+ ###
+  -
+    name: RAID - DAC960
+    servicegroups: raid
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-dac960"
+    hosts: verdi
+ ###
+  -
+    name: RAID - 3ware
+    servicegroups: raid
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-3ware"
+    hosts: puccini
 
  ###
   -
@@ -1006,13 +1120,13 @@ services:
   -
     name: process - xinetd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive'"
-    hosts: samosa, raptor
+    hosts: samosa, raptor, allegri
     hostgroups: rsyncd-hosts
   -
     name: unwanted process - xinetd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C xinetd"
     hostgroups: computers
-    excludehosts: samosa, raptor
+    excludehosts: samosa, raptor, allegri
     excludehostgroups: rsyncd-hosts
  ###
   -
@@ -1034,10 +1148,6 @@ services:
     depends: rietz:process - xinetd
 
  ###
-  -
-    name: process - nagios1
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nagios -C nagios -a '/usr/sbin/nagios -d /etc/nagios/nagios.cfg'"
-    hosts: samosa
   -
     name: process - nagios3
     # there is always one extra process per check currently running..
@@ -1089,14 +1199,12 @@ services:
     hosts: samosa
     depends: "process - apache2 - master"
     normal_check_interval: 120
-    notification_interval: 120
   -
     name: network service - https cert
     check: dsa_check_cert!443
     hosts: samosa
     depends: network service - https
     normal_check_interval: 240
-    notification_interval: 240
  ####
   -
     name: process - named
@@ -1165,6 +1273,7 @@ services:
  ###
   -
     name: process - buildd
+    servicegroups: buildd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u buildd -C buildd '/usr/bin/perl /usr/bin/buildd'"
     hostgroups: buildd