X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=nagios-master.cfg;h=d7a19650058cd99992117a89596171934a3ce96b;hb=dedd2f50e4e88e1bb29f78da0c6550bcf97be774;hp=e3d5f7f4a5eb9d808ccfe0e9bd59726ef8e97965;hpb=92b5857e660c8259844ec1f6e740287dd6f1ef61;p=mirror%2Fdsa-nagios.git diff --git a/nagios-master.cfg b/nagios-master.cfg index e3d5f7f..d7a1965 100644 --- a/nagios-master.cfg +++ b/nagios-master.cfg @@ -15,14 +15,10 @@ # - verdi: pg upgrade, openvpn # - mundy: salinfo_decode # - puccini: mailgraph +# - lebrun: ippl # - -# - agnesi -# - lebrun -# - murphy # - piatti # - tartini -#sarge: -# - spontini # down: # - ravel @@ -40,6 +36,7 @@ servers: address: 82.195.75.126 parents: gw-HP-ftc hostgroups: routing-infrastructure + contacts: joerg, bzed gw-HP-ftc: address: 192.25.206.1 parents: samosa @@ -80,6 +77,7 @@ servers: address: 195.49.152.190 parents: gw-HP-ftc hostgroups: routing-infrastructure + contacts: bzed gw-freenet: address: 62.104.23.249 parents: gw-HP-ftc @@ -92,6 +90,7 @@ servers: address: 193.62.202.18 parents: gw-HP-ftc hostgroups: routing-infrastructure + contacts: tjrc1 gw-cst: address: 213.188.99.215 parents: gw-HP-ftc @@ -105,9 +104,10 @@ servers: parents: gw-HP-ftc hostgroups: routing-infrastructure gw-1und1: - address: 195.20.247.53 + address: 195.20.247.54 parents: gw-HP-ftc hostgroups: routing-infrastructure + contacts: joerg gw-blackcat: address: 193.201.200.129 parents: gw-HP-ftc @@ -124,6 +124,22 @@ servers: address: 130.89.160.1 parents: gw-HP-ftc hostgroups: routing-infrastructure + #gw-ughent: + # address: 157.193.39.254 + # parents: gw-HP-ftc + # hostgroups: routing-infrastructure + gw-agnesi: + address: 65.173.90.18 + parents: gw-HP-ftc + hostgroups: routing-infrastructure + gw-ubc: + address: 137.82.84.41 + parents: gw-HP-ftc + hostgroups: routing-infrastructure + gw-carnet: + address: 161.53.160.1 + parents: gw-HP-ftc + hostgroups: routing-infrastructure samosa: address: 192.25.206.57 @@ -147,11 +163,13 @@ servers: peri: 
address: 192.25.206.15 parents: samosa - hostgroups: computers, buildd, sw-raid + hostgroups: computers, buildd, sw-raid, single-cpu + contacts: dannf penalosa: address: 192.25.206.68 parents: samosa - hostgroups: computers, buildd, sw-raid + hostgroups: computers, buildd, sw-raid, single-cpu + contacts: dannf mundy: address: 192.25.206.62 parents: samosa @@ -160,32 +178,45 @@ servers: address: 192.25.206.11 parents: samosa hostgroups: computers, porterbox, bind9-hosts + merulo: + address: 192.25.206.58 + parents: samosa + hostgroups: computers, porterbox bartok: address: 82.195.75.91 parents: gw-man-da hostgroups: computers, service, syslog-ng-hosts, postfix-hosts, dl385 + contacts: joerg, bzed sperger: address: 82.195.75.98 parents: gw-man-da hostgroups: computers, porterbox, sw-raid + contacts: bzed agricola: address: 82.195.75.86 parents: gw-man-da - hostgroups: computers, porterbox, sw-raid + hostgroups: computers, porterbox, sw-raid, single-cpu + contacts: bzed arcadelt: address: 82.195.75.87 parents: gw-man-da - hostgroups: computers, buildd, sw-raid + hostgroups: computers, buildd, sw-raid, single-cpu + contacts: bzed liszt: address: 82.195.75.100 parents: gw-man-da hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, dl385 + contacts: bzed master: address: 70.103.162.29 parents: gw-brainfood hostgroups: computers, general, apache2-hosts, bind9-hosts, heavy-exim, highload + murphy: + address: 70.103.162.31 + parents: gw-brainfood + hostgroups: computers, general, apache2-hosts, bind9-hosts, postfix-hosts ries: address: 128.148.34.103 @@ -244,7 +275,8 @@ servers: argento: address: 195.49.152.174 parents: gw-dg-i.net - hostgroups: computers, buildd, sw-raid + hostgroups: computers, buildd, sw-raid, single-cpu + contacts: bzed pergolesi: address: 62.104.23.252 @@ -253,7 +285,7 @@ servers: bruckner: address: 62.104.23.253 parents: gw-freenet - hostgroups: computers, porterbox + hostgroups: computers, porterbox, single-cpu 
raptor: address: 195.243.109.162 @@ -264,15 +296,17 @@ servers: address: 193.62.202.27 parents: gw-sanger hostgroups: computers, porterbox, sw-raid + contacts: tjrc1 goetz: address: 193.62.202.26 parents: gw-sanger hostgroups: computers, buildd, sw-raid + contacts: tjrc1 escher: address: 213.188.99.215 parents: gw-cst - hostgroups: computers, porterbox + hostgroups: computers, porterbox, single-cpu verdi: address: 192.54.42.193 @@ -283,11 +317,13 @@ servers: address: 72.66.115.54 parents: gw-frost hostgroups: computers, buildd + contacts: sfrost puccini: address: 87.106.4.56 parents: gw-1und1 hostgroups: computers, service, apache2-hosts, bind9-hosts, postfix-hosts, heavy-postfix, amavis-hosts + contacts: joerg caballero: address: 193.201.200.200 @@ -307,15 +343,38 @@ servers: address: 217.114.76.82 parents: gw-nmmn hostgroups: deadslow + contacts: luk crest: address: 217.114.76.83 parents: gw-nmmn hostgroups: deadslow + contacts: luk kassia: address: 130.89.175.54 parents: gw-utwente - hostgroups: computers, service, apache2-hosts, ftpd-hosts, rsyncd-hosts, dl360 + hostgroups: computers, service, postfix-hosts, apache2-hosts, ftpd-hosts, rsyncd-hosts, dl360 + + allegri: + address: 157.193.39.233 + parents: gw-HP-ftc + hostgroups: computers, buildd, postfix-hosts, sw-raid, single-cpu + contacts: luk + + agnesi: + address: 65.173.90.83 + parents: gw-agnesi + hostgroups: deadslow + + spontini: + address: 137.82.84.42 + parents: gw-ubc + hostgroups: computers, buildd + + lebrun: + address: 161.53.160.165 + parents: gw-carnet + hostgroups: computers, buildd ############################# # host groups @@ -367,6 +426,9 @@ hostgroups: sw-raid: alias: Hosts with Linux software raid private: 1 + single-cpu: + alias: Hosts with only one CPU + private: 1 syslog-ng-hosts: alias: hosts running syslog-ng instead of sysklogd @@ -413,6 +475,18 @@ hostgroups: alias: secondary IP addresses private: 1 + +############################# +# servicegroups +############################# 
+servicegroups: + diskspace: + alias: diskusage checks + buildd: + alias: buildd checks + raid: + alias: raid checks + ############################# # services ############################# @@ -421,6 +495,17 @@ services: name: PING check: "check_ping!300.0,20%!600.0,40%" hostgroups: all + excludehostgroups: routing-infrastructure + normal_check_interval: 5 + max_check_attempts: 4 + retry_check_interval: 1 + - + name: PING + check: "check_ping!2000.0,60%!3000.0,80%" + hostgroups: routing-infrastructure + normal_check_interval: 5 + max_check_attempts: 4 + retry_check_interval: 1 ############ Services ############ ### @@ -429,58 +514,82 @@ services: #### - name: disk usage - all + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 90 95" hostgroups: computers - name: disk usage on / + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /" hostgroups: computers - name: disk usage on /boot + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /boot" - hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer + hosts: sperger, rietz, steffani, penalosa, peri, albeniz, escher, goetz, mayer, mayr, paer, spontini - name: disk usage on /var + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var" - hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini + hosts: bartok, samosa, raff, lobos, villa, gluck, saens, escher, voltaire, puccini, lebrun - name: disk usage on /org + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org" - hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, merkel, saens, pergolesi, verdi, puccini + hosts: bartok, sperger, samosa, raff, lobos, villa, steffani, saens, pergolesi, verdi, puccini, spontini + - + name: disk usage on /org + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /org" + hosts: merkel - name: disk usage on /srv + servicegroups: 
diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /srv" - hosts: agricola, arcadelt, argento - - - name: disk usage on /org/scratch2 - nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch2" - hosts: merkel + hosts: agricola, arcadelt, argento, allegri - - name: disk usage on /oldorg - nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /oldorg" + name: disk usage on /org/scratch + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /org/scratch" hosts: merkel - name: disk usage on /tmp + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 60 80 /tmp" - hosts: samosa, raff, gluck, saens, escher, puccini + hosts: samosa, raff, gluck, saens, escher, puccini, merkel - name: disk usage on /usr + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /usr" - hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini + hosts: samosa, raff, lobos, villa, gluck, saens, pergolesi, puccini, merulo - name: disk usage on /home + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /home" - hosts: gluck, raptor, escher, voltaire + hosts: raptor, escher, voltaire, lebrun + - + name: disk usage on /home + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /home" + hosts: gluck - name: disk usage on /chroot + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /chroot" hosts: raptor - name: disk usage on /mnt/hdc + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/hdc" hosts: voltaire + - + name: disk usage on /mnt/sdb1 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /mnt/sdb1" + hosts: spontini - name: disk usage on /x + servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /x" hosts: caballero @@ -494,6 +603,15 @@ services: # notification_interval: 480 # max_check_attempts: 4 # retry_check_interval: 12 + #### + - + name: backup + nrpe: "sudo 
/usr/lib/nagios/plugins/dsa-check-dabackup" + hostgroups: computers + normal_check_interval: 180 + max_check_attempts: 2 + retry_check_interval: 5 + #### - name: users @@ -509,6 +627,11 @@ services: name: load nrpe: "/usr/lib/nagios/plugins/check_load -w 140,120,100 -c 240,220,200" hostgroups: highload + excludehosts: rietz + - + name: load + nrpe: "/usr/lib/nagios/plugins/check_load -w 200,200,200 -c 350,350,350" + hosts: rietz #### - name: processes - zombies @@ -541,18 +664,22 @@ services: hostgroups: computers - name: "network service - sshd" - check: check_ssh + check: dsa_check_ssh hostgroups: computers depends: process - sshd normal_check_interval: 60 - notification_interval: 60 - name: "network service - sshd" - check: check_ssh + check: dsa_check_ssh hostgroups: deadslow + excludehosts: agnesi + normal_check_interval: 180 + - + name: "network service - sshd - 2260" + check: dsa_check_ssh_port!2260 + hosts: agnesi normal_check_interval: 180 - notification_interval: 180 #### - name: network service - nrpe @@ -587,12 +714,12 @@ services: check: check_ntp hostgroups: computers depends: process - ntpd - excludehosts: raptor + excludehosts: raptor, allegri # - name: network service - time - check: check_time - hosts: raptor + check: dsa_check_time + hosts: raptor, allegri depends: process - xinetd ### @@ -603,7 +730,7 @@ services: ### - name: process - cron - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C cron -a /usr/sbin/cron" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u root -C cron -a /usr/sbin/cron" hostgroups: computers ### @@ -789,7 +916,7 @@ services: depends: process - postfix - master - name: process - postfix - anvil - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C anvil -a 'anvil -l -t unix -u'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:1 -c 0: -u postfix -C anvil -a 'anvil -l -t unix -u'" hostgroups: postfix-hosts depends: process - postfix - master @@ -816,7 +943,7 @@ services: - 
name: process - postfix - smtpd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:20 -c 0:50 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:40 -c 0:90 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'" hostgroups: postfix-hosts excludehosts: liszt depends: process - postfix - master @@ -829,7 +956,7 @@ services: - name: process - postfix - smtpd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:100 -c 0:150 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:150 -c 0:200 -u postfix -C smtpd -a 'smtpd -n smtp -t inet -u -c'" hosts: liszt depends: process - postfix - master - @@ -839,11 +966,6 @@ services: depends: process - postfix - master ### - - - name: network service - smtp - check: dsa_check_smtp - hostgroups: postfix-hosts - depends: process - postfix - master - name: network service - smtp check: dsa_check_smtp @@ -851,7 +973,35 @@ services: excludehostgroups: postfix-hosts depends: process - exim + - + name: network service - smtp + check: dsa_check_smtp + hostgroups: postfix-hosts + excludehosts: verdi, kassia, allegri + depends: process - postfix - master + - + name: network service - smtp - port 2025 + check: dsa_check_smtp_port!2025 + hosts: verdi, kassia, murphy, allegri + depends: process - postfix - master + + - + name: network service local - smtps cert + nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45" + hostgroups: postfix-hosts + depends: process - postfix - master + normal_check_interval: 120 + + - + name: setup - debian-admin in etc aliases + nrpe: "/usr/lib/nagios/plugins/dsa-check-da-in-aliases" + hostgroups: computers + normal_check_interval: 120 + - + name: setup - ud-ldap freshness + nrpe: "/usr/lib/nagios/plugins/dsa-check-udldap-freshness" + hostgroups: computers ### - name: process - uptimed @@ -862,8 +1012,11 @@ services: name: process - irqbalance nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 
-c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'" hostgroups: computers - excludehosts: arcadelt, agricola, argento, penalosa, peri, escher, bruckner - + excludehostgroups: single-cpu + - + name: unwanted process - irqbalance + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance" + hostgroups: single-cpu #### ### @@ -914,22 +1067,38 @@ services: ### - name: process - mdadm monitor + servicegroups: raid nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /var/run/mdadm/monitor.pid --daemonise --scan'" hostgroups: sw-raid - name: RAID - sw raid + servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-sw" hostgroups: sw-raid ### - name: process - cpqarrayd + servicegroups: raid nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C cpqarrayd -a '/usr/sbin/cpqarrayd'" hostgroups: dl385, dl380, dl360 - name: RAID - arrayprobe + servicegroups: raid nrpe: "sudo /usr/bin/arrayprobe" hostgroups: dl385, dl380, dl360 + ### + - + name: RAID - DAC960 + servicegroups: raid + nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-dac960" + hosts: verdi + ### + - + name: RAID - 3ware + servicegroups: raid + nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-3ware" + hosts: puccini ### - @@ -940,7 +1109,7 @@ services: - name: process - udevd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C udevd -a 'udevd'" - hosts: sperger, ries, steffani, merkel, spohr, peri, penalosa, albeniz, escher, verdi, liszt, kassia + hosts: sperger, ries, steffani, merkel, spohr, peri, penalosa, albeniz, escher, verdi, liszt, kassia, agricola, arcadelt, argento, allegri ### - name: process - acpid @@ -951,13 +1120,13 @@ services: - name: process - xinetd nrpe: 
"/usr/lib/nagios/plugins/check_procs -w 0:0 -C xinetd" hostgroups: computers - excludehosts: samosa, raptor + excludehosts: samosa, raptor, allegri excludehostgroups: rsyncd-hosts ### - @@ -979,10 +1148,6 @@ services: depends: rietz:process - xinetd ### - - - name: process - nagios1 - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nagios -C nagios -a '/usr/sbin/nagios -d /etc/nagios/nagios.cfg'" - hosts: samosa - name: process - nagios3 # there is always one extra process per check currently running.. @@ -1034,14 +1199,12 @@ services: hosts: samosa depends: "process - apache2 - master" normal_check_interval: 120 - notification_interval: 120 - name: network service - https cert check: dsa_check_cert!443 hosts: samosa depends: network service - https normal_check_interval: 240 - notification_interval: 240 #### - name: process - named @@ -1110,6 +1273,7 @@ services: ### - name: process - buildd + servicegroups: buildd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u buildd -C buildd '/usr/bin/perl /usr/bin/buildd'" hostgroups: buildd