X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=8d1eb68666095a09c49851d38c043fbe93089f8e;hb=0a3ef657b90cf98cf29101d9afa964ea6758d6cd;hp=d3790e37f7c5b2481ef0742329ea24cc4d42bb68;hpb=49f5b3e3ad0f97df84509772ffe023ccd2a57e27;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index d3790e3..8d1eb68 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -78,6 +78,10 @@ servers: address: 192.25.206.1 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-gatech: + address: 128.61.240.1 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-grnet: address: 194.177.211.193 parents: gw-ubcece @@ -139,6 +143,10 @@ servers: address: 128.101.240.222 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-unicamp: + address: 143.106.167.234 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-utwente: address: 130.89.149.1 parents: gw-ubcece @@ -205,39 +213,43 @@ servers: abel: address: 217.140.96.56 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, deadslow - alwyn: - address: 217.140.96.59 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow + hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, broken_mq antheil: address: 217.140.96.60 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow - arne: - address: 217.140.96.61 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq arnold: address: 217.140.96.57 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq + arm-arm-01: + address: 217.140.96.58 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq + arm-arm-02: + address: 217.140.96.59 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq harris: address: 217.140.96.66 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, porterbox, deadslow + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, porterbox, broken_mq + hartmann: + address: 217.140.96.67 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq hasse: address: 217.140.96.68 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, deadslow + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq henze: address: 217.140.96.70 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, deadslow + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq hoiby: address: 217.140.96.71 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd, deadslow + hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd, broken_mq # }}} # {{{ gw-brown franck: @@ -281,19 +293,19 @@ servers: bm-bl9: address: 5.153.231.249 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl10: address: 5.153.231.250 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl11: address: 5.153.231.251 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl12: address: 5.153.231.252 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq milanollo: address: 5.153.231.2 @@ -340,7 +352,7 @@ servers: coccia: address: 5.153.231.11 parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client + hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client, uploadqueue, xinetd-hosts backuphost: address: 5.153.231.12 parents: ganeti-bytemark @@ -391,10 +403,6 @@ servers: address: 5.153.231.23 parents: ganeti-bytemark hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs, apache2-hosts, apache-https, service - diamond: - address: 5.153.231.24 - parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, bind9-hosts, no-bacula petrova: address: 5.153.231.25 parents: ganeti-bytemark @@ -402,7 +410,7 @@ servers: oyens: address: 5.153.231.26 parents: ganeti-bytemark - hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller, apache-https + hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller, apache-https, broken_mq barriere: address: 5.153.231.27 parents: ganeti-bytemark @@ -410,7 +418,19 @@ servers: quantz: address: 5.153.231.28 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs, apache-https + portman: + address: 5.153.231.29 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts + paradis: + address: 5.153.231.30 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, apache-https + all-bm-01: + address: 5.153.231.32 + parents: ganeti-bytemark + hostgroups: computers, buildd, wheezy # }}} # {{{ gw-c3sl santoro: @@ -449,27 +469,23 @@ servers: senfl: address: 128.31.0.51 parents: gw-csail - hostgroups: computers, service, dl360, acpid-hosts, hassrvfs, apache2-hosts, rsyncd-hosts, bind9-hosts, xinetd-hosts, squeeze, apache-https + hostgroups: computers, service, dl360, acpid-hosts, hassrvfs, apache2-hosts, rsyncd-hosts, xinetd-hosts, squeeze, apache-https steffani: address: 128.31.0.36 parents: gw-csail hostgroups: computers, service, apache2-hosts, rsyncd-hosts, sw-raid, acpid-hosts, hasbootfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula # }}} # {{{ gw-dgi - argento: - address: 93.94.130.160 + storace: + address: 93.94.130.161 parents: gw-dgi - hostgroups: computers, sw-raid, hassrvfs, wheezy + hostgroups: computers, acpid-hosts, wheezy, dl380, nfs-client, hassrvfs # }}} # {{{ gw-ftcollins #alkman: # address: 192.25.206.63 # parents: gw-ftcollins # hostgroups: computers, buildd, acpid-hosts, wheezy - #merulo: - # address: 192.25.206.58 - # parents: gw-ftcollins - # hostgroups: computers, porterbox, hasusrfs, wheezy #mundy: # address: 192.25.206.62 # parents: gw-ftcollins @@ -479,6 +495,12 @@ servers: parents: gw-ftcollins hostgroups: computers, service, dl380, apache2-hosts, wheezy, no-bacula # }}} + # {{{ gw-gatech + sechter: + address: 128.61.240.73 + parents: gw-gatech + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, sw-raid, acpid-hosts, hasbootfs, hassrvfs, xinetd-hosts, wheezy, security_mirror + # }}} # {{{ gw-grnet barber: address: 194.177.211.203 @@ -488,10 +510,6 @@ servers: address: 194.177.211.204 parents: gw-grnet hostgroups: computers, acpid-hosts, buildd, hassrvfs, mptraid, wheezy - orff: - address: 194.177.211.209 - parents: gw-grnet - hostgroups: computers, acpid-hosts, bind9-hosts, mptraid, service, hassrvfs, apache2-hosts, squeeze rautavaara: address: 194.177.211.199 parents: gw-grnet @@ -511,33 +529,34 @@ servers: lw01: address: 185.17.185.177 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw02: address: 185.17.185.178 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw03: address: 185.17.185.179 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw04: address: 185.17.185.180 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 - lw05: - address: 185.17.185.181 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server + lw07: + address: 185.17.185.187 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid - lw06: - address: 185.17.185.182 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-client, autofs, hassrvfs, postgres91-hosts, apache2-hosts + lw08: + address: 185.17.185.189 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-client, autofs, hassrvfs + #, apache2-hosts # }}} # {{{ gw-karlsruhe zemlinsky: address: 129.143.160.6 parents: gw-karlsruhe - hostgroups: computers, buildd, hassrvfs, wheezy + hostgroups: computers, buildd, wheezy contacts: pkern # }}} # {{{ gw-man-da @@ -562,6 +581,14 @@ servers: address: 82.195.75.110 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, spamd, heavy-exim, highload + mipsel-manda-01: + address: 82.195.75.72 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + mipsel-manda-02: + address: 82.195.75.74 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy fils: address: 82.195.75.89 parents: ganeti3 @@ -628,6 +655,14 @@ servers: address: 82.195.75.92 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy + mipsel-manda-01: + address: 82.195.75.72 + parents: gw-man-da + hostgroups: computers, buildd, wheezy, hassrvfs, sw-raid + mipsel-manda-02: + address: 82.195.75.74 + parents: gw-man-da + hostgroups: computers, buildd, wheezy, hassrvfs, sw-raid # }}} # {{{ gw-marist zani: @@ -657,6 +692,10 @@ servers: # address: 140.211.166.58 # parents: gw-osuosl # hostgroups: computers, buildd, hasbootfs + merulo: + address: 140.211.166.46 + parents: gw-osuosl + hostgroups: computers, porterbox, hasusrfs, wheezy parry: address: 140.211.15.153 parents: gw-osuosl @@ -748,7 +787,7 @@ servers: ravel: address: 206.12.19.5 parents: sw-ubcece-kais - hostgroups: computers, general, dl385, apache2-hosts, acpid-hosts, hasbootfs, nfs-client, rsyncd-hosts, bind9-hosts, uploadqueue, hasorgfs, xinetd-hosts, wheezy, autofs + hostgroups: computers, general, dl385, apache2-hosts, acpid-hosts, hasbootfs, nfs-client, hasorgfs, wheezy, autofs dijkstra: address: 206.12.19.218 parents: sw-ubcece-kais @@ -797,10 +836,6 @@ servers: address: 206.12.19.13 parents: sw-ubcece-kais hostgroups: computers, hashomefs, sw-raid, rsyncd-hosts, apache2-hosts, xinetd-hosts, service, nfs-server, squeeze, hassrvfs - respighi: - address: 206.12.19.11 - parents: sw-ubcece-kais - hostgroups: computers, hasbootfs, aacraid, hassrvfs, service, apache2-hosts, squeeze # MSA 2000 (2012i) giustini: address: 192.168.2.6 @@ -945,6 +980,16 @@ servers: parents: gw-umn hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl380, hasvarfs, hasusrfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula # }}} + # {{{ gw-unicamp + asgard: + address: 143.106.167.145 + parents: gw-unicamp + hostgroups: layer3-infrastructure + powerpc-unicamp-01: + address: 143.106.167.149 + parents: asgard + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + # }}} # {{{ gw-utwente klecker: address: 130.89.148.10 @@ -967,11 +1012,11 @@ servers: howells: address: 144.32.168.75 parents: gw-ynic - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, deadslow, buildd + hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd hummel: address: 144.32.168.76 parents: gw-ynic - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, deadslow, buildd + hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd porpora: address: 144.32.168.78 parents: gw-ynic @@ -993,13 +1038,7 @@ servers: # }}} # }}} - -############################# -# host groups -# -# hostgroups ircd and all are automatically defined -# -############################# +# {{{ ############################# host groups ############################# hostgroups: computers: alias: computers @@ -1015,8 +1054,6 @@ hostgroups: notacomputer: alias: Systems that are not really systems. Yeah :) private: 1 - deadslow: - alias: Systems too slow to run any real checks freebsd: alias: freebsd private: 1 @@ -1078,9 +1115,6 @@ hostgroups: aacraid: alias: Hosts with Adaptec AACraid private: 1 -# megactl: -# alias: Hosts with LSI Logic MegaRAID, but not usable with megaraid check -# private: 1 megaraid: alias: Hosts with LSI Logic MegaRAID private: 1 @@ -1120,31 +1154,19 @@ hostgroups: jetty-hosts: alias: hosts running jetty private: 1 - #varnish-hosts: - # alias: hosts running varnish - # private: 1 bind9-hosts: alias: hosts running bind9 private: 1 # once every host runs unbound, do away with this group and check "computers" instead - unbound-hosts: - alias: hosts running unbound - private: 1 amavis-hosts: alias: hosts running amavis private: 1 - #tftpd-hosts: - # alias: hosts running a tftpd (tftpd-hpa as a daemon) - # private: 1 rsyncd-hosts: alias: hosts providing rsync services via xinetd private: 1 xinetd-hosts: alias: hosts providing services via xinetd private: 1 - #postgres81-hosts: - # alias: hosts running postgres81 - # private: 1 postgres91-hosts: alias: hosts running postgres91 private: 1 @@ -1210,9 +1232,6 @@ hostgroups: hasusrfs: alias: hosts with a /usr filesystem private: 1 -# hasvicepa: -# alias: hosts with a /vicepa -# private: 1 incomingmailrelayed: alias: incoming mail needs to go through a mail relay @@ -1245,9 +1264,12 @@ hostgroups: security_mirror: alias: hosts that are security mirrors private: 1 -############################# -# servicegroups -############################# + + broken_mq: + alias: hosts whose MQ is broken + private: 1 +# }}} +# {{{ ############################# servicegroups ############################# servicegroups: diskspace: alias: diskusage checks @@ -1268,11 +1290,12 @@ servicegroups: security: alias: security servicegroup_members: apt, kernel, samhain - -############################# -# services -############################# + MQ: + alias: rabbitMQ stuff +# }}} +# {{{ ############################# services ############################# services: + # {{{ ### basic networking - name: PING check: "check_ping!350.0,20%!600.0,40%" @@ -1302,13 +1325,8 @@ services: normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - - ############ Services ############ - ### - - ############ Disk Usage ############ - #### - + # }}} + # {{{ ### disk usage - name: disk usage - all servicegroups: diskspace @@ -1376,7 +1394,7 @@ services: name: disk usage on /var/lib/postgresql servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var/lib/postgresql" - hosts: sibelius, busoni, buxtehude + hosts: sibelius, busoni, buxtehude, lw07 - name: disk usage on /var/log servicegroups: diskspace @@ -1437,17 +1455,100 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/bacula" hosts: beethoven - - ############ All Computers ############ - #### - - name: apt - security updates - servicegroups: apt - nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt" + name: disk usage on /storage/snapshot-farm-1 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-1" + hosts: lw01 + - + name: disk usage on /storage/snapshot-farm-2 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-2" + hosts: lw02 + - + name: disk usage on /storage/snapshot-farm-3 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-3" + hosts: lw03 + - + name: disk usage on /storage/snapshot-farm-4 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-4" + hosts: lw04 + - + name: disk usage on /srv/morgue.debian.org/ + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 90 /srv/morgue.debian.org" + hosts: lw03 + # }}} + # {{{ ### system + # {{{ setup + - + name: setup - dsa config + nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers + normal_check_interval: 60 + excludehostgroups: alioth + - + name: setup - local hostname etc-hosts + nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' + hostgroups: computers + normal_check_interval: 60 + # }}} + # {{{ os health + #### + - + name: users + nrpe: "/usr/lib/nagios/plugins/check_users 30 35" + hostgroups: computers + - + name: load + nrpe: "/usr/lib/nagios/plugins/check_load -w 30,28,26 -c 50,45,50" + hostgroups: computers + excludehostgroups: highload + - + name: load + nrpe: "/usr/lib/nagios/plugins/check_load -w 140,120,100 -c 240,220,200" + hostgroups: highload + - + name: uptime check + nrpe: "/usr/lib/nagios/plugins/dsa-check-uptime" + hostgroups: computers + - + name: processes - total + nrpe: "/usr/lib/nagios/plugins/check_procs 620 700" + hostgroups: computers + - + name: swap usage - percent + nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%" + hostgroups: computers + - + name: swap usage - mb + nrpe: "/usr/lib/nagios/plugins/check_swap -w 20000 -c 5000" + hostgroups: computers + - + name: process - getty + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" + hostgroups: computers + excludehosts: zelenka, zandonai + - + name: processes - zombies + nrpe: "/usr/lib/nagios/plugins/check_procs 5 10 -s Z" + hostgroups: computers + - + name: system - available entropy + nrpe: "/usr/lib/nagios/plugins/dsa-check-entropy" + event_handler: dsa_event_handler_restart_ekey + hostgroups: computers + excludehostgroups: freebsd + - + name: system - filesystem check + nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems" normal_check_interval: 60 retry_check_interval: 15 - #### + hostgroups: computers + # }}} + # {{{ backup - name: backup servicegroups: backup @@ -1495,7 +1596,13 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: freebsd - #### + #### + - + name: process - acc.umu.se backup + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -a 'dsmc'" + hosts: sibelius + # }}} + # {{{ security - name: running kernel servicegroups: kernel @@ -1503,43 +1610,13 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 5 - - #### -# - name: afs - space -# hostgroups: bosserver -# nrpe: "/usr/lib/nagios/plugins/dsa-check-afs-space -H localhost" -# normal_check_interval: 60 -# max_check_attempts: 2 -# retry_check_interval: 5 -# -# - name: afs - bos -# hostgroups: bosserver -# nrpe: "/usr/lib/nagios/plugins/dsa-check-afs-bos -H localhost" -# normal_check_interval: 60 -# max_check_attempts: 2 -# retry_check_interval: 5 -# -# - name: afs - waiting connections -# hostgroups: bosserver -# nrpe: "/usr/lib/nagios/plugins/dsa-check-afs-rxdebug -H localhost" -# normal_check_interval: 60 -# max_check_attempts: 2 -# retry_check_interval: 5 -# - #- name: afs - udebug - # hostgroups: bosserver - # nrpe: "/usr/lib/nagios/plugins/dsa-check-afs-udebug -H localhost" - # normal_check_interval: 60 - # max_check_attempts: 2 - # retry_check_interval: 5 - - #### - #- - # name: puppet - # nrpe: "/usr/lib/nagios/plugins/dsa-check-file_age -i 540 -f /var/lib/puppet/state/state.yaml" - # hostgroups: computers - - #### + - + name: apt - security updates + servicegroups: apt + nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt" + hostgroups: computers + normal_check_interval: 60 + retry_check_interval: 15 - name: installed firewall nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/ferm.conf" @@ -1550,11 +1627,16 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/conf.d/defs.conf" hostgroups: computers excludehostgroups: freebsd - #### - - name: ganeti - job watcher paused - nrpe: "/usr/lib/nagios/plugins/negate /usr/lib/nagios/plugins/dsa-check-file -f /var/lib/ganeti/watcher.pause" + - + name: process - ulogd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'" hostgroups: computers - #### + excludehostgroups: freebsd, sparc + - + name: unexpected process - ulogd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd" + hostgroups: freebsd, sparc + #### - name: process - samhain nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C samhain -a '/usr/sbin/samhain'" @@ -1569,64 +1651,39 @@ services: normal_check_interval: 60 retry_check_interval: 5 excludehostgroups: brokensamhain - #### - - - name: process - acc.umu.se backup - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -a 'dsmc'" - hosts: sibelius - #### - - - name: users - nrpe: "/usr/lib/nagios/plugins/check_users 30 35" - hostgroups: computers - #### - - - name: load - nrpe: "/usr/lib/nagios/plugins/check_load -w 30,28,26 -c 50,45,50" - hostgroups: computers - excludehostgroups: highload - - - name: load - nrpe: "/usr/lib/nagios/plugins/check_load -w 140,120,100 -c 240,220,200" - hostgroups: highload - #### - - - name: uptime check - nrpe: "/usr/lib/nagios/plugins/dsa-check-uptime" - hostgroups: computers - #### - name: processes - samhain zombies nrpe: "/usr/lib/nagios/plugins/check_procs 3 6 -s Z -u root -a samhain" event_handler: dsa_event_handler_restart_samhain hostgroups: computers excludehostgroups: brokensamhain + # }}} + # {{{ logging - - name: processes - zombies - nrpe: "/usr/lib/nagios/plugins/check_procs 5 10 -s Z" - hostgroups: computers - #### - - - name: processes - total - nrpe: "/usr/lib/nagios/plugins/check_procs 620 700" + name: process - syslog-ng + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" hostgroups: computers - #### + excludehostgroups: freebsd + - - name: swap usage - percent - nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%" - hostgroups: computers - #### + name: process - syslog-ng + nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" + hostgroups: freebsd + - - name: swap usage - mb - nrpe: "/usr/lib/nagios/plugins/check_swap -w 20000 -c 5000" + name: remote logging on lotti + remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" + runfrom: lotti hostgroups: computers - #### + excludehostgroups: alioth - - name: process - getty - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" + name: remote logging on lully + remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" + runfrom: lully hostgroups: computers - excludehosts: zelenka, zandonai - #### + excludehostgroups: alioth + # }}} + # {{{ base service - name: process - sshd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:20 -c 1: -u root -C sshd -a '/usr/sbin/sshd'" @@ -1638,7 +1695,7 @@ services: depends: process - sshd normal_check_interval: 60 notification_interval: 1440 - #### + #### - name: network service - nrpe check: check_tcp!5666 @@ -1651,7 +1708,7 @@ services: hostgroups: computers max_check_attempts: -1 depends: network service - nrpe - ### + ### - name: process - munin-node nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a '/usr/sbin/munin-node'" @@ -1671,7 +1728,7 @@ services: check: check_tcp!4949 hostgroups: computers depends: process - munin-node - ### + ### - name: process - ntpd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -p 1 -C ntpd -a '/usr/sbin/ntpd -p /var/run/ntpd.pid'" @@ -1683,396 +1740,156 @@ services: hostgroups: computers depends: process - ntpd excludehosts: ancina - excludehostgroups: deadslow servicegroups: time - # + ### - name: network service - time check: dsa_check_time hosts: ancina depends: process - xinetd servicegroups: time - - ### + ### - name: process - atd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C atd -a /usr/sbin/atd" hostgroups: computers - ### + ### - name: process - cron nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C cron -a /usr/sbin/cron" hostgroups: computers - - ### + ### - - name: process - syslog-ng - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" + name: process - ud-replicated + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: freebsd - + excludehostgroups: squeeze, freebsd, alioth - - name: process - syslog-ng - nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" + name: process - ud-replicated + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: freebsd - + ### - - name: remote logging on lotti - remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" - runfrom: lotti - hostgroups: computers - excludehostgroups: alioth + name: process - monit + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/sbin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" + hostgroups: squeeze - - name: remote logging on lully - remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" - runfrom: lully + name: process - monit + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" hostgroups: computers - excludehostgroups: alioth + excludehostgroups: squeeze, alioth + ### - name: MQ connection on rainier + servicegroups: MQ remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rainier hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 - excludehostgroups: alioth + excludehostgroups: alioth, broken_mq - name: MQ connection on rapoport + servicegroups: MQ remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rapoport hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 - excludehostgroups: alioth - ### MAIL STUFF - ### + excludehostgroups: alioth, broken_mq + ### - - name: process - exim - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'" + name: local resolver + nrpe: "/usr/lib/nagios/plugins/dsa-check-resolver www.debian.org www.google.com" hostgroups: computers - excludehostgroups: postfix-hosts, mail-relay - excludehosts: master, busoni, quantz, buxtehude + normal_check_interval: 60 - - name: process - exim - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'" - hostgroups: mail-relay + name: process - unbound + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" + hostgroups: computers + excludehostgroups: alioth + ### - - name: process - exim - total - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1: -C exim4" + name: process - uptimed + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'" hostgroups: computers - excludehostgroups: postfix-hosts - excludehosts: master, busoni, quantz, buxtehude + ### - - name: process - exim - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:300 -c 1:500 -C exim4 -a '/usr/sbin/exim4'" - hosts: master, busoni, quantz, buxtehude - - - name: mail queue - nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000" - hostgroups: heavy-exim - ### - - - name: process - clamav - clamd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u clamav -C clamd -a '/usr/sbin/clamd'" - hostgroups: heavy-exim, heavy-postfix - - - name: service - clamav - nrpe: "/usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl" - hostgroups: heavy-exim, heavy-postfix - depends: process - clamav - clamd - - - name: process - clamav - freshclam - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'" - hostgroups: heavy-exim, heavy-postfix - - - name: unwanted process - clamav - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C clamd" - hostgroups: computers - excludehostgroups: heavy-exim, heavy-postfix, deadslow - - - name: unwanted process - freshclam - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C freshclam" + name: process - udevd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'" hostgroups: computers - excludehostgroups: heavy-exim, heavy-postfix, deadslow - ### - - - name: process - spamd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" - hostgroups: spamd - excludehosts: picconi - excludehostgroups: deadslow - - - name: process - spamd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 20 --min-spare=5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" - hosts: picconi - - - name: process - spamd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 10 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" - hosts: bendel - - - name: process - spamd - child - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:11 -c 1: -C spamd -a 'spamd child'" - hosts: bendel - hostgroups: spamd - depends: process - spamd - master - # - - - name: process - spamd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u debbugs -C spamd -a '/usr/sbin/spamd -d '" - hosts: buxtehude + excludehostgroups: freebsd - - name: process - spamd - child - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u debbugs -C spamd -a 'spamd child'" - hosts: buxtehude - # + name: unexpected process - udev + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C udevd" + hostgroups: freebsd + ### - - name: unwanted process - spamd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd" - hostgroups: computers - excludehostgroups: spamd, deadslow - excludehosts: bendel, busoni, buxtehude - - ### - #- - # name: process - greylistd - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u greylist -C greylistd -a '/usr/bin/python /usr/sbin/greylistd'" - # hostgroups: heavy-exim - # + name: process - acpid + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'" + hostgroups: acpid-hosts - - name: unwanted process - greylistd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C greylistd" + name: unexpected process - acpid + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid" hostgroups: computers - excludehostgroups: deadslow - - ### - - - name: process - postgrey - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'" - hostgroups: heavy-exim + excludehostgroups: acpid-hosts, kvmdomains + ### - - name: process - postgrey - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --inet=127.0.0.1:60000'" - hostgroups: heavy-postfix - # + name: process - xinetd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive'" + hostgroups: xinetd-hosts - - name: unwanted process - postgrey - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C postgrey" + name: unwanted process - xinetd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C xinetd" hostgroups: computers - excludehostgroups: heavy-postfix, heavy-exim, deadslow - ### - - - name: process - amavis - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u amavis -a 'amavisd-new (master)'" - hostgroups: amavis-hosts + excludehostgroups: xinetd-hosts + ### - - name: process - amavis - all - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1:10 -u amavis -a 'amavisd-new '" - hostgroups: amavis-hosts - depends: process - amavis - master - # + name: process - stunnel4 - puppet-ekeyd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" + hostgroups: squeeze, wheezy + excludehostgroups: freebsd, alioth + # }}} + # {{{ anti-services - - name: unwanted process - amavis - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C amavisd-new" + name: ganeti - job watcher paused + nrpe: "/usr/lib/nagios/plugins/negate /usr/lib/nagios/plugins/dsa-check-file -f /var/lib/ganeti/watcher.pause" hostgroups: computers - excludehostgroups: amavis-hosts, deadslow - ### - - name: process - weightd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" - hostgroups: heavy-postfix, alioth - - - name: process - weightd - cache - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" - hostgroups: heavy-postfix, alioth - depends: process - weightd - master - - - name: process - weightd - child - nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" - hostgroups: heavy-postfix, alioth - depends: process - weightd - master - ### - - - name: unwanted process - policyd-weight - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" + name: unwanted process - irqbalance + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance" hostgroups: computers - excludehostgroups: heavy-postfix, deadslow, alioth - - - ### - - - name: process - postfix - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C master -a '/usr/lib/postfix/master'" - hostgroups: postfix-hosts - - - name: process - postfix - qmgr - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C qmgr -a 'qmgr -l -t fifo -u'" - hostgroups: postfix-hosts - depends: process - postfix - master - #- - # name: process - postfix - tlsmgr - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C tlsmgr -a 'tlsmgr -l -t fifo -u'" - # hostgroups: postfix-hosts - # depends: process - postfix - master - - - name: process - postfix - pickup - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C pickup -a 'pickup -l -t fifo -u -c'" - hostgroups: postfix-hosts - depends: process - postfix - master - - - name: process - postfix - anvil - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:1 -c 0: -u postfix -C anvil -a 'anvil -l -t unix -u'" - hostgroups: postfix-hosts - depends: process - postfix - master - - - - name: process - postfix - trivial-rewrite - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C trivial-rewrite -a 'trivial-rewrite -n rewrite -t unix -u -c'" - hostgroups: postfix-hosts - depends: process - postfix - master - - - name: process - postfix - proxymap - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C proxymap -a 'proxymap -t unix -u'" - hostgroups: postfix-hosts - depends: process - postfix - master - - - name: process - postfix - cleanup - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:30 -c 0:50 -u postfix -C cleanup -a 'cleanup -z -t unix -u -c'" - hostgroups: postfix-hosts - depends: process - postfix - master - - - name: process - postfix - local - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:30 -c 0:50 -u postfix -C local -a 'local -t unix'" - hostgroups: postfix-hosts - depends: process - postfix - master - name: unwanted process - openvpn nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C openvpn" hostgroups: computers normal_check_interval: 120 - - - ### - - - name: network service - smtp - check: dsa_check_smtp - hostgroups: computers - excludehostgroups: postfix-hosts, incomingmailrelayed, incomingmailrelayed2025 - depends: process - exim - - - - name: network service - smtp - check: dsa_check_smtp - hostgroups: postfix-hosts - depends: process - postfix - master - - - name: network service - submission - check: dsa_check_smtp_port!587 - hostgroups: incomingmailrelayed - depends: process - exim - - - name: network service - smtp 2025 - check: dsa_check_smtp_port!2025 - hostgroups: incomingmailrelayed2025 - depends: process - exim - - - name: network service local - smtps cert - nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45" - hostgroups: postfix-hosts - depends: process - postfix - master - normal_check_interval: 120 - - - - name: setup - dsa config - nrpe: "/usr/lib/nagios/plugins/dsa-check-config" - hostgroups: computers - normal_check_interval: 60 - excludehostgroups: alioth - - - name: setup - local hostname etc-hosts - nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' - hostgroups: computers - normal_check_interval: 60 - - - name: system - available entropy - nrpe: "/usr/lib/nagios/plugins/dsa-check-entropy" - event_handler: dsa_event_handler_restart_ekey - hostgroups: computers - excludehostgroups: freebsd - - - name: system - filesystem check - nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems" - normal_check_interval: 60 - retry_check_interval: 15 - hostgroups: computers - ### - - - name: local resolver - nrpe: "/usr/lib/nagios/plugins/dsa-check-resolver www.debian.org www.google.com" - hostgroups: computers - normal_check_interval: 60 - - - name: process - unbound - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" - hostgroups: unbound-hosts, squeeze, wheezy - excludehostgroups: alioth - ### - - - name: process - uptimed - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'" - hostgroups: computers - ### - - - name: unwanted process - irqbalance - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance" - hostgroups: computers - excludehostgroups: deadslow - - #### - ### - #- - # name: unwanted process - system-tools-backends - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C system-tools-ba" - # hostgroups: computers - #- - # name: unwanted process - dbus-daemon - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C dbus-daemon" - # hostgroups: computers - name: unwanted process - gkrellmd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd" hostgroups: computers - excludehostgroups: deadslow - name: unwanted process - rpc.statd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C rpc.statd" hostgroups: computers - excludehostgroups: nfs-client, nfs-server, deadslow + excludehostgroups: nfs-client, nfs-server - name: unwanted process - inetd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C inetd" hostgroups: computers - excludehosts: abel, alwyn - excludehostgroups: deadslow + excludehosts: abel - name: unwanted process - snmpd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C snmpd" hostgroups: computers - excludehostgroups: deadslow - - #### + # }}} + # {{{ ssl certs - name: "host SSL cert" nrpe: "if [ -e /etc/ssl/certs/thishost.pem ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/certs/thishost.pem; else echo 'No thishost.pem on this host.'; fi" hostgroups: computers - - ############ Processes/Services that only run on some computers ############ - #### - ### - - - name: process - rngd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C rngd -a '/usr/sbin/rngd -r /dev/hwrng'" - hostgroups: dl385 - ### + # }}} + # {{{ HW health/raid - name: process - mdadm monitor servicegroups: raid @@ -2091,26 +1908,7 @@ services: servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-sw" hostgroups: sw-raid - - ### - - - name: process - ud-replicated - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" - hostgroups: computers - excludehostgroups: squeeze, freebsd, alioth - - - name: process - ud-replicated - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'" - hostgroups: freebsd - - - name: process - monit - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/sbin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" - hostgroups: squeeze - - - name: process - monit - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" - hostgroups: computers - excludehostgroups: squeeze, alioth + ### - name: HW - hpacucli status servicegroups: raid @@ -2142,12 +1940,11 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --ignore-transfer-speed=1I:1:1 --ignore-transfer-speed=1I:1:2" normal_check_interval: 120 hostgroups: dl585 - ### - - - name: HW - edac status - nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" - normal_check_interval: 120 - hosts: lw05, lw06 + ### +# - +# name: HW - edac status +# nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" +# normal_check_interval: 120 #hostgroups: computers #excludehosts: villa, lobos, senfl, schein - @@ -2155,160 +1952,283 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" normal_check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl - excludehosts: villa, lobos, senfl, schein + excludehosts: villa, lobos, senfl, schein, storace - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" normal_check_interval: 120 - hosts: villa, lobos + hosts: villa + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant --ignore-failed='PS2'" + normal_check_interval: 120 + hosts: lobos - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant" normal_check_interval: 120 hosts: senfl, schein - ### + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" + normal_check_interval: 120 + hosts: storace + ### - name: RAID - 3ware servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-3ware" hosts: powell - ### + ### - name: RAID - aacraid servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-aacraid" hostgroups: aacraid - ### + ### - name: RAID - megaraid servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-megaraid" hostgroups: megaraid - ### - #- - # name: RAID - 3ware - # servicegroups: raid - # nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-3ware" - # hosts: puccini - ### + ### - name: RAID - MPT servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-mpt" hostgroups: mptraid - - ### -# - -# name: RAID - megactl -# servicegroups: raid -# nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-megactl" -# hostgroups: megactl - ### + ### - name: RAID - DRBD servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All" hostgroups: drbd-hosts - ### + # }}} + # }}} + # {{{ ### mail stuff + # {{{ exim processes and mailq - - name: process - slapd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:20 -c 1:50 -u openldap -C slapd -a '/usr/sbin/slapd -h ldap:/// ldaps:/// -g openldap -u openldap'" - hosts: draghi + name: process - exim + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'" + hostgroups: computers + excludehostgroups: postfix-hosts, mail-relay + excludehosts: master, busoni, quantz, buxtehude - - name: network service - ldaps cert - check: dsa_check_cert!636 - depends: process - slapd - normal_check_interval: 60 - hosts: draghi - ### + name: process - exim + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u Debian-exim -C exim4 -a '/usr/sbin/exim4 -bd -q'" + hostgroups: mail-relay - - name: process - ulogd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'" + name: process - exim - total + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1: -C exim4" hostgroups: computers - excludehostgroups: freebsd, sparc + excludehostgroups: postfix-hosts + excludehosts: master, busoni, quantz, buxtehude - - name: unexpected process - ulogd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd" - hostgroups: freebsd, sparc + name: process - exim + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:300 -c 1:500 -C exim4 -a '/usr/sbin/exim4'" + hosts: master, busoni, quantz, buxtehude + - + name: mail queue + nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000" + hostgroups: heavy-exim + # }}} + # {{{ clamav + - + name: process - clamav - clamd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1: -u clamav -C clamd -a '/usr/sbin/clamd'" + hostgroups: heavy-exim, heavy-postfix + - + name: service - clamav + nrpe: "/usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl" + hostgroups: heavy-exim, heavy-postfix + depends: process - clamav - clamd + - + name: process - clamav - freshclam + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'" + hostgroups: heavy-exim, heavy-postfix + - + name: unwanted process - clamav + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C clamd" + hostgroups: computers + excludehostgroups: heavy-exim, heavy-postfix + - + name: unwanted process - freshclam + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C freshclam" + hostgroups: computers + excludehostgroups: heavy-exim, heavy-postfix + # }}} + # {{{ anti-spam + - + name: process - spamd - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" + hostgroups: spamd + excludehosts: picconi + - + name: process - spamd - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 20 --min-spare=5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" + hosts: picconi + - + name: process - spamd - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 10 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" + hosts: bendel + - + name: process - spamd - child + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:11 -c 1: -C spamd -a 'spamd child'" + hosts: bendel + hostgroups: spamd + depends: process - spamd - master + # + - + name: process - spamd - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u debbugs -C spamd -a '/usr/sbin/spamd -d '" + hosts: buxtehude + - + name: process - spamd - child + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u debbugs -C spamd -a 'spamd child'" + hosts: buxtehude + # + - + name: unwanted process - spamd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd" + hostgroups: computers + excludehostgroups: spamd + excludehosts: bendel, busoni, buxtehude - ### - - name: process - udevd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'" + name: unwanted process - greylistd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C greylistd" hostgroups: computers - excludehostgroups: freebsd + + ### + - + name: process - postgrey + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'" + hostgroups: heavy-exim + - + name: process - postgrey + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --inet=127.0.0.1:60000'" + hostgroups: heavy-postfix + # + - + name: unwanted process - postgrey + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C postgrey" + hostgroups: computers + excludehostgroups: heavy-postfix, heavy-exim + ### + - + name: process - amavis - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u amavis -a 'amavisd-new (master)'" + hostgroups: amavis-hosts + - + name: process - amavis - all + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1:10 -u amavis -a 'amavisd-new '" + hostgroups: amavis-hosts + depends: process - amavis - master + # + - + name: unwanted process - amavis + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C amavisd-new" + hostgroups: computers + excludehostgroups: amavis-hosts + ### + - + name: process - weightd - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" + hostgroups: heavy-postfix, alioth + - + name: process - weightd - cache + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" + hostgroups: heavy-postfix, alioth + depends: process - weightd - master + - + name: process - weightd - child + nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" + hostgroups: heavy-postfix, alioth + depends: process - weightd - master + ### + - + name: unwanted process - policyd-weight + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" + hostgroups: computers + excludehostgroups: heavy-postfix, alioth + # }}} + # {{{ postfix + ### + - + name: process - postfix - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C master -a '/usr/lib/postfix/master'" + hostgroups: postfix-hosts - - name: unexpected process - udev - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C udevd" - hostgroups: freebsd - ### + name: process - postfix - qmgr + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C qmgr -a 'qmgr -l -t fifo -u'" + hostgroups: postfix-hosts + depends: process - postfix - master - - name: process - acpid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'" - hostgroups: acpid-hosts + name: process - postfix - pickup + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postfix -C pickup -a 'pickup -l -t fifo -u -c'" + hostgroups: postfix-hosts + depends: process - postfix - master - - name: unexpected process - acpid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid" - hostgroups: computers - excludehostgroups: acpid-hosts, kvmdomains - ### -# - -# name: process - bosserver -# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C bosserver -a '/usr/sbin/bosserver'" -# hostgroups: bosserver -# - ### + name: process - postfix - anvil + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:1 -c 0: -u postfix -C anvil -a 'anvil -l -t unix -u'" + hostgroups: postfix-hosts + depends: process - postfix - master + - - name: process - xinetd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive'" - hostgroups: xinetd-hosts + name: process - postfix - trivial-rewrite + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C trivial-rewrite -a 'trivial-rewrite -n rewrite -t unix -u -c'" + hostgroups: postfix-hosts + depends: process - postfix - master - - name: unwanted process - xinetd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C xinetd" - hostgroups: computers - excludehostgroups: xinetd-hosts, deadslow - ### + name: process - postfix - proxymap + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:10 -c 0:15 -u postfix -C proxymap -a 'proxymap -t unix -u'" + hostgroups: postfix-hosts + depends: process - postfix - master - - name: network service - finger - check: check_tcp!79 - hosts: draghi - depends: process - xinetd - ### + name: process - postfix - cleanup + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:30 -c 0:50 -u postfix -C cleanup -a 'cleanup -z -t unix -u -c'" + hostgroups: postfix-hosts + depends: process - postfix - master - - name: network service - rsync - check: check_tcp!873 - hostgroups: rsyncd-hosts - depends: process - xinetd - excludehosts: rietz + name: process - postfix - local + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:30 -c 0:50 -u postfix -C local -a 'local -t unix'" + hostgroups: postfix-hosts + depends: process - postfix - master + - - name: network service - rsync - check: check_tcp!873 - hosts: rietz2 - depends: rietz:process - xinetd + name: network service local - smtps cert + nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45" + hostgroups: postfix-hosts + depends: process - postfix - master + normal_check_interval: 120 + # }}} + # {{{ mail - network service - - name: network service - rsync - check: check_tcp!873 - hosts: milanollo2 - depends: milanollo:process - xinetd + name: network service - smtp + check: dsa_check_smtp + hostgroups: computers + excludehostgroups: postfix-hosts, incomingmailrelayed, incomingmailrelayed2025 + depends: process - exim - ### - - name: process - icinga - # there is always one extra process per check currently running.. - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:30 -c 1: -u nagios -C icinga -a '/usr/sbin/icinga -d /etc/icinga/icinga.cfg'" - hosts: tchaikovsky - ### + name: network service - smtp + check: dsa_check_smtp + hostgroups: postfix-hosts + depends: process - postfix - master - - name: process - jetty - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -a 'jsvc.exec'" - hostgroups: jetty-hosts + name: network service - submission + check: dsa_check_smtp_port!587 + hostgroups: incomingmailrelayed + depends: process - exim - - name: process - jetty - worker - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1:100 -u jetty -a 'jsvc.exec -user jetty'" - hostgroups: jetty-hosts - depends: process - jetty - master - - ### + name: network service - smtp 2025 + check: dsa_check_smtp_port!2025 + hostgroups: incomingmailrelayed2025 + depends: process - exim + # }}} + # }}} + # {{{ ### host specific services + # {{{ HTTP - name: process - apache2 - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -a /usr/sbin/apache2" @@ -2322,7 +2242,7 @@ services: name: unwanted process - apache2 nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C apache2" hostgroups: computers - excludehostgroups: apache2-hosts, deadslow + excludehostgroups: apache2-hosts - name: network service - http @@ -2374,46 +2294,12 @@ services: name: unwanted network service - https check: dsa_check_port_closed!443 hostgroups: apache2-hosts - excludehostgroups: apache-https, deadslow + excludehostgroups: apache-https # ravel does ssh on port 443 excludehosts: ravel normal_check_interval: 60 - -### - #- - # name: process - varnish - master - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C varnishd -a '/usr/sbin/varnishd -P /var/run/varnishd.pid -a :80 -T localhost:6082 -f /etc/varnish/default.vcl'" - # hostgroups: varnish-hosts - #- - # name: process - varnish - worker - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u nobody -C varnishd -a '/usr/sbin/varnishd -P /var/run/varnishd.pid -a :80 -T localhost:6082 -f /etc/varnish/default.vcl'" - # hostgroups: varnish-hosts - # depends: process - varnish - master - #- - # name: network service - http - # check: check_http - # hostgroups: varnish-hosts - # depends: process - varnish - master - -### - - #### - - - name: process - named - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:15 -c 1: -u bind -C named -a '/usr/sbin/named -u bind'" - hostgroups: bind9-hosts - - - name: network service - dns - check: check_dns - hostgroups: bind9-hosts - depends: process - named - - - name: unwanted process - named - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C named" - hostgroups: computers - excludehostgroups: bind9-hosts, deadslow - - #### + # }}} + # {{{ FTP - name: network service - ftp check: check_ftp @@ -2424,43 +2310,13 @@ services: check: check_ftp hosts: klecker-ftp depends: klecker:process - xinetd - - #### - #- - # name: process - debianqueued - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u dak -C debianqueued" - - - name: process - debianqueued - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u dak-unpriv -C debianqueued" - hostgroups: uploadqueue - - - ### - #- - # name: process - postgresql81 - master - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgres -C postmaster -a '/usr/lib/postgresql/8.1/bin/postmaster -D /var/lib/postgresql/8.1/main -c config_file=/etc/postgresql/8.1/main/postgresql.conf'" - # hostgroups: postgres81-hosts - #- - # name: process - postgresql81 - writer - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgres -C postmaster -a 'postgres: writer process'" - # hostgroups: postgres81-hosts - # depends: process - postgresql81 - master - #- - # name: process - postgresql81 - buffer - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgres -C postmaster -a 'postgres: stats buffer process'" - # hostgroups: postgres81-hosts - # depends: process - postgresql81 - master - #- - # name: process - postgresql81 - collector - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgres -C postmaster -a 'postgres: stats collector process'" - # hostgroups: postgres81-hosts - # depends: process - postgresql81 - master - #### + # }}} + # {{{ postgres - name: unwanted process - postgresql nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres" hostgroups: computers - excludehostgroups: postgres91-hosts, deadslow + excludehostgroups: postgres91-hosts - name: unwanted process - postgresql 9.0 nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'" @@ -2472,39 +2328,33 @@ services: - name: postgresql backups nrpe: "/usr/bin/sudo -u debbackup /usr/lib/nagios/plugins/dsa-check-backuppg" - #hosts: beethoven hosts: backuphost - #### - #### - - - name: process - stunnel4 - puppet-ekeyd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" - hostgroups: squeeze, wheezy - excludehostgroups: freebsd, alioth - #### - - - name: process - UPS - nut usbhid-ups - ups1 - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C usbhid-ups -a '/lib/nut/usbhid-ups -a ups1'" - hosts: franck - - - name: process - UPS - nut upsd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsd -a '/sbin/upsd'" - hosts: franck - - - name: process - UPS - nut upsmon master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C upsmon -a '/sbin/upsmon'" - hosts: franck - - - name: process - UPS - nut upsmon worker - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsmon -a '/sbin/upsmon'" - hosts: franck - depends: process - UPS - nut upsmon master - - - name: UPS - on line power - nrpe: "/usr/lib/nagios/plugins/dsa-check-ups" - hosts: franck - depends: process - UPS - nut upsd - ### + # }}} + # {{{ power +# - +# name: process - UPS - nut usbhid-ups - ups1 +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C usbhid-ups -a '/lib/nut/usbhid-ups -a ups1'" +# hosts: franck +# - +# name: process - UPS - nut upsd +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsd -a '/sbin/upsd'" +# hosts: franck +# - +# name: process - UPS - nut upsmon master +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C upsmon -a '/sbin/upsmon'" +# hosts: franck +# - +# name: process - UPS - nut upsmon worker +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsmon -a '/sbin/upsmon'" +# hosts: franck +# depends: process - UPS - nut upsmon master +# - +# name: UPS - on line power +# nrpe: "/usr/lib/nagios/plugins/dsa-check-ups" +# hosts: franck +# depends: process - UPS - nut upsd + # }}} + # {{{ buildd - name: process - buildd servicegroups: buildd @@ -2520,7 +2370,6 @@ services: contact_groups: buildd - name: processes - zombie schroot - #nrpe: "/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0" nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s ZNs -c 0" hostgroups: buildd contact_groups: +buildd @@ -2532,21 +2381,8 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'" hostgroups: buildd contact_groups: +buildd - #### - - - name: network service - gobby - check: check_tcp!6523 - hosts: gombert - contact_groups: gobby - #### - #- - # name: process - tftpd - # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C in.tftpd -a '/usr/sbin/in.tftpd -l -B 1450 -s /var/lib/tftpboot'" - # hostgroups: tftpd-hosts - ### - - ############ NFS Stuff ############ - #### + # }}} + # {{{ NFS Stuff - name: process - statd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u statd -C rpc.statd -a '/sbin/rpc.statd'" @@ -2572,17 +2408,9 @@ services: name: unwanted process - automount nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C automount" hostgroups: computers - excludehostgroups: autofs, deadslow - - ############ MISC OTHER Stuff ############ - ##### - - - name: puppetmaster cert - nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" - hosts: handel - normal_check_interval: 60 - max_check_attempts: 2 - retry_check_interval: 5 + excludehostgroups: autofs + # }}} + # {{{ mirroring - name: mirror sync - bugs check: "dsa_check_mirrorsync_skew!bugs.debian.org!project/trace/bugs-master.debian.org!120:600" @@ -2605,6 +2433,23 @@ services: normal_check_interval: 15 max_check_attempts: 5 retry_check_interval: 5 + # }}} + # {{{ DNS + - + name: process - named + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:15 -c 1: -u bind -C named -a '/usr/sbin/named -u bind'" + hostgroups: bind9-hosts + - + name: network service - dns + check: check_dns + hostgroups: bind9-hosts + depends: process - named + - + name: unwanted process - named + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C named" + hostgroups: computers + excludehostgroups: bind9-hosts + ### - name: DNS SOA sync - debian.org check: "dsa_check_soas_add!denis.debian.org!debian.org" @@ -2617,17 +2462,13 @@ services: name: DNS SOA sync - debian.com check: "dsa_check_soas_add!denis.debian.org!debian.com" hosts: global - - - name: DNS SOA sync - mirror.debian.net - check: "dsa_check_soas_add!denis.debian.org!mirror.debian.net" - hosts: global - name: DNS SOA sync - 144-28.118.59.86.in-addr.arpa check: "dsa_check_soas_add!denis.debian.org!144-28.118.59.86.in-addr.arpa" hosts: global - name: DNS SOA sync - alioth.debian.org - check: "dsa_check_soas_add!alioth.debian.org!alioth.debian.org" + check: "dsa_check_soas_add!denis.debian.org!alioth.debian.org" hosts: global - name: DNS - delegation and signature expiry @@ -2649,8 +2490,8 @@ services: hosts: global remotecheck: "/usr/lib/nagios/plugins/dsa-check-statusfile /srv/dns.debian.org/var/nagios/ds" runfrom: denis - - ############ + # }}} + # {{{ storage - name: ping alive check remotecheck: "/usr/lib/nagios/plugins/check_ping -H $HOSTADDRESS$ -w 50,10% -c 200,30%" @@ -2666,21 +2507,19 @@ services: hosts: giustini - name: event log - remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=8867 $HOSTADDRESS$ public" + remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=9966 $HOSTADDRESS$ public" runfrom: dijkstra hosts: giustini - ############ + # }}} + # {{{ porterbox - name: current chroots nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" hostgroups: porterbox normal_check_interval: 60 retry_check_interval: 15 - ############ -# - -# name: process - openstack - keystone -# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u root -C keystone-all -a '/usr/bin/python /usr/bin/keystone-all'" -# hostgroups: openstack-controller + # }}} + # {{{ openstack - name: process - openstack - memcached nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nobody -C memcached -a '/usr/bin/memcached -m 64 -p 11211 -u nobody -l 127.0.0.1'" @@ -2721,4 +2560,81 @@ services: name: process - openstack - neutron-server nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u neutron -C neutron-server -a '/usr/bin/python2.7 /usr/bin/neutron-server --config-file=/etc/neutron/neutron.conf --config-file /etc/neutron/plugins/openvswitch/ovs_neutron_plugin.ini --log-file=/var/log/neutron/neutron-server.log'" hostgroups: openstack-controller + # }}} + # {{{ misc + - + name: process - rngd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C rngd -a '/usr/sbin/rngd -r /dev/hwrng'" + hostgroups: dl385 + ### + - + name: process - slapd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:20 -c 1:50 -u openldap -C slapd -a '/usr/sbin/slapd -h ldap:/// ldaps:/// -g openldap -u openldap'" + hosts: draghi + - + name: network service - ldaps cert + check: dsa_check_cert!636 + depends: process - slapd + normal_check_interval: 60 + hosts: draghi + ### + - + name: network service - finger + check: check_tcp!79 + hosts: draghi + depends: process - xinetd + ### + - + name: network service - rsync + check: check_tcp!873 + hostgroups: rsyncd-hosts + depends: process - xinetd + excludehosts: rietz + - + name: network service - rsync + check: check_tcp!873 + hosts: rietz2 + depends: rietz:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: milanollo2 + depends: milanollo:process - xinetd + ### + - + name: process - icinga + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:30 -c 1: -u nagios -C icinga -a '/usr/sbin/icinga -d /etc/icinga/icinga.cfg'" + hosts: tchaikovsky + ### + - + name: process - jetty - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -a 'jsvc.exec'" + hostgroups: jetty-hosts + - + name: process - jetty - worker + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1:100 -u jetty -a 'jsvc.exec -user jetty'" + hostgroups: jetty-hosts + depends: process - jetty - master + ### + - + name: process - debianqueued + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u dak-unpriv -C debianqueued" + hostgroups: uploadqueue + #### + - + name: network service - gobby + check: check_tcp!6523 + hosts: gombert + contact_groups: gobby + #### + - + name: puppetmaster cert + nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" + hosts: handel + normal_check_interval: 60 + max_check_attempts: 2 + retry_check_interval: 5 + # }}} +# }}} + # vim: set ts=2 sw=2 et ai si fdm=marker: