X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=9a2ba27caab25ad3bead154f0237b6fa03a246fe;hb=2230997e24a165f489edd89ae2f6ed760980b774;hp=c365cee9b0bb1490365e9b81254d2bd8b9c6d1a4;hpb=a18da6d58cdbd9b42a7733c0dbb8e8590e620428;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index c365cee..9a2ba27 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -46,7 +46,7 @@ servers: parents: gw-ubcece hostgroups: layer3-infrastructure gw-cecsit: - address: 150.203.164.38 + address: 150.203.164.1 parents: gw-ubcece hostgroups: layer3-infrastructure gw-arm: @@ -105,6 +105,10 @@ servers: address: 82.195.75.126 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-man-da2: + address: 82.195.78.116 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-marist: address: 148.100.88.1 parents: gw-ubcece @@ -142,16 +146,12 @@ servers: address: 206.12.19.254 hostgroups: layer3-infrastructure contacts: lfilipoz - gw-ugent: - address: 193.191.17.50 - parents: gw-ubcece - hostgroups: layer3-infrastructure gw-umn: address: 128.101.240.222 parents: gw-ubcece hostgroups: layer3-infrastructure gw-unicamp: - address: 177.220.10.65 + address: 177.220.10.129 parents: gw-ubcece hostgroups: layer3-infrastructure gw-utwente: @@ -230,6 +230,10 @@ servers: address: 141.170.6.155 parents: gw-aql hostgroups: computers, buildd, jessie, nfs-client + mips-aql-06: + address: 141.170.6.157 + parents: gw-aql + hostgroups: computers, buildd, jessie, hassrvfs minkus: address: 141.170.6.151 parents: gw-aql @@ -323,24 +327,32 @@ servers: bm-bl9: address: 5.153.231.249 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq + hostgroups: computers, bm-bl, service, jessie bm-bl10: address: 5.153.231.250 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq + hostgroups: computers, bm-bl, service, jessie bm-bl11: address: 5.153.231.251 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq + hostgroups: computers, bm-bl, service, jessie bm-bl12: address: 5.153.231.252 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq + hostgroups: computers, bm-bl, service, jessie + bm-bl13: + address: 5.153.231.253 + parents: gw-bytemark + hostgroups: computers, bm-bl, service, jessie + bm-bl14: + address: 5.153.231.254 + parents: gw-bytemark + hostgroups: computers, bm-bl, service, jessie milanollo: address: 5.153.231.2 parents: gw-bytemark - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, nfs-server, xinetd-hosts + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, nfs-server, xinetd-hosts milanollo2: address: 5.153.231.9 parents: milanollo @@ -390,7 +402,7 @@ servers: philp: address: 5.153.231.13 parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, jessie, apache2-hosts + hostgroups: computers, hassrvfs, kvmdomains, jessie, apache2-hosts, apache-https rainier: address: 5.153.231.16 parents: ganeti-bytemark @@ -402,7 +414,7 @@ servers: delfin: address: 5.153.231.17 parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, jessie, apache2-hosts, nfs-client, autofs + hostgroups: computers, hassrvfs, kvmdomains, jessie, apache2-hosts, apache-https, nfs-client, autofs wuiet: address: 5.153.231.18 parents: ganeti-bytemark @@ -432,7 +444,7 @@ servers: petrova: address: 5.153.231.25 parents: ganeti-bytemark - hostgroups: computers, kvmdomains, jessie, apache2-hosts + hostgroups: computers, kvmdomains, jessie, apache2-hosts, apache-https oyens: address: 5.153.231.26 parents: ganeti-bytemark @@ -445,10 +457,6 @@ servers: address: 5.153.231.28 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, jessie, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs, apache-https - portman: - address: 5.153.231.29 - parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts paradis: address: 5.153.231.30 parents: ganeti-bytemark @@ -457,6 +465,10 @@ servers: address: 5.153.231.32 parents: ganeti-bytemark hostgroups: computers, kvmdomains, jessie, no-bacula + tate: + address: 5.153.231.33 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, apache2-hosts, apache-https gideon: address: 5.153.231.34 parents: ganeti-bytemark @@ -464,7 +476,7 @@ servers: httpredir-bm-01: address: 5.153.231.35 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, hassrvfs lindsay: address: 5.153.231.36 parents: ganeti-bytemark @@ -484,10 +496,6 @@ servers: address: 5.153.231.40 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, jessie, hassrvfs, nfs-client, autofs - pittar: - address: 5.153.231.41 - parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, nfs-client, autofs #, apache2-hosts pinel: address: 5.153.231.42 parents: ganeti-bytemark @@ -496,6 +504,10 @@ servers: address: 5.153.231.43 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, jessie, hassrvfs + manziarly: + address: 5.153.231.44 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, apache2-hosts, apache-https # }}} # {{{ gw-c3sl santoro: @@ -522,7 +534,19 @@ servers: mirror-anu: address: 150.203.164.39 parents: gw-cecsit - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula, apache-https + hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror, apache-https + mirror-anu2: + address: 150.203.164.60 + parents: mirror-anu + hostgroups: secondary-IPs + mirror-anu3: + address: 150.203.164.61 + parents: mirror-anu + hostgroups: secondary-IPs + mirror-anu4: + address: 150.203.164.62 + parents: mirror-anu + hostgroups: secondary-IPs # }}} # {{{ gw-conova sompek: @@ -635,6 +659,14 @@ servers: address: 194.177.211.205 parents: ganeti-grnet hostgroups: computers, service, hassrvfs, kvmdomains, jessie, postgres94-hosts, apache2-hosts, apache-https + boott: + address: 194.177.211.206 + parents: ganeti-grnet + hostgroups: computers, service, hassrvfs, kvmdomains, jessie + porta: + address: 194.177.211.207 + parents: ganeti-grnet + hostgroups: computers, service, hassrvfs, kvmdomains, jessie # }}} # {{{ gw-isc schein: @@ -644,7 +676,19 @@ servers: mirror-isc: address: 149.20.20.7 parents: gw-isc - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula + hostgroups: computers, service, apache2-hosts, apache-https, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror + mirror-isc2: + address: 149.20.20.19 + parents: mirror-isc + hostgroups: secondary-IPs + mirror-isc3: + address: 149.20.20.22 + parents: mirror-isc + hostgroups: secondary-IPs + mirror-isc-syncproxy: + address: 149.20.20.21 + parents: mirror-isc + hostgroups: secondary-IPs # }}} # {{{ gw-leaseweb lw01: @@ -697,11 +741,11 @@ servers: czerny: address: 82.195.75.109 parents: gw-man-da - hostgroups: computers, service, dl380, acpid-hosts, wheezy, drbd-hosts + hostgroups: computers, service, dl380, acpid-hosts, jessie, drbd-hosts clementi: address: 82.195.75.103 parents: gw-man-da - hostgroups: computers, service, dl380, acpid-hosts, wheezy, drbd-hosts + hostgroups: computers, service, dl380, acpid-hosts, jessie, drbd-hosts bendel: address: 82.195.75.100 parents: ganeti3 @@ -771,7 +815,7 @@ servers: wolkenstein: address: 82.195.75.65 parents: ganeti3 - hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, rsyncd-hosts, apache2-hosts, jessie + hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, apache2-hosts, jessie, apache-https mipsel-manda-01: address: 82.195.75.72 parents: gw-man-da @@ -804,7 +848,7 @@ servers: beach: address: 140.211.166.201 parents: byrd - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, xinetd-hosts, hassrvfs, rsyncd-hosts, no-bacula, apache-https + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, xinetd-hosts, hassrvfs, rsyncd-hosts, apache-https pieta: address: 140.211.166.195 @@ -916,7 +960,7 @@ servers: ubc-bl4: address: 206.12.19.214 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ubc-bl8: address: 206.12.19.218 parents: sw-ubcece-kais @@ -982,11 +1026,11 @@ servers: glinka: address: 206.12.19.126 parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs, xinetd-hosts + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, nfs-client, autofs, xinetd-hosts tye: address: 206.12.19.129 parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, apache2-hosts, nfs-client, autofs, hassrvfs + hostgroups: computers, service, kvmdomains, jessie, heavy-exim, apache2-hosts, apache-https, nfs-client, autofs, hassrvfs elgar: address: 206.12.19.130 parents: ganeti2 @@ -1003,10 +1047,6 @@ servers: address: 206.12.19.136 parents: ganeti2 hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, broken_https_default_vhost - bizet: - address: 206.12.19.137 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, no-bacula ullmann: address: 206.12.19.141 parents: ganeti2 @@ -1014,7 +1054,7 @@ servers: sonntag: address: 206.12.19.142 parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, nfs-client, autofs + hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs menotti: address: 206.12.19.143 parents: ganeti2 @@ -1026,9 +1066,7 @@ servers: buxtehude: address: 206.12.19.147 parents: ganeti2 - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, heavy-exim, postgres94-hosts, hasvarlogfs, apache-https, spamd - # }}} - # {{{ gw-ugent + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, heavy-exim, postgres94-hosts, hasvarlogfs, apache-https, spamd, nfs-server # }}} # {{{ gw-umn #saens: @@ -1038,23 +1076,35 @@ servers: mirror-umn: address: 128.101.240.212 parents: gw-umn - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror + hostgroups: computers, service, apache2-hosts, apache-https, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror + mirror-umn2: + address: 128.101.240.215 + parents: mirror-umn + hostgroups: secondary-IPs + mirror-umn3: + address: 128.101.240.216 + parents: mirror-umn + hostgroups: secondary-IPs + mirror-umn4: + address: 128.101.240.217 + parents: mirror-umn + hostgroups: secondary-IPs # }}} # {{{ gw-unicamp prokofiev: - address: 177.220.10.78 + address: 177.220.10.140 parents: gw-unicamp hostgroups: computers, jessie, service powerpc-unicamp-01: - address: 177.220.10.79 + address: 177.220.10.141 parents: prokofiev hostgroups: computers, hassrvfs, buildd, jessie ppc64el-unicamp-01: - address: 177.220.10.80 + address: 177.220.10.142 parents: prokofiev hostgroups: computers, hassrvfs, buildd, jessie plummer: - address: 177.220.10.81 + address: 177.220.10.143 parents: prokofiev hostgroups: computers, porterbox, hassrvfs, jessie # }}} @@ -1062,7 +1112,7 @@ servers: klecker: address: 130.89.148.10 parents: gw-utwente - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl380, xinetd-hosts, jessie, incomingmailrelayed2025, hassrvfs + hostgroups: computers, service, apache2-hosts, apache-https, rsyncd-hosts, dl380, xinetd-hosts, jessie, incomingmailrelayed2025, hassrvfs klecker-ftp: address: 130.89.148.12 parents: klecker @@ -1166,6 +1216,9 @@ hostgroups: general: alias: general purpose developer accessible machines + hpnewraid: + alias: new (2015+) machines where we need hpssacli instead of hpacucli + private: 1 dl380: alias: HP DL380 hosts private: 1 @@ -1325,9 +1378,9 @@ hostgroups: alioth: alias: machines that just are just awkward private: 1 - openstack-compute: - alias: nodes that run OpenStack compute - private: 1 + #openstack-compute: + # alias: nodes that run OpenStack compute + # private: 1 openstack-controller: alias: nodes that run OpenStack controller private: 1 @@ -1372,21 +1425,21 @@ services: check: "check_ping!350.0,20%!600.0,40%" hostgroups: pingable excludehostgroups: layer3-infrastructure, high-RTT - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - name: PING check: "check_ping!600.0,20%!900.0,40%" hostgroups: high-RTT - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" hostgroups: layer3-infrastructure - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 # }}} @@ -1396,12 +1449,17 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk -w 5% -c 2% -A -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs -X nfs4 --ignore-eregi-path='/home/buildd/build-tr|/var/lib/schroot/mount|/proc/sys/fs/binfmt_misc'" hostgroups: computers - excludehosts: sibelius + excludehosts: sibelius, rietz - name: disk usage - all servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs -x nfs4 -x /srv/farm-snapshot/farm-misc 95 98" hosts: sibelius + - + name: disk usage - all + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs -x nfs4 -x /srv -x /home 95 98" + hosts: rietz - name: disk usage on / @@ -1515,13 +1573,13 @@ services: name: setup - dsa config nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' hostgroups: computers - normal_check_interval: 60 + check_interval: 60 # }}} # {{{ os health #### @@ -1588,7 +1646,7 @@ services: - name: system - filesystem check nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems" - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 hostgroups: computers # }}} @@ -1599,7 +1657,7 @@ services: nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup" hostgroups: computers excludehosts: backuphost, storace, backuphost - normal_check_interval: 60 + check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 - @@ -1607,7 +1665,7 @@ services: servicegroups: backup nrpe: "/usr/lib/nagios/plugins/dsa-check-dabackup-server" hosts: storace - normal_check_interval: 60 + check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 - @@ -1617,7 +1675,7 @@ services: runfrom: dinis hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 - name: backup - bacula - last full backup @@ -1626,8 +1684,13 @@ services: runfrom: dinis hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 + - + name: process - bacula-dir + servicegroups: backup + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-dir -a '/usr/sbin/bacula-dir -c /etc/bacula/bacula-dir.conf'" + hosts: dinis - name: process - bacula-fd servicegroups: backup @@ -1652,14 +1715,14 @@ services: servicegroups: kernel nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 5 - name: apt - security updates servicegroups: apt nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 - name: unexpected file - apt sources.list @@ -1673,7 +1736,7 @@ services: #nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs" hostgroups: computers excludehostgroups: freebsd - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 notification_interval: 10080 - @@ -1712,7 +1775,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/samhain" hostgroups: computers depends: process - samhain - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 5 excludehostgroups: brokensamhain - @@ -1767,7 +1830,7 @@ services: check: dsa_check_ssh hostgroups: computers depends: process - sshd - normal_check_interval: 60 + check_interval: 60 notification_interval: 1440 #### - @@ -1785,18 +1848,13 @@ services: ### - name: process - munin-node - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a '/usr/sbin/munin-node'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'" hostgroups: computers - excludehostgroups: freebsd, armhf + excludehostgroups: freebsd - name: process - munin-node nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C perl -a '/usr/bin/perl -wT /usr/sbin/munin-node'" hostgroups: freebsd - - - name: process - munin-node - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'" - hostgroups: wheezy, jessie - excludehostgroups: freebsd - name: network service - munin-node check: check_tcp!4949 @@ -1847,7 +1905,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rainier hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 excludehostgroups: alioth, broken_mq - @@ -1856,7 +1914,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rapoport hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 excludehostgroups: alioth, broken_mq ### @@ -1864,7 +1922,7 @@ services: name: local resolver nrpe: "/usr/lib/nagios/plugins/dsa-check-resolver www.debian.org www.google.com" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 - name: process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" @@ -1923,6 +1981,11 @@ services: hostgroups: computers excludehostgroups: freebsd, alioth excludehosts: czerny, grnet-node01, storace, ubc-bl2 + ### + - + name: process - rngd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C rngd -a '/usr/sbin/rngd -r /dev/hwrng'" + hostgroups: kvmdomains # }}} # {{{ anti-services - @@ -1937,7 +2000,7 @@ services: name: unwanted process - openvpn nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C openvpn" hostgroups: computers - normal_check_interval: 120 + check_interval: 120 - name: unwanted process - gkrellmd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd" @@ -1974,6 +2037,16 @@ services: name: "sso CRL" nrpe: "if [ -e /var/lib/dsa/sso/ca.crl ]; then /usr/lib/nagios/plugins/dsa-check-crl-expire -w 129600 -c 86400 /var/lib/dsa/sso/ca.crl; else echo 'No sso/ca.crl on this host.'; fi" hostgroups: computers + - + name: SSL certs - puppet + hosts: global + remotecheck: "/usr/lib/nagios/plugins/dsa-check-cert-expire-dir /etc/puppet/modules/ssl/files/servicecerts" + runfrom: handel + - + name: SSL certs - LE + hosts: global + remotecheck: "/usr/lib/nagios/plugins/dsa-check-cert-expire-dir /etc/puppet/modules/ssl/files/from-letsencrypt" + runfrom: handel # }}} # {{{ HW health/raid - @@ -2004,76 +2077,77 @@ services: name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli" - normal_check_interval: 120 + check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl180 - excludehosts: schein, rietz, mirror-anu, mirror-isc, mirror-umn + excludehosts: schein, rietz + excludehostgroups: hpnewraid - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-controller-ok --ignore-controller='P700m'" - normal_check_interval: 120 + check_interval: 120 hostgroups: bm-bl - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-battery" - normal_check_interval: 120 + check_interval: 120 hosts: schein, rietz - name: HW - hpacucli enclosure status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli-enclosure 1 1E:1" - normal_check_interval: 120 + check_interval: 120 hosts: franck - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --ignore-transfer-speed=1I:1:1 --ignore-transfer-speed=1I:1:2" - normal_check_interval: 120 + check_interval: 120 hostgroups: dl585 - name: HW - hpssacli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpssacli" - normal_check_interval: 120 - hosts: mirror-anu, mirror-isc, mirror-umn + check_interval: 120 + hostgroups: hpnewraid ### # - # name: HW - edac status # nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" -# normal_check_interval: 120 +# check_interval: 120 #hostgroups: computers #excludehosts: villa, lobos, schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" - normal_check_interval: 120 + check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl excludehosts: villa, lobos, schein, storace, mirror-anu - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" - normal_check_interval: 120 + check_interval: 120 hosts: villa - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant --ignore-failed='PS2'" - normal_check_interval: 120 + check_interval: 120 hosts: lobos - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant" - normal_check_interval: 120 + check_interval: 120 hosts: schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" - normal_check_interval: 120 + check_interval: 120 hosts: storace - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'" - normal_check_interval: 120 + check_interval: 120 hosts: mirror-anu ### - @@ -2099,6 +2173,12 @@ services: servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All" hostgroups: drbd-hosts + excludehosts: ubc-bl8 + - + name: RAID - DRBD + servicegroups: raid + nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All --ok-no-devices" + hosts: ubc-bl8 # }}} # }}} # {{{ ### mail stuff @@ -2306,7 +2386,7 @@ services: nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45" hostgroups: postfix-hosts depends: process - postfix - master - normal_check_interval: 120 + check_interval: 120 # }}} # {{{ mail - network service - @@ -2383,31 +2463,31 @@ services: excludehosts: menotti excludehostgroups: broken_https_default_vhost depends: "process - apache2 - master" - normal_check_interval: 120 + check_interval: 120 - name: network service - https check: dsa_check_https_want_auth hosts: menotti depends: "process - apache2 - master" - normal_check_interval: 120 + check_interval: 120 - name: network service - https check: dsa_check_https_any_status hostgroups: broken_https_default_vhost depends: "process - apache2 - master" - normal_check_interval: 120 + check_interval: 120 - name: network service - https cert check: dsa_check_cert!443 hostgroups: apache-https depends: network service - https - normal_check_interval: 60 + check_interval: 60 - name: unwanted network service - https check: dsa_check_port_closed!443 hostgroups: apache2-hosts excludehostgroups: apache-https - normal_check_interval: 60 + check_interval: 60 # }}} # {{{ FTP - @@ -2487,7 +2567,7 @@ services: nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s ZNs -c 0" hostgroups: buildd contact_groups: +buildd - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 24 retry_check_interval: 5 - @@ -2537,14 +2617,14 @@ services: name: mirror sync - packages check: "dsa_check_mirrorsync_skew!packages.debian.org!Pics/.trace!3600:57600" hosts: global - normal_check_interval: 15 + check_interval: 15 max_check_attempts: 5 retry_check_interval: 5 - name: mirror sync - snapshot check: "dsa_check_mirrorsync_skew!snapshot.debian.org!project/trace/snapshot-master.debian.org!3600:28800" hosts: global - normal_check_interval: 15 + check_interval: 15 max_check_attempts: 5 retry_check_interval: 5 # }}} @@ -2627,7 +2707,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/check_ping -H $HOSTADDRESS$ -w 50,10% -c 200,30%" runfrom: ubc-bl8 hosts: giustini - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - @@ -2646,7 +2726,7 @@ services: name: current chroots nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" hostgroups: porterbox - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 # }}} # {{{ openstack @@ -2662,10 +2742,10 @@ services: # name: process - openstack - nova-api # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-api -a '/usr/bin/python /usr/bin/nova-api --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-api.log'" # hostgroups: openstack-controller - - - name: process - openstack - nova-compute - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-compute -a '/usr/bin/python /usr/bin/nova-compute --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-compute.log --config-file=/etc/nova/nova-compute.conf'" - hostgroups: openstack-compute +# - +# name: process - openstack - nova-compute +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-compute -a '/usr/bin/python /usr/bin/nova-compute --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-compute.log --config-file=/etc/nova/nova-compute.conf'" +# hostgroups: openstack-compute # - # name: process - openstack - nova-cert # nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-cert -a '/usr/bin/python /usr/bin/nova-cert --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-cert.log'" @@ -2711,7 +2791,7 @@ services: name: network service - ldaps cert check: dsa_check_cert!636 depends: process - slapd - normal_check_interval: 60 + check_interval: 60 hosts: draghi ### - @@ -2736,6 +2816,21 @@ services: check: check_tcp!873 hosts: milanollo2 depends: milanollo:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: mirror-isc2, mirror-isc-syncproxy + depends: mirror-isc:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: mirror-umn2, mirror-umn3 + depends: mirror-umn:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: mirror-anu2, mirror-anu3 + depends: mirror-anu:process - xinetd ### - name: process - icinga @@ -2756,19 +2851,19 @@ services: - name: network service - sip-tls cert - 443 check: dsa_check_cert!443 - normal_check_interval: 60 + check_interval: 60 hosts: vogler - name: network service - sip-tls cert - 5061 check: dsa_check_cert!5061 - normal_check_interval: 60 + check_interval: 60 hosts: vogler #### - name: puppetmaster cert nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" hosts: handel - normal_check_interval: 60 + check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 # }}}