X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=dbc8e0a80e6d037200558dbff8da6525e9dac5ec;hb=a684136f20e90309d450015cceccb43ed0ce7788;hp=85e72ba0d3f8f9419dad2bd5cb1242ecdd8eb4fc;hpb=ec032b8eaf3a44082d4a40766714d785fd06c06e;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 85e72ba..dbc8e0a 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -151,7 +151,7 @@ servers: parents: gw-ubcece hostgroups: layer3-infrastructure gw-unicamp: - address: 177.220.10.65 + address: 177.220.10.129 parents: gw-ubcece hostgroups: layer3-infrastructure gw-utwente: @@ -522,7 +522,19 @@ servers: mirror-anu: address: 150.203.164.39 parents: gw-cecsit - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula, apache-https + hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror, apache-https + mirror-anu2: + address: 150.203.164.60 + parents: mirror-anu + hostgroups: secondary-IPs + mirror-anu3: + address: 150.203.164.61 + parents: mirror-anu + hostgroups: secondary-IPs + mirror-anu4: + address: 150.203.164.62 + parents: mirror-anu + hostgroups: secondary-IPs # }}} # {{{ gw-conova sompek: @@ -644,7 +656,19 @@ servers: mirror-isc: address: 149.20.20.7 parents: gw-isc - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hasorgfs, xinetd-hosts, jessie, security_mirror, no-bacula + hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror + mirror-isc2: + address: 149.20.20.19 + parents: mirror-isc + hostgroups: secondary-IPs + mirror-isc3: + address: 149.20.20.19 + parents: mirror-isc + hostgroups: secondary-IPs + mirror-isc-syncproxy: + address: 149.20.20.21 + parents: mirror-isc + hostgroups: secondary-IPs # }}} # {{{ gw-leaseweb lw01: @@ -731,7 +755,7 @@ servers: draghi: address: 82.195.75.106 parents: ganeti3 - hostgroups: computers, service, hasbootfs, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, wheezy + hostgroups: computers, service, hasbootfs, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, jessie geo1: address: 82.195.75.105 parents: ganeti3 @@ -796,14 +820,16 @@ servers: address: 140.211.15.34 parents: gw-osuosl hostgroups: computers, service, dl360, hassrvfs, jessie, hasvarlogfs, apache2-hosts, no-bacula, apache-https + byrd: - address: 140.211.166.20 + address: 140.211.166.200 parents: gw-osuosl hostgroups: computers, service, dl380, jessie - buxtehude: - address: 140.211.166.26 + beach: + address: 140.211.166.201 parents: byrd - hostgroups: computers, service, hassrvfs, apache2-hosts, heavy-exim, postgres94-hosts, jessie, hasvarlogfs, apache-https, spamd + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, xinetd-hosts, hassrvfs, rsyncd-hosts, apache-https + pieta: address: 140.211.166.195 parents: gw-osuosl @@ -898,27 +924,27 @@ servers: ubc-bl7: address: 206.12.19.217 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ubc-bl6: address: 206.12.19.216 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ubc-bl2: address: 206.12.19.212 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ubc-bl3: address: 206.12.19.213 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ubc-bl4: address: 206.12.19.214 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ubc-bl8: address: 206.12.19.218 parents: sw-ubcece-kais - hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts + hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts ganeti2: address: 206.12.19.23 parents: sw-ubcece-kais @@ -1001,14 +1027,6 @@ servers: address: 206.12.19.136 parents: ganeti2 hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, broken_https_default_vhost - bizet: - address: 206.12.19.137 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, no-bacula - beach: - address: 206.12.19.140 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, xinetd-hosts, hassrvfs, nfs-server, rsyncd-hosts, no-bacula, apache-https ullmann: address: 206.12.19.141 parents: ganeti2 @@ -1016,7 +1034,7 @@ servers: sonntag: address: 206.12.19.142 parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, nfs-client, autofs + hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs menotti: address: 206.12.19.143 parents: ganeti2 @@ -1025,6 +1043,10 @@ servers: address: 206.12.19.146 parents: ganeti2 hostgroups: computers, service, kvmdomains, jessie, spamd, heavy-exim, mail-relay + buxtehude: + address: 206.12.19.147 + parents: ganeti2 + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, heavy-exim, postgres94-hosts, hasvarlogfs, apache-https, spamd, nfs-server # }}} # {{{ gw-ugent # }}} @@ -1036,23 +1058,35 @@ servers: mirror-umn: address: 128.101.240.212 parents: gw-umn - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror + hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror + mirror-umn2: + address: 128.101.240.215 + parents: mirror-umn + hostgroups: secondary-IPs + mirror-umn3: + address: 128.101.240.216 + parents: mirror-umn + hostgroups: secondary-IPs + mirror-umn4: + address: 128.101.240.217 + parents: mirror-umn + hostgroups: secondary-IPs # }}} # {{{ gw-unicamp prokofiev: - address: 177.220.10.78 + address: 177.220.10.140 parents: gw-unicamp hostgroups: computers, jessie, service powerpc-unicamp-01: - address: 177.220.10.79 + address: 177.220.10.141 parents: prokofiev hostgroups: computers, hassrvfs, buildd, jessie ppc64el-unicamp-01: - address: 177.220.10.80 + address: 177.220.10.142 parents: prokofiev hostgroups: computers, hassrvfs, buildd, jessie plummer: - address: 177.220.10.81 + address: 177.220.10.143 parents: prokofiev hostgroups: computers, porterbox, hassrvfs, jessie # }}} @@ -1164,6 +1198,9 @@ hostgroups: general: alias: general purpose developer accessible machines + hpnewraid: + alias: new (2015+) machines where we need hpssacli instead of hpacucli + private: 1 dl380: alias: HP DL380 hosts private: 1 @@ -1370,21 +1407,21 @@ services: check: "check_ping!350.0,20%!600.0,40%" hostgroups: pingable excludehostgroups: layer3-infrastructure, high-RTT - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - name: PING check: "check_ping!600.0,20%!900.0,40%" hostgroups: high-RTT - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" hostgroups: layer3-infrastructure - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 # }}} @@ -1513,13 +1550,13 @@ services: name: setup - dsa config nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' hostgroups: computers - normal_check_interval: 60 + check_interval: 60 # }}} # {{{ os health #### @@ -1586,7 +1623,7 @@ services: - name: system - filesystem check nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems" - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 hostgroups: computers # }}} @@ -1597,7 +1634,7 @@ services: nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup" hostgroups: computers excludehosts: backuphost, storace, backuphost - normal_check_interval: 60 + check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 - @@ -1605,7 +1642,7 @@ services: servicegroups: backup nrpe: "/usr/lib/nagios/plugins/dsa-check-dabackup-server" hosts: storace - normal_check_interval: 60 + check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 - @@ -1615,7 +1652,7 @@ services: runfrom: dinis hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 - name: backup - bacula - last full backup @@ -1624,7 +1661,7 @@ services: runfrom: dinis hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 - name: process - bacula-fd @@ -1650,14 +1687,14 @@ services: servicegroups: kernel nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 5 - name: apt - security updates servicegroups: apt nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 - name: unexpected file - apt sources.list @@ -1671,7 +1708,7 @@ services: #nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs" hostgroups: computers excludehostgroups: freebsd - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 notification_interval: 10080 - @@ -1710,7 +1747,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/samhain" hostgroups: computers depends: process - samhain - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 5 excludehostgroups: brokensamhain - @@ -1765,7 +1802,7 @@ services: check: dsa_check_ssh hostgroups: computers depends: process - sshd - normal_check_interval: 60 + check_interval: 60 notification_interval: 1440 #### - @@ -1783,18 +1820,13 @@ services: ### - name: process - munin-node - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a '/usr/sbin/munin-node'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'" hostgroups: computers - excludehostgroups: freebsd, armhf + excludehostgroups: freebsd - name: process - munin-node nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C perl -a '/usr/bin/perl -wT /usr/sbin/munin-node'" hostgroups: freebsd - - - name: process - munin-node - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'" - hostgroups: wheezy, jessie - excludehostgroups: freebsd - name: network service - munin-node check: check_tcp!4949 @@ -1845,7 +1877,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rainier hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 excludehostgroups: alioth, broken_mq - @@ -1854,7 +1886,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rapoport hostgroups: computers - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 excludehostgroups: alioth, broken_mq ### @@ -1862,7 +1894,7 @@ services: name: local resolver nrpe: "/usr/lib/nagios/plugins/dsa-check-resolver www.debian.org www.google.com" hostgroups: computers - normal_check_interval: 60 + check_interval: 60 - name: process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" @@ -1935,7 +1967,7 @@ services: name: unwanted process - openvpn nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C openvpn" hostgroups: computers - normal_check_interval: 120 + check_interval: 120 - name: unwanted process - gkrellmd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd" @@ -2002,66 +2034,78 @@ services: name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli" - normal_check_interval: 120 + check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl180 excludehosts: schein, rietz + excludehostgroups: hpnewraid - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-controller-ok --ignore-controller='P700m'" - normal_check_interval: 120 + check_interval: 120 hostgroups: bm-bl - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-battery" - normal_check_interval: 120 + check_interval: 120 hosts: schein, rietz - name: HW - hpacucli enclosure status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli-enclosure 1 1E:1" - normal_check_interval: 120 + check_interval: 120 hosts: franck - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --ignore-transfer-speed=1I:1:1 --ignore-transfer-speed=1I:1:2" - normal_check_interval: 120 + check_interval: 120 hostgroups: dl585 + - + name: HW - hpssacli status + servicegroups: raid + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpssacli" + check_interval: 120 + hostgroups: hpnewraid ### # - # name: HW - edac status # nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" -# normal_check_interval: 120 +# check_interval: 120 #hostgroups: computers #excludehosts: villa, lobos, schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" - normal_check_interval: 120 + check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl - excludehosts: villa, lobos, schein, storace + excludehosts: villa, lobos, schein, storace, mirror-anu - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" - normal_check_interval: 120 + check_interval: 120 hosts: villa - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant --ignore-failed='PS2'" - normal_check_interval: 120 + check_interval: 120 hosts: lobos - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant" - normal_check_interval: 120 + check_interval: 120 hosts: schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" - normal_check_interval: 120 + check_interval: 120 hosts: storace + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'" + check_interval: 120 + hosts: mirror-anu ### - name: RAID - 3ware @@ -2086,6 +2130,12 @@ services: servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All" hostgroups: drbd-hosts + excludehosts: ubc-bl8 + - + name: RAID - DRBD + servicegroups: raid + nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All --ok-no-devices" + hosts: ubc-bl8 # }}} # }}} # {{{ ### mail stuff @@ -2293,7 +2343,7 @@ services: nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45" hostgroups: postfix-hosts depends: process - postfix - master - normal_check_interval: 120 + check_interval: 120 # }}} # {{{ mail - network service - @@ -2370,31 +2420,31 @@ services: excludehosts: menotti excludehostgroups: broken_https_default_vhost depends: "process - apache2 - master" - normal_check_interval: 120 + check_interval: 120 - name: network service - https check: dsa_check_https_want_auth hosts: menotti depends: "process - apache2 - master" - normal_check_interval: 120 + check_interval: 120 - name: network service - https check: dsa_check_https_any_status hostgroups: broken_https_default_vhost depends: "process - apache2 - master" - normal_check_interval: 120 + check_interval: 120 - name: network service - https cert check: dsa_check_cert!443 hostgroups: apache-https depends: network service - https - normal_check_interval: 60 + check_interval: 60 - name: unwanted network service - https check: dsa_check_port_closed!443 hostgroups: apache2-hosts excludehostgroups: apache-https - normal_check_interval: 60 + check_interval: 60 # }}} # {{{ FTP - @@ -2474,7 +2524,7 @@ services: nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s ZNs -c 0" hostgroups: buildd contact_groups: +buildd - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 24 retry_check_interval: 5 - @@ -2524,14 +2574,14 @@ services: name: mirror sync - packages check: "dsa_check_mirrorsync_skew!packages.debian.org!Pics/.trace!3600:57600" hosts: global - normal_check_interval: 15 + check_interval: 15 max_check_attempts: 5 retry_check_interval: 5 - name: mirror sync - snapshot check: "dsa_check_mirrorsync_skew!snapshot.debian.org!project/trace/snapshot-master.debian.org!3600:28800" hosts: global - normal_check_interval: 15 + check_interval: 15 max_check_attempts: 5 retry_check_interval: 5 # }}} @@ -2614,7 +2664,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/check_ping -H $HOSTADDRESS$ -w 50,10% -c 200,30%" runfrom: ubc-bl8 hosts: giustini - normal_check_interval: 5 + check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - @@ -2633,7 +2683,7 @@ services: name: current chroots nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" hostgroups: porterbox - normal_check_interval: 60 + check_interval: 60 retry_check_interval: 15 # }}} # {{{ openstack @@ -2698,7 +2748,7 @@ services: name: network service - ldaps cert check: dsa_check_cert!636 depends: process - slapd - normal_check_interval: 60 + check_interval: 60 hosts: draghi ### - @@ -2723,6 +2773,21 @@ services: check: check_tcp!873 hosts: milanollo2 depends: milanollo:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: mirror-isc2, mirror-isc-syncproxy + depends: mirror-isc:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: mirror-umn2, mirror-umn3 + depends: mirror-umn:process - xinetd + - + name: network service - rsync + check: check_tcp!873 + hosts: mirror-anu2, mirror-anu3 + depends: mirror-anu:process - xinetd ### - name: process - icinga @@ -2743,19 +2808,19 @@ services: - name: network service - sip-tls cert - 443 check: dsa_check_cert!443 - normal_check_interval: 60 + check_interval: 60 hosts: vogler - name: network service - sip-tls cert - 5061 check: dsa_check_cert!5061 - normal_check_interval: 60 + check_interval: 60 hosts: vogler #### - name: puppetmaster cert nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" hosts: handel - normal_check_interval: 60 + check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 # }}}