X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=1f2c420762a71598e7c61365129bd25e9d3b6d08;hb=880136158329669e59bf7559ec78d5016b975dad;hp=230120db065d922cfb76fdebac72926b1ca78287;hpb=c5c799e293c01fbe01e25ad8c4dd8941b9704ddf;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 230120d..1f2c420 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -402,10 +402,6 @@ servers: address: 5.153.231.20 parents: ganeti-bytemark hostgroups: computers, general, kvmdomains, stretch, nfs-client, autofs, systemd-timesyncd - moszumanska: - address: 5.153.231.21 - parents: ganeti-bytemark - hostgroups: secondary-IPs dillon: address: 5.153.231.22 parents: ganeti-bytemark @@ -515,11 +511,11 @@ servers: conova-node01: address: 217.196.149.227 parents: gw-conova - hostgroups: computers, stretch, service, sw-raid + hostgroups: computers, stretch, service, sw-raid, drbd-hosts conova-node02: address: 217.196.149.228 parents: gw-conova - hostgroups: computers, stretch, service, sw-raid + hostgroups: computers, stretch, service, sw-raid, drbd-hosts ganeti-conova: address: 217.196.149.235 parents: gw-conova @@ -528,22 +524,6 @@ servers: address: 217.196.149.229 parents: gw-conova hostgroups: computers, stretch, service, apache2-hosts - mirror-conova-debian: - address: 217.196.149.232 - hostgroups: secondary-IPs - parents: mirror-conova - mirror-conova-security: - address: 217.196.149.233 - hostgroups: secondary-IPs, rsyncd-hosts, security_mirror - parents: mirror-conova - mirror-conova-archive: - address: 217.196.149.234 - hostgroups: secondary-IPs, rsyncd-hosts - parents: mirror-conova - mirror-conova-syncproxy4-eu: - address: 217.196.149.237 - hostgroups: secondary-IPs, rsyncd-hosts, https-service - parents: mirror-conova arm-conova-01: address: 217.196.149.230 @@ -557,6 +537,27 @@ servers: address: 217.196.149.236 parents: ganeti-conova hostgroups: computers, hassrvfs, porterbox, stretch + + schmelzer: + address: 185.69.161.161 + parents: gw-conova + hostgroups: computers, service, stretch, r540, manyprocesses, apache2-hosts, apache-https, systemd-timesyncd + schmelzer-debian: + address: 217.196.149.232 + hostgroups: secondary-IPs + parents: schmelzer + schmelzer-security: + address: 217.196.149.233 + hostgroups: secondary-IPs, rsyncd-hosts, security_mirror + parents: schmelzer + schmelzer-archive: + address: 217.196.149.234 + hostgroups: secondary-IPs, rsyncd-hosts + parents: schmelzer + schmelzer-syncproxy4-eu: + address: 217.196.149.237 + hostgroups: secondary-IPs, rsyncd-hosts, https-service + parents: schmelzer # }}} # {{{ gw-csail csail-node01: @@ -720,11 +721,19 @@ servers: czerny: address: 82.195.75.109 parents: gw-manda - hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses + hostgroups: computers, service, dl380, acpid-hosts, stretch, manyprocesses clementi: address: 82.195.75.103 parents: gw-manda - hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses + hostgroups: computers, service, dl380, acpid-hosts, stretch, manyprocesses + manda-node03: + address: 82.195.75.69 + parents: gw-manda + hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses + manda-node04: + address: 82.195.75.70 + parents: gw-manda + hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses bendel: address: 82.195.75.100 parents: ganeti3 @@ -737,14 +746,10 @@ servers: address: 82.195.75.98 parents: ganeti3 hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, heavy-exim - lully: - address: 82.195.75.99 - parents: ganeti3 - hostgroups: computers, service, hasbootfs, kvmdomains, stretch, hasvarlogfs draghi: address: 82.195.75.106 parents: ganeti3 - hostgroups: computers, service, hasbootfs, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch + hostgroups: computers, service, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch geo1: address: 82.195.75.105 parents: ganeti3 @@ -752,7 +757,7 @@ servers: handel: address: 82.195.75.104 parents: ganeti3 - hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts + hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs kaufmann: address: 82.195.75.107 parents: ganeti3 @@ -822,18 +827,23 @@ servers: parents: byrd hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, hassrvfs, rsyncd-hosts, apache-https + pijper: + address: 140.211.166.194 + parents: gw-osuosl + hostgroups: computers, stretch, service, manyprocesses + loghost-osuosl-01: + address: 140.211.166.202 + parents: pijper + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, systemd-timesyncd + pieta: address: 140.211.166.195 parents: gw-osuosl hostgroups: computers, stretch, service, manyprocesses ppc64el-osuosl-01: address: 140.211.166.196 - parents: pieta + parents: pijper hostgroups: computers, hassrvfs, buildd, stretch - powerpc-osuosl-01: - address: 140.211.166.197 - parents: pieta - hostgroups: computers, hassrvfs, buildd, jessie # }}} # {{{ gw-sanger sallinen: @@ -847,7 +857,7 @@ servers: sibelius: address: 193.62.202.28 parents: gw-sanger - hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts + hostgroups: computers, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts contacts: tjrc1, dave # }}} # {{{ gw-scanplus @@ -987,14 +997,14 @@ servers: address: 209.87.16.46 parents: ubc-gateway hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https, broken_https_default_vhost - kantuser: - address: 209.87.16.47 - parents: ubc-gateway - hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts grabbe: address: 209.87.16.48 parents: ubc-gateway hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https + trabaci: + address: 209.87.16.49 + parents: ubc-gateway + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, systemd-timesyncd # }}} # {{{ gw-umn #saens: @@ -1049,6 +1059,10 @@ servers: address: 130.89.148.14 parents: klecker hostgroups: secondary-IPs + smit: + address: 130.89.148.78 + parents: gw-utwente + hostgroups: computers, service, stretch, r540, manyprocesses, incomingmailrelayed2025 # }}} # {{{ gw-ynic henze: @@ -1136,11 +1150,16 @@ hostgroups: pe1950: alias: Dell PowerEdge 1950 hosts private: 1 + r540: + alias: Dell PowerEdge R540 hosts + private: 1 jessie: alias: Hosts running jessie stretch: alias: Hosts running stretch + buster: + alias: Hosts running buster kvmdomains: alias: Hosts that are KVM domains @@ -1180,9 +1199,6 @@ hostgroups: xinetd-hosts: alias: hosts providing services via xinetd private: 1 - postgres94-hosts: - alias: hosts running postgres94 - private: 1 postgres96-hosts: alias: hosts running postgres96 private: 1 @@ -1274,9 +1290,6 @@ hostgroups: high-RTT: alias: machines with high round trip times private: 1 - alioth: - alias: machines that just are just awkward - private: 1 #openstack-compute: # alias: nodes that run OpenStack compute # private: 1 @@ -1348,7 +1361,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw" hostgroups: computers check_interval: 60 - excludehostgroups: alioth # }}} # {{{ ### disk usage - @@ -1533,7 +1545,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers check_interval: 60 - excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' @@ -1647,7 +1658,6 @@ services: servicegroups: backup nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: computers - excludehostgroups: alioth - name: network backup status - draghi @@ -1734,19 +1744,16 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lotti hostgroups: computers - excludehostgroups: alioth - - name: remote logging on lully + name: remote logging on loghost-grnet-01 remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" - runfrom: lully + runfrom: loghost-grnet-01 hostgroups: computers - excludehostgroups: alioth - - name: remote logging on loghost-grnet-01 + name: remote logging on loghost-osuosl-01 remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" - runfrom: loghost-grnet-01 + runfrom: loghost-osuosl-01 hostgroups: computers - excludehostgroups: alioth # }}} # {{{ base service - @@ -1771,13 +1778,13 @@ services: name: process - nrpe nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d'" hostgroups: computers - excludehostgroups: stretch + excludehostgroups: stretch buster max_check_attempts: -1 depends: network service - nrpe - name: process - nrpe nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -f'" - hostgroups: stretch + hostgroups: stretch buster max_check_attempts: -1 depends: network service - nrpe ### @@ -1836,7 +1843,6 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: alioth ### - name: MQ connection on rainier @@ -1846,7 +1852,7 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 - excludehostgroups: alioth, broken_mq + excludehostgroups: broken_mq - name: MQ connection on rapoport servicegroups: MQ @@ -1855,7 +1861,7 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 - excludehostgroups: alioth, broken_mq + excludehostgroups: broken_mq ### - name: local resolver @@ -1866,7 +1872,11 @@ services: name: process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" hostgroups: computers - excludehostgroups: alioth + - + name: unbound trust anchors + nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors" + hostgroups: computers + check_interval: 60 ### - name: process - uptimed @@ -1897,12 +1907,10 @@ services: name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" hostgroups: computers - excludehostgroups: alioth - name: process - stunnel4 - puppet-ekeyd is crazy nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity" hostgroups: computers - excludehostgroups: alioth excludehosts: czerny, grnet-node01, storace # }}} # {{{ anti-services @@ -1927,7 +1935,7 @@ services: - name: unwanted process - rpc.statd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C rpc.statd" - hostgroups: stretch + hostgroups: stretch buster excludehosts: storace - name: unwanted process - inetd @@ -2045,7 +2053,12 @@ services: - name: HW - OpenManage status nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage" - hostgroups: pe1950 + hostgroups: pe1950, r540 + excludehosts: wieck, schumann + - + name: HW - OpenManage status + nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage -b bp=0 -b bat_charge=0:0" + hosts: wieck, schumann # }}} # }}} # {{{ ### mail stuff @@ -2074,6 +2087,15 @@ services: name: mail queue nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000" hostgroups: heavy-exim + - + name: process - fail2ban + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server" + hostgroups: heavy-exim, heavy-postfix + - + name: unwanted process - fail2ban + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server" + hostgroups: computers + excludehostgroups: heavy-exim, heavy-postfix # }}} # {{{ clamav - @@ -2168,23 +2190,23 @@ services: - name: process - weightd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix - name: process - weightd - cache nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix depends: process - weightd - master - name: process - weightd - child nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix depends: process - weightd - master ### - name: unwanted process - policyd-weight nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" hostgroups: computers - excludehostgroups: heavy-postfix, alioth + excludehostgroups: heavy-postfix # }}} # {{{ postfix ### @@ -2394,15 +2416,11 @@ services: name: unwanted process - postgresql nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres" hostgroups: computers - excludehostgroups: postgres94-hosts, postgres96-hosts + excludehostgroups: postgres96-hosts - name: unwanted process - postgresql 9.0 nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'" hostgroups: computers - - - name: process - postgresql94 - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'" - hostgroups: postgres94-hosts - name: process - postgresql96 - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'" @@ -2439,7 +2457,7 @@ services: name: process - statd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u statd -C rpc.statd -a '/sbin/rpc.statd'" hostgroups: nfs-client, nfs-server - excludehostgroups: stretch + excludehostgroups: stretch buster - name: process - nfsd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u root -C nfsd -a '[nfsd]'" @@ -2685,6 +2703,16 @@ services: check: "dsa_check_staticsync!miniconf10.debconf.org" hosts: global servicegroups: mirror + - + name: mirror static sync - wiki + check: "dsa_check_staticsync!wiki.debconf.org" + hosts: global + servicegroups: mirror + - + name: mirror static sync - www + check: "dsa_check_staticsync!www.debconf.org" + hosts: global + servicegroups: mirror # }}} # {{{ DNS - @@ -2881,6 +2909,14 @@ services: check_interval: 60 hosts: vogler #### + - + name: Debian SMTP CA + nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /srv/puppet.debian.org/ca/ca.crt" + hosts: handel + check_interval: 60 + max_check_attempts: 2 + retry_interval: 5 + #### - name: puppetmaster cert nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" @@ -2894,6 +2930,37 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 + #### + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%" + hosts: conova-node01 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%" + hosts: conova-node02 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.14 -w 50,10% -c 200,30%" + hosts: manda-node03 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.13 -w 50,10% -c 200,30%" + hosts: manda-node04 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + # }}} # }}} # }}}