X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=e45ff9ead5bee99be40924a3f39989b55d5fca37;hb=9172aa164eb91df4933e6b466e57145e170d975b;hp=7278b5df29bcd5fe740a42f4e3514624f4377dc5;hpb=141315704546e1677b902059a430bb05cc169f41;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 7278b5d..e45ff9e 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -515,11 +515,11 @@ servers: conova-node01: address: 217.196.149.227 parents: gw-conova - hostgroups: computers, stretch, service, sw-raid + hostgroups: computers, stretch, service, sw-raid, drbd-hosts conova-node02: address: 217.196.149.228 parents: gw-conova - hostgroups: computers, stretch, service, sw-raid + hostgroups: computers, stretch, service, sw-raid, drbd-hosts ganeti-conova: address: 217.196.149.235 parents: gw-conova @@ -752,7 +752,7 @@ servers: handel: address: 82.195.75.104 parents: ganeti3 - hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts + hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs kaufmann: address: 82.195.75.107 parents: ganeti3 @@ -830,10 +830,6 @@ servers: address: 140.211.166.196 parents: pieta hostgroups: computers, hassrvfs, buildd, stretch - powerpc-osuosl-01: - address: 140.211.166.197 - parents: pieta - hostgroups: computers, hassrvfs, buildd, jessie # }}} # {{{ gw-sanger sallinen: @@ -1023,10 +1019,6 @@ servers: address: 143.106.167.124 parents: gw-unicamp hostgroups: computers, stretch, service, manyprocesses - powerpc-unicamp-01: - address: 143.106.167.120 - parents: prokofiev - hostgroups: computers, hassrvfs, buildd, jessie ppc64el-unicamp-01: address: 143.106.167.121 parents: prokofiev @@ -1098,9 +1090,6 @@ hostgroups: armhf: alias: armhf private: 1 - sparc: - alias: sparc - private: 1 porterbox: alias: developer accessible porter machines @@ -1281,9 +1270,6 @@ hostgroups: high-RTT: alias: machines with high round trip times private: 1 - alioth: - alias: machines that just are just awkward - private: 1 #openstack-compute: # alias: nodes that run OpenStack compute # private: 1 @@ -1355,7 +1341,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw" hostgroups: computers check_interval: 60 - excludehostgroups: alioth # }}} # {{{ ### disk usage - @@ -1540,7 +1525,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers check_interval: 60 - excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' @@ -1654,7 +1638,6 @@ services: servicegroups: backup nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: computers - excludehostgroups: alioth - name: network backup status - draghi @@ -1708,10 +1691,6 @@ services: name: process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'" hostgroups: computers - - - name: unexpected process - ulogd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd" - hostgroups: sparc #### - name: process - samhain @@ -1745,19 +1724,16 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lotti hostgroups: computers - excludehostgroups: alioth - name: remote logging on lully remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lully hostgroups: computers - excludehostgroups: alioth - name: remote logging on loghost-grnet-01 remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: loghost-grnet-01 hostgroups: computers - excludehostgroups: alioth # }}} # {{{ base service - @@ -1847,7 +1823,6 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: alioth ### - name: MQ connection on rainier @@ -1857,7 +1832,7 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 - excludehostgroups: alioth, broken_mq + excludehostgroups: broken_mq - name: MQ connection on rapoport servicegroups: MQ @@ -1866,7 +1841,7 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 - excludehostgroups: alioth, broken_mq + excludehostgroups: broken_mq ### - name: local resolver @@ -1877,7 +1852,11 @@ services: name: process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" hostgroups: computers - excludehostgroups: alioth + - + name: unbound trust anchors + nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors" + hostgroups: computers + check_interval: 60 ### - name: process - uptimed @@ -1908,12 +1887,10 @@ services: name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" hostgroups: computers - excludehostgroups: alioth - name: process - stunnel4 - puppet-ekeyd is crazy nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity" hostgroups: computers - excludehostgroups: alioth excludehosts: czerny, grnet-node01, storace # }}} # {{{ anti-services @@ -2085,6 +2062,15 @@ services: name: mail queue nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000" hostgroups: heavy-exim + - + name: process - fail2ban + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server" + hostgroups: heavy-exim, heavy-postfix + - + name: unwanted process - fail2ban + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server" + hostgroups: computers + excludehostgroups: heavy-exim, heavy-postfix # }}} # {{{ clamav - @@ -2179,23 +2165,23 @@ services: - name: process - weightd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix - name: process - weightd - cache nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix depends: process - weightd - master - name: process - weightd - child nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix depends: process - weightd - master ### - name: unwanted process - policyd-weight nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" hostgroups: computers - excludehostgroups: heavy-postfix, alioth + excludehostgroups: heavy-postfix # }}} # {{{ postfix ### @@ -2905,6 +2891,21 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 + #### + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%" + hosts: conova-node01 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%" + hosts: conova-node02 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 # }}} # }}}