# conova-node01: this change appends drbd-hosts to the host's hostgroups;
# address, parent and the other group memberships stay as they were.
conova-node01:
address: 217.196.149.227
parents: gw-conova
- hostgroups: computers, stretch, service, sw-raid
+ hostgroups: computers, stretch, service, sw-raid, drbd-hosts
# conova-node02: this change appends drbd-hosts to the host's hostgroups;
# address, parent and the other group memberships stay as they were.
conova-node02:
address: 217.196.149.228
parents: gw-conova
- hostgroups: computers, stretch, service, sw-raid
+ hostgroups: computers, stretch, service, sw-raid, drbd-hosts
ganeti-conova:
address: 217.196.149.235
parents: gw-conova
address: 82.195.75.103
parents: gw-manda
hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses
# New host entry added by this change: manda-node03, with gw-manda as its
# parent, placed in the computers, service and stretch hostgroups.
+ manda-node03:
+ address: 82.195.75.69
+ parents: gw-manda
+ hostgroups: computers, service, stretch
# New host entry added by this change: manda-node04, with gw-manda as its
# parent, placed in the computers, service and stretch hostgroups.
+ manda-node04:
+ address: 82.195.75.70
+ parents: gw-manda
+ hostgroups: computers, service, stretch
bendel:
address: 82.195.75.100
parents: ganeti3
# handel: this change appends hassrvfs to the host's hostgroups; every
# other membership is carried over unchanged.
handel:
address: 82.195.75.104
parents: ganeti3
- hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts
+ hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs
kaufmann:
address: 82.195.75.107
parents: ganeti3
address: 140.211.166.196
parents: pieta
hostgroups: computers, hassrvfs, buildd, stretch
# Host entry deleted by this change: powerpc-osuosl-01 (parent pieta,
# member of the buildd and jessie hostgroups).
- powerpc-osuosl-01:
- address: 140.211.166.197
- parents: pieta
- hostgroups: computers, hassrvfs, buildd, jessie
# }}}
# {{{ gw-sanger
sallinen:
# sibelius: postgres94-hosts is dropped from the host's hostgroups (that
# hostgroup definition is itself deleted in this change); the remaining
# memberships and the contacts list are unchanged.
sibelius:
address: 193.62.202.28
parents: gw-sanger
- hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
+ hostgroups: computers, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
contacts: tjrc1, dave
# }}}
# {{{ gw-scanplus
address: 143.106.167.124
parents: gw-unicamp
hostgroups: computers, stretch, service, manyprocesses
# Host entry deleted by this change: powerpc-unicamp-01 (parent prokofiev,
# member of the buildd and jessie hostgroups).
- powerpc-unicamp-01:
- address: 143.106.167.120
- parents: prokofiev
- hostgroups: computers, hassrvfs, buildd, jessie
ppc64el-unicamp-01:
address: 143.106.167.121
parents: prokofiev
armhf:
alias: armhf
private: 1
# Hostgroup definition deleted by this change. The one check in this diff
# that targeted it ("unexpected process - ulogd") is deleted as well.
- sparc:
- alias: sparc
- private: 1
porterbox:
alias: developer accessible porter machines
xinetd-hosts:
alias: hosts providing services via xinetd
private: 1
# Hostgroup definition deleted by this change. Its visible users go with
# it: sibelius' membership, the exclusion on "unwanted process -
# postgresql", and the "process - postgresql94 - master" check.
- postgres94-hosts:
- alias: hosts running postgres94
- private: 1
postgres96-hosts:
alias: hosts running postgres96
private: 1
high-RTT:
alias: machines with high round trip times
private: 1
# Hostgroup definition deleted by this change. The same change strips
# alioth from every hostgroups/excludehostgroups list shown in this diff.
- alioth:
- alias: machines that just are just awkward
- private: 1
#openstack-compute:
# alias: nodes that run OpenStack compute
# private: 1
nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw"
hostgroups: computers
check_interval: 60
- excludehostgroups: alioth
# }}}
# {{{ ### disk usage
-
nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
hostgroups: computers
check_interval: 60
- excludehostgroups: alioth
-
name: setup - local hostname etc-hosts
nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
servicegroups: backup
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'"
hostgroups: computers
- excludehostgroups: alioth
-
name: network backup status - draghi
name: process - ulogd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'"
hostgroups: computers
# Check deleted by this change: check_procs with a 0:0 warning range on the
# ulogd command name, applied to the sparc hostgroup (which is also deleted).
- -
- name: unexpected process - ulogd
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd"
- hostgroups: sparc
####
-
name: process - samhain
remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
runfrom: lotti
hostgroups: computers
- excludehostgroups: alioth
# Log-age check for each host in computers, declared as a remotecheck with
# runfrom: lully. This change removes the alioth exclusion.
-
name: remote logging on lully
remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
runfrom: lully
hostgroups: computers
- excludehostgroups: alioth
# Log-age check for each host in computers, declared as a remotecheck with
# runfrom: loghost-grnet-01. This change removes the alioth exclusion.
-
name: remote logging on loghost-grnet-01
remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
runfrom: loghost-grnet-01
hostgroups: computers
- excludehostgroups: alioth
# }}}
# {{{ base service
# Expects exactly one root-owned ud-replicated process (warning and critical
# ranges are both 1:1) on every host in computers. This change removes the
# alioth exclusion.
-
name: process - ud-replicated
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'"
hostgroups: computers
- excludehostgroups: alioth
###
-
name: MQ connection on rainier
hostgroups: computers
check_interval: 60
retry_interval: 15
- excludehostgroups: alioth, broken_mq
+ excludehostgroups: broken_mq
-
name: MQ connection on rapoport
servicegroups: MQ
hostgroups: computers
check_interval: 60
retry_interval: 15
- excludehostgroups: alioth, broken_mq
+ excludehostgroups: broken_mq
###
-
name: local resolver
name: process - unbound
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
hostgroups: computers
- excludehostgroups: alioth
# New check added by this change: runs dsa-check-unbound-anchors through
# nrpe on every host in computers, with check_interval set to 60.
+ -
+ name: unbound trust anchors
+ nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors"
+ hostgroups: computers
+ check_interval: 60
###
-
name: process - uptimed
name: process - stunnel4 - puppet-ekeyd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
hostgroups: computers
- excludehostgroups: alioth
# Runs dsa-check-stunnel-sanity under sudo on hosts in computers, except the
# hosts listed in excludehosts. This change removes the alioth group
# exclusion; the per-host exclusions stay.
-
name: process - stunnel4 - puppet-ekeyd is crazy
nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity"
hostgroups: computers
- excludehostgroups: alioth
excludehosts: czerny, grnet-node01, storace
# }}}
# {{{ anti-services
name: mail queue
nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000"
hostgroups: heavy-exim
# New check added by this change: on heavy-exim and heavy-postfix hosts,
# warn unless exactly one fail2ban-server process is running (-w 1:1).
# No critical range is given.
+ -
+ name: process - fail2ban
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server"
+ hostgroups: heavy-exim, heavy-postfix
# New check added by this change: on every host in computers outside
# heavy-exim and heavy-postfix, warn if any fail2ban-server process is
# running (-w 0:0).
+ -
+ name: unwanted process - fail2ban
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server"
+ hostgroups: computers
+ excludehostgroups: heavy-exim, heavy-postfix
# }}}
# {{{ clamav
-
# Expects one process of user polw whose arguments contain
# 'policyd-weight (master)'. This change narrows the targets to
# heavy-postfix only (alioth removed).
-
name: process - weightd - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'"
- hostgroups: heavy-postfix, alioth
+ hostgroups: heavy-postfix
# Expects one process of user polw whose arguments contain
# 'policyd-weight (cache)'; declared as depending on the master check.
# This change narrows the targets to heavy-postfix only (alioth removed).
-
name: process - weightd - cache
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'"
- hostgroups: heavy-postfix, alioth
+ hostgroups: heavy-postfix
depends: process - weightd - master
# Warns outside 2-50 processes of user polw whose arguments contain
# 'policyd-weight (child)', critical below 1; declared as depending on the
# master check. This change narrows the targets to heavy-postfix only
# (alioth removed).
-
name: process - weightd - child
nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'"
- hostgroups: heavy-postfix, alioth
+ hostgroups: heavy-postfix
depends: process - weightd - master
###
# Warns if any policyd-weight process runs on a host in computers that is
# not excluded. This change drops alioth from the exclusion list, leaving
# only heavy-postfix excluded.
-
name: unwanted process - policyd-weight
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight"
hostgroups: computers
- excludehostgroups: heavy-postfix, alioth
+ excludehostgroups: heavy-postfix
# }}}
# {{{ postfix
###
name: unwanted process - postgresql
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres"
hostgroups: computers
- excludehostgroups: postgres94-hosts, postgres96-hosts
+ excludehostgroups: postgres96-hosts
-
name: unwanted process - postgresql 9.0
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'"
hostgroups: computers
# Check deleted by this change: it looked for the 9.4 postgres binary on
# postgres94-hosts, a hostgroup that this change also deletes.
- -
- name: process - postgresql94 - master
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'"
- hostgroups: postgres94-hosts
-
name: process - postgresql96 - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'"
hostgroups: computers
check_interval: 60
retry_interval: 15
+ ####
# New check added by this change: conova-node01 pings 172.29.184.12 via
# nrpe. Warning at 50 ms round-trip average or 10% loss, critical at 200 ms
# or 30% loss.
# NOTE(review): 172.29.184.12 is presumably conova-node02's mgmt address -
# confirm; the address-to-host mapping is not shown here.
+ -
+ name: ping peer on mgmt network
+ nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%"
+ hosts: conova-node01
+ check_interval: 5
+ max_check_attempts: 4
+ retry_interval: 1
# New check added by this change: conova-node02 pings 172.29.184.11 via
# nrpe. Warning at 50 ms round-trip average or 10% loss, critical at 200 ms
# or 30% loss.
# NOTE(review): 172.29.184.11 is presumably conova-node01's mgmt address -
# confirm; the address-to-host mapping is not shown here.
+ -
+ name: ping peer on mgmt network
+ nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%"
+ hosts: conova-node02
+ check_interval: 5
+ max_check_attempts: 4
+ retry_interval: 1
# }}}
# }}}