address: 5.153.231.20
parents: ganeti-bytemark
hostgroups: computers, general, kvmdomains, stretch, nfs-client, autofs, systemd-timesyncd
# Removed entry (every line carries the diff-style '-' marker): the
# moszumanska host at 5.153.231.21, child of ganeti-bytemark, whose only
# hostgroup was secondary-IPs.
- moszumanska:
- address: 5.153.231.21
- parents: ganeti-bytemark
- hostgroups: secondary-IPs
dillon:
address: 5.153.231.22
parents: ganeti-bytemark
# conova-node01 and conova-node02: two hosts behind gw-conova. For each one
# the '-'/'+' pair swaps the hostgroups list for the same list with
# drbd-hosts appended; address and parents are untouched.
conova-node01:
address: 217.196.149.227
parents: gw-conova
- hostgroups: computers, stretch, service, sw-raid
+ hostgroups: computers, stretch, service, sw-raid, drbd-hosts
conova-node02:
address: 217.196.149.228
parents: gw-conova
- hostgroups: computers, stretch, service, sw-raid
+ hostgroups: computers, stretch, service, sw-raid, drbd-hosts
ganeti-conova:
address: 217.196.149.235
parents: gw-conova
address: 82.195.75.103
parents: gw-manda
hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses
# Added entries ('+' marker): hosts manda-node03 and manda-node04, both
# children of gw-manda with identical hostgroups, including r540 and
# drbd-hosts.
+ manda-node03:
+ address: 82.195.75.69
+ parents: gw-manda
+ hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses
+ manda-node04:
+ address: 82.195.75.70
+ parents: gw-manda
+ hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses
bendel:
address: 82.195.75.100
parents: ganeti3
# draghi: child of ganeti3. The replaced hostgroups line differs from the old
# one only by dropping hasbootfs; all other groups are kept in the same order.
draghi:
address: 82.195.75.106
parents: ganeti3
- hostgroups: computers, service, hasbootfs, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch
+ hostgroups: computers, service, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch
geo1:
address: 82.195.75.105
parents: ganeti3
# handel: child of ganeti3. The new hostgroups line is the old one with
# hassrvfs appended.
handel:
address: 82.195.75.104
parents: ganeti3
- hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts
+ hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs
kaufmann:
address: 82.195.75.107
parents: ganeti3
parents: byrd
hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, hassrvfs, rsyncd-hosts, apache-https
# Added entry ('+' marker): host pijper at 140.211.166.194, child of
# gw-osuosl.
+ pijper:
+ address: 140.211.166.194
+ parents: gw-osuosl
+ hostgroups: computers, stretch, service, manyprocesses
pieta:
address: 140.211.166.195
parents: gw-osuosl
address: 140.211.166.196
parents: pieta
hostgroups: computers, hassrvfs, buildd, stretch
# Removed entry ('-' marker): host powerpc-osuosl-01, a child of pieta that
# was in the buildd and jessie hostgroups.
- powerpc-osuosl-01:
- address: 140.211.166.197
- parents: pieta
- hostgroups: computers, hassrvfs, buildd, jessie
# }}}
# {{{ gw-sanger
sallinen:
# sibelius: child of gw-sanger with its own contacts list. The replaced
# hostgroups line drops postgres94-hosts and keeps every other group.
sibelius:
address: 193.62.202.28
parents: gw-sanger
- hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
+ hostgroups: computers, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
contacts: tjrc1, dave
# }}}
# {{{ gw-scanplus
address: 143.106.167.124
parents: gw-unicamp
hostgroups: computers, stretch, service, manyprocesses
# Removed entry ('-' marker): host powerpc-unicamp-01, a child of prokofiev
# that was in the buildd and jessie hostgroups.
- powerpc-unicamp-01:
- address: 143.106.167.120
- parents: prokofiev
- hostgroups: computers, hassrvfs, buildd, jessie
ppc64el-unicamp-01:
address: 143.106.167.121
parents: prokofiev
# Hostgroup definitions: armhf is kept as-is; the sparc hostgroup is removed
# ('-' marker). Both carry an alias equal to the group name and private: 1.
# NOTE(review): the meaning of "private" is not visible in this excerpt.
armhf:
alias: armhf
private: 1
- sparc:
- alias: sparc
- private: 1
porterbox:
alias: developer accessible porter machines
# Hardware-model hostgroups: pe1950 (existing) and r540 (added, '+' marker),
# each aliased to the matching Dell PowerEdge model.
pe1950:
alias: Dell PowerEdge 1950 hosts
private: 1
+ r540:
+ alias: Dell PowerEdge R540 hosts
+ private: 1
jessie:
alias: Hosts running jessie
# Service hostgroups: xinetd-hosts and postgres96-hosts are kept; the
# postgres94-hosts group between them is removed ('-' marker).
xinetd-hosts:
alias: hosts providing services via xinetd
private: 1
- postgres94-hosts:
- alias: hosts running postgres94
- private: 1
postgres96-hosts:
alias: hosts running postgres96
private: 1
# high-RTT hostgroup is kept; the alioth hostgroup is removed ('-' marker).
high-RTT:
alias: machines with high round trip times
private: 1
- alioth:
- alias: machines that just are just awkward
- private: 1
#openstack-compute:
# alias: nodes that run OpenStack compute
# private: 1
nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw"
hostgroups: computers
check_interval: 60
- excludehostgroups: alioth
# }}}
# {{{ ### disk usage
-
nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
hostgroups: computers
check_interval: 60
- excludehostgroups: alioth
-
name: setup - local hostname etc-hosts
nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
name: free memory - percent
nrpe: "/usr/lib/nagios/plugins/dsa-check-memory -m pct"
hostgroups: computers
# getty process checks. The removed entry ('-' marker) looked for /sbin/getty
# on "computers" minus the jessie and stretch hostgroups (and two named
# hosts). The remaining entry looks for /sbin/agetty; its hostgroups line is
# switched from "jessie, stretch" to "computers".
- -
- name: process - getty
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty"
- hostgroups: computers
- excludehosts: zelenka, zandonai
- excludehostgroups: jessie, stretch
-
name: process - getty
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty"
- hostgroups: jessie, stretch
+ hostgroups: computers
-
name: processes - zombies
servicegroups: backup
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'"
hostgroups: computers
- excludehostgroups: alioth
-
name: network backup status - draghi
# ulogd process check. The diff replaces its hostgroups ("jessie, stretch" ->
# "computers") and, in the same removed run, deletes the follow-on
# "unexpected process - ulogd" entry that targeted the sparc hostgroup.
-
name: process - ulogd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'"
- hostgroups: jessie, stretch
- -
- name: unexpected process - ulogd
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd"
- hostgroups: sparc
+ hostgroups: computers
####
-
name: process - samhain
excludehostgroups: brokensamhain
# }}}
# {{{ logging
# syslog-ng process checks. The removed entry ('-' marker) matched the
# "-p /var/run/syslog-ng.pid" command line on hosts outside jessie/stretch.
# The remaining entry matches "/sbin/syslog-ng -F"; its hostgroups line is
# switched from "jessie, stretch" to "computers".
- -
- name: process - syslog-ng
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'"
- hostgroups: computers
- excludehostgroups: jessie, stretch
-
name: process - syslog-ng
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -F'"
- hostgroups: jessie, stretch
+ hostgroups: computers
# Remote-logging checks: one entry per log host (lotti, lully,
# loghost-grnet-01). Each runs dsa-check-log-age-loghost with $HOSTNAME$ from
# the host named in "runfrom" and applies to the computers hostgroup. The only
# change is the removal ('-' marker) of the alioth exclusion in each entry.
-
name: remote logging on lotti
remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
runfrom: lotti
hostgroups: computers
- excludehostgroups: alioth
-
name: remote logging on lully
remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
runfrom: lully
hostgroups: computers
- excludehostgroups: alioth
-
name: remote logging on loghost-grnet-01
remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
runfrom: loghost-grnet-01
hostgroups: computers
- excludehostgroups: alioth
# }}}
# {{{ base service
# ud-replicated process check for the computers hostgroup; both -w and -c are
# 1:1. The alioth exclusion is removed ('-' marker).
-
name: process - ud-replicated
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'"
hostgroups: computers
- excludehostgroups: alioth
# Removed entry ('-' marker on every line, including its "###" separator):
# the monit process check, which already excluded the alioth, jessie and
# stretch hostgroups.
- ###
- -
- name: process - monit
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
- hostgroups: computers
- excludehostgroups: alioth, jessie, stretch
###
-
name: MQ connection on rainier
hostgroups: computers
check_interval: 60
retry_interval: 15
- excludehostgroups: alioth, broken_mq
+ excludehostgroups: broken_mq
-
name: MQ connection on rapoport
servicegroups: MQ
hostgroups: computers
check_interval: 60
retry_interval: 15
- excludehostgroups: alioth, broken_mq
+ excludehostgroups: broken_mq
###
-
name: local resolver
# unbound process check (user unbound, /usr/sbin/unbound) for the computers
# hostgroup; the alioth exclusion is removed ('-' marker).
# NOTE(review): the list-item marker for this entry lies above this excerpt.
name: process - unbound
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
hostgroups: computers
- excludehostgroups: alioth
# Interleaved diff covering three checks. Reading '-' lines as the old text
# and '+' lines as the new text, the result is:
#   * a new "unbound trust anchors" check (dsa-check-unbound-anchors,
#     hostgroups computers, check_interval 60) takes the slot that used to
#     hold the uptimed check;
#   * the uptimed check reappears unchanged in the next slot, replacing the
#     old udevd check ("-C udevd", which excluded jessie and stretch);
#   * the systemd-udevd check has its hostgroups switched from
#     "jessie, stretch" to "computers".
- ###
-
- name: process - uptimed
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'"
+ name: unbound trust anchors
+ nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors"
hostgroups: computers
+ check_interval: 60
###
-
- name: process - udevd
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'"
+ name: process - uptimed
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'"
hostgroups: computers
- excludehostgroups: jessie, stretch
-
name: process - udevd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'"
- hostgroups: jessie, stretch
+ hostgroups: computers
###
-
name: unexpected process - acpid
# stunnel4 / puppet-ekeyd checks. The first entry counts stunnel4 processes
# started with /etc/stunnel/puppet-ekeyd.conf; its hostgroups line changes
# from "jessie, stretch" to "computers" and its alioth exclusion is removed.
# The second entry runs dsa-check-stunnel-sanity via sudo; it also loses its
# alioth exclusion but keeps the three excluded hosts.
-
name: process - stunnel4 - puppet-ekeyd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
- hostgroups: jessie, stretch
- excludehostgroups: alioth
+ hostgroups: computers
-
name: process - stunnel4 - puppet-ekeyd is crazy
nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity"
hostgroups: computers
- excludehostgroups: alioth
excludehosts: czerny, grnet-node01, storace
# }}}
# {{{ anti-services
# OpenManage hardware checks. The generic entry is widened from pe1950 to
# "pe1950, r540" and now excludes wieck and schumann, which get their own
# added entries under the same service name with extra "-b" arguments.
# NOTE(review): the "-b bp=0" / "-b bat_charge=0:0" arguments are passed
# through to dsa-check-openmanage; their meaning is not visible here — confirm
# against the plugin.
-
name: HW - OpenManage status
nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage"
- hostgroups: pe1950
+ hostgroups: pe1950, r540
+ excludehosts: wieck, schumann
+ -
+ name: HW - OpenManage status
+ nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage -b bp=0"
+ hosts: wieck
+ -
+ name: HW - OpenManage status
+ nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage -b bp=0 -b bat_charge=0:0"
+ hosts: schumann
# }}}
# }}}
# {{{ ### mail stuff
name: mail queue
nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000"
hostgroups: heavy-exim
# Added pair of fail2ban checks ('+' marker). The first uses "-w 1:1" for
# fail2ban-server on the heavy-exim and heavy-postfix hostgroups; the second
# uses "-w 0:0" on every other member of computers. Neither passes a -c
# threshold to check_procs.
+ -
+ name: process - fail2ban
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server"
+ hostgroups: heavy-exim, heavy-postfix
+ -
+ name: unwanted process - fail2ban
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server"
+ hostgroups: computers
+ excludehostgroups: heavy-exim, heavy-postfix
# }}}
# {{{ clamav
-
# policyd-weight process checks (master, cache, child), all run as user polw.
# Each hostgroups line changes from "heavy-postfix, alioth" to
# "heavy-postfix". The cache and child checks declare a dependency on the
# master check via "depends".
-
name: process - weightd - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'"
- hostgroups: heavy-postfix, alioth
+ hostgroups: heavy-postfix
-
name: process - weightd - cache
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'"
- hostgroups: heavy-postfix, alioth
+ hostgroups: heavy-postfix
depends: process - weightd - master
-
name: process - weightd - child
nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'"
- hostgroups: heavy-postfix, alioth
+ hostgroups: heavy-postfix
depends: process - weightd - master
###
# Check with "-w 0:0" for policyd-weight on computers outside heavy-postfix;
# alioth is dropped from the exclusion list.
-
name: unwanted process - policyd-weight
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight"
hostgroups: computers
- excludehostgroups: heavy-postfix, alioth
+ excludehostgroups: heavy-postfix
# }}}
# {{{ postfix
###
# PostgreSQL checks. The generic "unwanted process" check now excludes only
# postgres96-hosts (postgres94-hosts dropped from the list); the 9.0-specific
# unwanted check is unchanged; the postgresql94 master-process check is
# removed ('-' marker).
# NOTE(review): the list-item marker for the first entry lies above this
# excerpt.
name: unwanted process - postgresql
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres"
hostgroups: computers
- excludehostgroups: postgres94-hosts, postgres96-hosts
+ excludehostgroups: postgres96-hosts
-
name: unwanted process - postgresql 9.0
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'"
hostgroups: computers
- -
- name: process - postgresql94 - master
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'"
- hostgroups: postgres94-hosts
-
name: process - postgresql96 - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'"
# Runs dsa-check-systemd-services via sudo; hostgroups switched from
# "jessie, stretch" to "computers".
-
name: system - all services running
nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-systemd-services"
- hostgroups: jessie, stretch
+ hostgroups: computers
###
-
name: process - slapd
hostgroups: computers
check_interval: 60
retry_interval: 15
+ ####
# Added ping checks ('+' marker) for the conova nodes: conova-node01 pings
# 172.29.184.12 and conova-node02 pings 172.29.184.11, with identical
# thresholds and scheduling values.
# NOTE(review): presumably each target is the other node's management
# address — the address-to-host mapping is not visible here; confirm.
+ -
+ name: ping peer on mgmt network
+ nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%"
+ hosts: conova-node01
+ check_interval: 5
+ max_check_attempts: 4
+ retry_interval: 1
+ -
+ name: ping peer on mgmt network
+ nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%"
+ hosts: conova-node02
+ check_interval: 5
+ max_check_attempts: 4
+ retry_interval: 1
+
# Added ping checks ('+' marker) for the manda nodes: manda-node03 pings
# 172.29.182.14 and manda-node04 pings 172.29.182.13, with identical
# thresholds and scheduling values.
# NOTE(review): presumably each target is the other node's management
# address — the address-to-host mapping is not visible here; confirm.
+ -
+ name: ping peer on mgmt network
+ nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.14 -w 50,10% -c 200,30%"
+ hosts: manda-node03
+ check_interval: 5
+ max_check_attempts: 4
+ retry_interval: 1
+ -
+ name: ping peer on mgmt network
+ nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.13 -w 50,10% -c 200,30%"
+ hosts: manda-node04
+ check_interval: 5
+ max_check_attempts: 4
+ retry_interval: 1
+ # }}}
# }}}
# }}}