address: 72.52.94.70
parents: gw-ubcece
hostgroups: layer3-infrastructure
- gw-karlsruhe:
- address: 129.143.59.214
- parents: gw-ubcece
- hostgroups: layer3-infrastructure
gw-leaseweb:
address: 185.17.185.190
parents: gw-ubcece
parents: gw-ubcece
hostgroups: layer3-infrastructure
gw-unicamp:
- address: 177.220.10.129
+ address: 143.106.167.113
parents: gw-ubcece
hostgroups: layer3-infrastructure
gw-utwente:
arm-arm-01:
address: 217.140.96.58
parents: gw-arm
- hostgroups: computers, hassrvfs, buildd, stretch, broken_mq
+ hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid
arm-arm-03:
address: 217.140.96.60
parents: gw-arm
- hostgroups: computers, hassrvfs, buildd, stretch, broken_mq
+ hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid
arm-arm-04:
address: 217.140.96.61
parents: gw-arm
- hostgroups: computers, hassrvfs, buildd, stretch, broken_mq
+ hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid
harris:
address: 217.140.96.66
parents: gw-arm
moszumanska:
address: 5.153.231.21
parents: ganeti-bytemark
- contact_groups: alioth-admins
- hostgroups: computers, general, wheezy, postgres91-hosts, apache2-hosts, acpid-hosts, apache-https, brokensamhain, no-bacula, bind9-hosts, xinetd-hosts, alioth, heavy-exim, spamd
- no-servicegroups: true
+ hostgroups: secondary-IPs
dillon:
address: 5.153.231.22
parents: ganeti-bytemark
pkgmirror-csail:
address: 128.31.0.51
parents: ganeti-csail
- hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, no-bacula, apache-https, hassrvfs, systemd-timesyncd
+ hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, no-bacula, apache-https, hassrvfs, systemd-timesyncd, varnish-hosts
usper:
address: 128.31.0.69
parents: ganeti-csail
lw01:
address: 185.17.185.177
parents: gw-leaseweb
- hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+ hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
lw02:
address: 185.17.185.178
parents: gw-leaseweb
- hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+ hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
lw03:
address: 185.17.185.179
parents: gw-leaseweb
- hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+ hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
lw04:
address: 185.17.185.180
parents: gw-leaseweb
- hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+ hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
lw07:
address: 185.17.185.187
parents: gw-leaseweb
- hostgroups: computers, service, jessie, dl180, nfs-client, autofs, hassrvfs, postgres94-hosts, apache2-hosts
+ hostgroups: computers, service, stretch, dl180, nfs-client, autofs, hassrvfs, postgres96-hosts, apache2-hosts, haproxy-hosts, haproxy-https-host, varnish-hosts
+ lw07-2:
+ address: 185.17.185.185
+ parents: lw07
+ hostgroups: secondary-IPs, https-service
+
lw08:
address: 185.17.185.189
parents: gw-leaseweb
- hostgroups: computers, service, jessie, dl180, nfs-client, autofs, hassrvfs, apache2-hosts
+ hostgroups: computers, service, stretch, dl180, nfs-client, autofs, hassrvfs, apache2-hosts
lw09:
address: 185.17.185.181
parents: gw-leaseweb
parents: gw-leaseweb
hostgroups: computers, service, stretch, dl180
# }}}
- # {{{ gw-karlsruhe
- zemlinsky:
- address: 129.143.160.6
- parents: gw-karlsruhe
- hostgroups: computers, buildd, stretch
- contacts: pkern
- # }}}
# {{{ gw-manda
czerny:
address: 82.195.75.109
address: 140.211.166.197
parents: pieta
hostgroups: computers, hassrvfs, buildd, jessie
- partch:
- address: 140.211.15.152
- parents: gw-osuosl
- hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid
# }}}
# {{{ gw-sanger
sallinen:
address: 193.62.202.26
parents: gw-sanger
- hostgroups: computers, service, stretch, dl380, nfs-client, autofs, postgres96-hosts
+ hostgroups: computers, service, stretch, dl380, nfs-client, autofs, postgres96-hosts, apache2-hosts, haproxy-hosts, haproxy-https-host, varnish-hosts
+ sallinen-2:
+ address: 193.62.202.27
+ parents: sallinen
+ hostgroups: secondary-IPs, https-service
sibelius:
address: 193.62.202.28
parents: gw-sanger
- hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server
- contacts: tjrc1, dave
- smetana:
- address: 193.62.202.29
- parents: gw-sanger
- hostgroups: computers, sw-raid, sparc, wheezy, no-bacula
+ hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
contacts: tjrc1, dave
# }}}
# {{{ gw-scanplus
address: 209.87.16.46
parents: ubc-gateway
hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https, broken_https_default_vhost
+ kantuser:
+ address: 209.87.16.47
+ parents: ubc-gateway
+ hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts
+ grabbe:
+ address: 209.87.16.48
+ parents: ubc-gateway
+ hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https
# }}}
# {{{ gw-umn
#saens:
# }}}
# {{{ gw-unicamp
prokofiev:
- address: 177.220.10.140
+ address: 143.106.167.124
parents: gw-unicamp
hostgroups: computers, stretch, service, manyprocesses
powerpc-unicamp-01:
- address: 177.220.10.141
+ address: 143.106.167.120
parents: prokofiev
hostgroups: computers, hassrvfs, buildd, jessie
ppc64el-unicamp-01:
- address: 177.220.10.142
+ address: 143.106.167.121
parents: prokofiev
hostgroups: computers, hassrvfs, buildd, stretch
plummer:
- address: 177.220.10.143
+ address: 143.106.167.122
parents: prokofiev
hostgroups: computers, porterbox, hassrvfs, stretch
# }}}
alias: Dell PowerEdge 1950 hosts
private: 1
- wheezy:
- alias: Hosts running wheezy
jessie:
alias: Hosts running jessie
stretch:
xinetd-hosts:
alias: hosts providing services via xinetd
private: 1
- postgres91-hosts:
- alias: hosts running postgres91
- private: 1
postgres94-hosts:
alias: hosts running postgres94
private: 1
alias: hosts with lots and lots of (kernel) processes
crazymanyprocesses:
alias: hosts with stupidly lots of processes
+ varnish-hosts:
+ alias: hosts running varnish
+ private: 1
+ haproxy-hosts:
+ alias: hosts running haproxy
+ private: 1
+ haproxy-https-host:
+ alias: "host providing https on the standard port via haproxy"
+ private: 1
no-bacula:
alias: hosts which are not being backed up with bacula
servicegroups: diskspace
nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-10"
hosts: lw10
+
+ -
+ name: disk usage on nfs farm 1
+ servicegroups: diskspace
+ nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-1"
+ hosts: lw07
+ -
+ name: disk usage on nfs farm 2
+ servicegroups: diskspace
+ nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-2"
+ hosts: lw07
+ -
+ name: disk usage on nfs farm 3
+ servicegroups: diskspace
+ nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-3"
+ hosts: lw07
+ -
+ name: disk usage on nfs farm 4
+ servicegroups: diskspace
+ nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-4"
+ hosts: lw07
+ -
+ name: disk usage on nfs farm 09
+ servicegroups: diskspace
+ nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-09"
+ hosts: lw07
+ -
+ name: disk usage on nfs farm 10
+ servicegroups: diskspace
+ nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-10"
+ hosts: lw07
+
-
name: disk usage on /srv/morgue.debian.org/
servicegroups: diskspace
name: puppetized firewall
nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/conf.d/defs.conf"
hostgroups: computers
- -
- name: process - ulogd
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'"
- hostgroups: computers
- excludehostgroups: sparc, jessie, stretch
-
name: process - ulogd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'"
name: system time synced
nrpe: "/usr/lib/nagios/plugins/dsa-check-timedatectl -s"
hostgroups: computers
- excludehostgroups: systemd-timesyncd, wheezy
+ excludehostgroups: systemd-timesyncd
servicegroups: time
-
name: system time synced
name: process - irqbalance
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'"
hostgroups: computers
- excludehosts: harris, smetana
+ excludehosts: harris
###
-
name: process - cron
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'"
hostgroups: jessie, stretch
###
- -
- name: process - acpid
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'"
- hostgroups: acpid-hosts
- excludehostgroups: jessie, stretch
-
name: unexpected process - acpid
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid"
-
name: process - stunnel4 - puppet-ekeyd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
- hostgroups: wheezy, jessie, stretch
+ hostgroups: jessie, stretch
excludehostgroups: alioth
-
name: process - stunnel4 - puppet-ekeyd is crazy
name: "sso CRL"
nrpe: "if [ -e /var/lib/dsa/sso/ca.crl ]; then /usr/lib/nagios/plugins/dsa-check-crl-expire -w 129600 -c 86400 /var/lib/dsa/sso/ca.crl; else echo 'No sso/ca.crl on this host.'; fi"
hostgroups: computers
- -
- name: SSL certs - puppet
- hosts: global
- remotecheck: "/usr/lib/nagios/plugins/dsa-check-cert-expire-dir /etc/puppet/modules/ssl/files/servicecerts"
- runfrom: handel
-
name: SSL certs - LE
hosts: global
runfrom: handel
# }}}
# {{{ HW health/raid
- -
- name: process - mdadm monitor
- servicegroups: raid
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'"
- hostgroups: sw-raid
- excludehostgroups: jessie, stretch
-
name: process - mdadm monitor
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --scan'"
hostgroups: sw-raid
- excludehostgroups: wheezy
-
name: RAID - sw raid
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl"
hostgroups: heavy-exim, heavy-postfix
depends: process - clamav - clamd
- -
- name: process - clamav - freshclam
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'"
- hostgroups: heavy-exim, heavy-postfix
- excludehostgroups: jessie, stretch
-
name: process - clamav - freshclam
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --foreground=true'"
hostgroups: heavy-exim, heavy-postfix
- excludehostgroups: wheezy
-
name: unwanted process - clamav
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C clamd"
excludehostgroups: heavy-exim, heavy-postfix
# }}}
# {{{ anti-spam
- -
- name: process - spamd - master
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
- hostgroups: spamd
- excludehosts: picconi
- excludehostgroups: jessie, stretch
-
name: process - spamd - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamd.pid --create-prefs --max-children 5 --helper-home-dir'"
hostgroups: spamd
excludehosts: picconi
- excludehostgroups: wheezy
-
name: process - spamd - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamd.pid --create-prefs --max-children 20 --min-spare=5 --helper-home-dir'"
hostgroups: computers
###
- -
- name: process - postgrey
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
- hostgroups: heavy-exim
- excludehostgroups: jessie, stretch
-
name: process - postgrey
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
hostgroups: heavy-exim
- excludehostgroups: wheezy
-
name: process - postgrey
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --inet=127.0.0.1:60000'"
-
name: network service - https cert
check: dsa_check_cert!443
- hostgroups: apache-https, https-service
+ hostgroups: apache-https, https-service, haproxy-https-host
depends: network service - https
check_interval: 60
-
name: unwanted network service - https
check: dsa_check_port_closed!443
hostgroups: apache2-hosts
- excludehostgroups: apache-https
+ excludehostgroups: apache-https, haproxy-https-host
check_interval: 60
+
+ ###
+ -
+ name: process - haproxy - master
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -a '/usr/sbin/haproxy-systemd-wrapper'"
+ hostgroups: haproxy-hosts
+ -
+ name: process - haproxy - worker
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1:15 -u haproxy -a '/usr/sbin/haproxy '"
+ hostgroups: haproxy-hosts
+ depends: process - haproxy - master
+ -
+ name: network service - https
+ check: check_https
+ hostgroups: haproxy-https-host
+ depends: "process - haproxy - master"
+ check_interval: 120
+
+ -
+ name: unwanted process - haproxy
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C haproxy"
+ hostgroups: computers
+ excludehostgroups: haproxy-hosts
+
+ ###
+ -
+ name: process - varnish
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1:15 -u vcache -a '/usr/sbin/varnishd -j unix,user=vcache -F -a '"
+ hostgroups: varnish-hosts
+ excludehostgroups: jessie
+ -
+ name: unwanted process - varnish
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C varnishd"
+ hostgroups: computers
+ excludehostgroups: varnish-hosts
+
+
# }}}
# {{{ FTP
-
name: unwanted process - postgresql
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres"
hostgroups: computers
- excludehostgroups: postgres91-hosts, postgres94-hosts, postgres96-hosts
+ excludehostgroups: postgres94-hosts, postgres96-hosts
-
name: unwanted process - postgresql 9.0
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'"
hostgroups: computers
- -
- name: process - postgresql91 - master
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'"
- hostgroups: postgres91-hosts
-
name: process - postgresql94 - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'"
name: puppet - agent check
nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/puppet-agent"
hostgroups: computers
- excludehosts: moszumanska
check_interval: 60
retry_interval: 15
# }}}