address: 130.239.18.121
parents: gw-accumu
hostgroups: computers, buildd, hassrvfs, jessie
+ boman:
+ address: 130.239.18.124
+ parents: gw-accumu
+ hostgroups: computers, service, jessie, apache2-hosts, hassrvfs
# }}}
# {{{ gw-aql
eller:
fasolo:
address: 138.16.160.17
parents: gw-brown
- hostgroups: computers, service, apache2-hosts, apache-https, dl380, hpnewraid, rsyncd-hosts, xinetd-hosts, jessie, hassrvfs, postgres94-hosts
+ hostgroups: computers, service, apache2-hosts, apache-https, dl380, hpnewraid, rsyncd-systemd-hosts, jessie, hassrvfs, postgres94-hosts
# }}}
# {{{ gw-bytemark
bm-bl1:
bilbao:
address: 5.153.231.37
parents: ganeti-bytemark
- hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, systemd-timesyncd, apache2-hosts
+ hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd, apache2-hosts, hassrvfs
sor:
address: 5.153.231.38
parents: ganeti-bytemark
address: 5.153.231.44
parents: ganeti-bytemark
hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, apache2-hosts, apache-https, systemd-timesyncd
+
+ casulana:
+ address: 5.153.231.41
+ parents: gw-bytemark
+ hostgroups: computers, service, stretch, hpnewraid, hassrvfs, dl380
# }}}
# {{{ gw-c3sl
santoro:
address: 217.196.149.228
parents: gw-conova
hostgroups: computers, jessie, service, sw-raid
+ mirror-conova:
+ address: 217.196.149.229
+ parents: gw-conova
+ hostgroups: computers, jessie, service
arm-conova-01:
address: 217.196.149.230
porta:
address: 194.177.211.207
parents: ganeti-grnet
- hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-hosts, xinetd-hosts, systemd-timesyncd
+ hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-systemd-hosts, xinetd-hosts, systemd-timesyncd
melartin:
address: 194.177.211.208
parents: ganeti-grnet
kaufmann:
address: 82.195.75.107
parents: ganeti3
- hostgroups: computers, service, apache2-hosts, rsyncd-hosts, kvmdomains, xinetd-hosts, jessie, apache-https
+ hostgroups: computers, service, apache2-hosts, rsyncd-systemd-hosts, kvmdomains, jessie, apache-https
stockhausen:
address: 82.195.75.108
parents: ganeti3
seger:
address: 82.195.75.93
parents: ganeti3
- hostgroups: computers, service, apache2-hosts, hassrvfs, hasbootfs, rsyncd-hosts, uploadqueue, kvmdomains, xinetd-hosts, apache-https, postgres94-hosts, jessie
+ hostgroups: computers, service, apache2-hosts, hassrvfs, hasbootfs, rsyncd-systemd-hosts, uploadqueue, kvmdomains, xinetd-hosts, apache-https, postgres94-hosts, jessie
# }}}
# {{{ gw-marist
zani:
# }}}
# {{{ gw-osuosl
busoni:
- address: 140.211.15.34
+ address: 140.211.166.202
parents: gw-osuosl
hostgroups: computers, service, dl360, hassrvfs, jessie, hasvarlogfs, apache2-hosts, no-bacula, apache-https
hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid
# }}}
# {{{ gw-sanger
+ sallinen:
+ address: 193.62.202.26
+ parents: gw-sanger
+ hostgroups: computers, service, jessie, dl380, hpnewraid
sibelius:
address: 193.62.202.28
parents: gw-sanger
- hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, xinetd-hosts, hasvarlogfs, multipath-hosts
+ hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-systemd-hosts, hasvarlogfs, multipath-hosts
contacts: tjrc1, dave
smetana:
address: 193.62.202.29
gretchaninov:
address: 209.87.16.36
parents: ubc-gateway
- hostgroups: computers, general, kvmdomains, jessie, hassrvfs, nfs-server, apache2-hosts, xinetd-hosts
+ hostgroups: computers, general, kvmdomains, jessie, hassrvfs, nfs-server, apache2-hosts, xinetd-hosts, apache-https
tye:
address: 209.87.16.37
parents: ubc-gateway
alias: Hosts running jessie
jessie-freebsd:
alias: kFreebsd hosts running jessie
+ stretch:
+ alias: Hosts running stretch
kvmdomains:
alias: Hosts that are KVM domains
# Hostgroups for hosts that serve rsync. The aliases distinguish the two:
# rsyncd-hosts is rsync provided via xinetd, the newly added
# rsyncd-systemd-hosts is rsync provided via systemd. Several host entries
# in this same patch switch from the former group to the latter.
rsyncd-hosts:
alias: hosts providing rsync services via xinetd
private: 1
+ rsyncd-systemd-hosts:
+ alias: hosts providing rsync services via systemd
+ private: 1
xinetd-hosts:
alias: hosts providing services via xinetd
private: 1
excludehostgroups: layer3-infrastructure, high-RTT
check_interval: 5
max_check_attempts: 4
- retry_check_interval: 1
+ retry_interval: 1
# PING variant applied only to the high-RTT hostgroup; check_ping is called
# with the argument pairs 600.0,20% and 900.0,40%.
# NOTE(review): presumably warning/critical RTT and packet-loss thresholds -
# confirm against the check_ping command definition.
# The patch only renames retry_check_interval to retry_interval; the value
# stays 1.
-
name: PING
check: "check_ping!600.0,20%!900.0,40%"
hostgroups: high-RTT
check_interval: 5
max_check_attempts: 4
- retry_check_interval: 1
+ retry_interval: 1
# PING variant applied only to the layer3-infrastructure hostgroup;
# check_ping is called with the argument pairs 2000.0,60% and 3000.0,80%.
# NOTE(review): presumably warning/critical RTT and packet-loss thresholds -
# confirm against the check_ping command definition.
# The patch only renames retry_check_interval to retry_interval; the value
# stays 1.
-
name: PING
check: "check_ping!2000.0,60%!3000.0,80%"
hostgroups: layer3-infrastructure
check_interval: 5
max_check_attempts: 4
- retry_check_interval: 1
+ retry_interval: 1
# }}}
# {{{ ### disk usage
# "processes - total" exists in two variants that share one host list:
# the first runs check_procs with arguments 620 700 on every host in
# "computers" except the hosts in excludehosts; the second runs it with
# 1500 1700 on exactly those hosts. The two lists must be kept in sync -
# this patch adds casulana to both.
-
name: processes - total
nrpe: "/usr/lib/nagios/plugins/check_procs 620 700"
hostgroups: computers
- excludehosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo
+ excludehosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo, casulana
-
name: processes - total
nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700"
- hosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo
+ hosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo, casulana
-
name: swap usage - percent
nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%"
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty"
hostgroups: computers
excludehosts: zelenka, zandonai
- excludehostgroups: jessie
+ excludehostgroups: jessie, stretch
-
name: process - getty
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/getty"
-
name: process - getty
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty"
- hostgroups: jessie
+ hostgroups: jessie, stretch
excludehostgroups: freebsd
-
name: system - filesystem check
nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems"
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
hostgroups: computers
# }}}
# {{{ backup
excludehosts: backuphost, storace
check_interval: 60
max_check_attempts: 2
- retry_check_interval: 5
+ retry_interval: 5
-
name: backup server config
servicegroups: backup
hosts: storace
check_interval: 60
max_check_attempts: 2
- retry_check_interval: 5
+ retry_interval: 5
-
name: backup - bacula - last backup
servicegroups: backup
hostgroups: computers
excludehostgroups: buildd, porterbox, no-bacula
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
-
name: backup - bacula - last full backup
servicegroups: backup
hostgroups: computers
excludehostgroups: buildd, porterbox, no-bacula
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
-
name: process - bacula-dir
servicegroups: backup
nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel"
hostgroups: computers
check_interval: 60
- retry_check_interval: 5
+ retry_interval: 5
# Runs dsa-check-statusfile on /var/cache/dsa/nagios/apt for every host in
# "computers"; the service belongs to the "apt" servicegroup.
# The patch only renames retry_check_interval to retry_interval; the value
# stays 15.
-
name: apt - security updates
servicegroups: apt
nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt"
hostgroups: computers
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
-
name: unexpected file - apt sources.list
servicegroups: apt
hostgroups: computers
excludehostgroups: freebsd
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
notification_interval: 10080
-
name: installed firewall
name: process - ulogd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'"
hostgroups: computers
- excludehostgroups: freebsd, sparc, jessie
+ excludehostgroups: freebsd, sparc, jessie, stretch
-
name: process - ulogd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'"
- hostgroups: jessie
+ hostgroups: jessie, stretch
excludehostgroups: freebsd
-
name: unexpected process - ulogd
hostgroups: computers
depends: process - samhain
check_interval: 60
- retry_check_interval: 5
+ retry_interval: 5
excludehostgroups: brokensamhain
-
name: processes - samhain zombies
name: process - syslog-ng
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'"
hostgroups: computers
- excludehostgroups: freebsd, jessie
+ excludehostgroups: freebsd, jessie, stretch
-
name: process - syslog-ng
nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'"
-
name: process - syslog-ng
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -F'"
- hostgroups: jessie
+ hostgroups: jessie, stretch
excludehostgroups: freebsd
# The patch splits "process - nrpe" into two entries that differ only in the
# expected nrpe command line and the hosts they apply to:
#   - hosts in "computers" outside "stretch": command line ending in -d
#   - hosts in "stretch":                     command line ending in -f
# Both entries keep max_check_attempts: -1 and depend on
# "network service - nrpe".
# NOTE(review): the meaning of max_check_attempts: -1 is not visible here -
# confirm how the config generator interprets it.
-
name: process - nrpe
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d'"
hostgroups: computers
+ excludehostgroups: stretch
+ max_check_attempts: -1
+ depends: network service - nrpe
+ -
+ name: process - nrpe
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -f'"
+ hostgroups: stretch
max_check_attempts: -1
depends: network service - nrpe
###
name: process - monit
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
hostgroups: computers
- excludehostgroups: alioth, jessie
+ excludehostgroups: alioth, jessie, stretch
###
# Two parallel MQ connection checks, identical except for the runfrom host
# (rainier and rapoport). Both cover "computers" minus the alioth and
# broken_mq hostgroups.
# The patch only renames retry_check_interval to retry_interval in each;
# the value stays 15.
-
name: MQ connection on rainier
runfrom: rainier
hostgroups: computers
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
excludehostgroups: alioth, broken_mq
-
name: MQ connection on rapoport
runfrom: rapoport
hostgroups: computers
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
excludehostgroups: alioth, broken_mq
###
# "process - udevd" has two variants keyed on the release hostgroup:
# the first looks for a process named udevd on "computers" outside the
# excluded groups; the second looks for systemd-udevd on the listed release
# groups. The patch adds stretch to the exclusion list of the first and to
# the hostgroups of the second, so stretch hosts get the systemd-udevd
# variant.
-
name: process - udevd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'"
hostgroups: computers
- excludehostgroups: freebsd, jessie
+ excludehostgroups: freebsd, jessie, stretch
-
name: process - udevd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'"
- hostgroups: jessie
+ hostgroups: jessie, stretch
excludehostgroups: freebsd
-
name: unexpected process - udev
name: process - acpid
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'"
hostgroups: acpid-hosts
- excludehostgroups: jessie
+ excludehostgroups: jessie, stretch
-
name: unexpected process - acpid
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid"
-
name: process - stunnel4 - puppet-ekeyd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
- hostgroups: wheezy, jessie
+ hostgroups: wheezy, jessie, stretch
excludehostgroups: freebsd, alioth
-
name: process - stunnel4 - puppet-ekeyd is crazy
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'"
hostgroups: sw-raid
- excludehostgroups: jessie
+ excludehostgroups: jessie, stretch
-
name: process - mdadm monitor
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm"
check_interval: 120
hostgroups: dl380, dl360, bl460, bm-bl
- excludehosts: villa, lobos, storace, mirror-anu
+ excludehosts: villa, lobos, storace, mirror-anu, sallinen
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant"
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present"
check_interval: 120
- hosts: storace
+ hosts: storace, sallinen
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'"
name: process - clamav - freshclam
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'"
hostgroups: heavy-exim, heavy-postfix
- excludehostgroups: jessie
+ excludehostgroups: jessie, stretch
-
name: process - clamav - freshclam
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --foreground=true'"
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
hostgroups: spamd
excludehosts: picconi
- excludehostgroups: jessie
+ excludehostgroups: jessie, stretch
-
name: process - spamd - master
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamassassin.pid --create-prefs --max-children 5 --helper-home-dir'"
name: process - postgrey
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
hostgroups: heavy-exim
- excludehostgroups: jessie
+ excludehostgroups: jessie, stretch
-
name: process - postgrey
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
contact_groups: +buildd
check_interval: 5
max_check_attempts: 24
- retry_check_interval: 5
+ retry_interval: 5
-
name: processes - lvcreate
nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'"
hosts: global
check_interval: 15
max_check_attempts: 5
- retry_check_interval: 5
+ retry_interval: 5
servicegroups: mirror
-
name: mirror sync - snapshot
hosts: global
check_interval: 15
max_check_attempts: 5
- retry_check_interval: 5
+ retry_interval: 5
servicegroups: mirror
-
check: "dsa_check_staticsync_nossl!metadata.ftp-master.debian.org"
hosts: global
servicegroups: mirror
# Static-sync checks in the "mirror" servicegroup, each targeting one
# hostname. The patch adds entries for mirror-master.debian.org and
# manpages.debian.org around the existing deb.debian.org entry.
# NOTE(review): the manpages entry uses dsa_check_staticsync while its
# neighbours use dsa_check_staticsync_nossl - confirm the difference is
# intended.
+ -
+ name: mirror static sync - mirror-master
+ check: "dsa_check_staticsync_nossl!mirror-master.debian.org"
+ hosts: global
+ servicegroups: mirror
-
name: mirror static sync - deb
check: "dsa_check_staticsync_nossl!deb.debian.org"
hosts: global
servicegroups: mirror
+ -
+ name: mirror static sync - manpages
+ check: "dsa_check_staticsync!manpages.debian.org"
+ hosts: global
+ servicegroups: mirror
-
name: mirror static sync - 10years
hosts: giustini
check_interval: 5
max_check_attempts: 4
- retry_check_interval: 1
+ retry_interval: 1
-
name: Overall Unit Status
remotecheck: "/usr/lib/nagios/plugins/check_snmp -H $HOSTADDRESS$ -C public -P 2c -o connUnitStatus -n -c 3 -w 3"
nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current"
hostgroups: porterbox
check_interval: 60
- retry_check_interval: 15
+ retry_interval: 15
# }}}
# {{{ openstack
# -
# Runs "systemctl is-system-running" through sudo on the listed release
# hostgroups, excluding freebsd. The patch extends the check from jessie to
# jessie and stretch.
-
name: system - all services running
nrpe: "/usr/bin/sudo /bin/systemctl is-system-running"
- hostgroups: jessie
+ hostgroups: jessie, stretch
excludehostgroups: freebsd
###
-
hosts: draghi
depends: process - xinetd
###
# New entry: TCP check on port 873 for every host in the
# rsyncd-systemd-hosts hostgroup. No depends key is set on this entry.
+ -
+ name: network service - rsync
+ check: check_tcp!873
+ hostgroups: rsyncd-systemd-hosts
-
name: network service - rsync
check: check_tcp!873
hosts: handel
check_interval: 60
max_check_attempts: 2
- retry_check_interval: 5
+ retry_interval: 5
# }}}
# }}}