parents: gw-ubcece
hostgroups: layer3-infrastructure
gw-unicamp:
- address: 177.220.10.65
+ address: 177.220.10.129
parents: gw-ubcece
hostgroups: layer3-infrastructure
gw-utwente:
mirror-anu:
address: 150.203.164.39
parents: gw-cecsit
- hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula, apache-https
+ hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror, apache-https
+ mirror-anu2:
+ address: 150.203.164.60
+ parents: mirror-anu
+ hostgroups: secondary-IPs
+ mirror-anu3:
+ address: 150.203.164.61
+ parents: mirror-anu
+ hostgroups: secondary-IPs
+ mirror-anu4:
+ address: 150.203.164.62
+ parents: mirror-anu
+ hostgroups: secondary-IPs
# }}}
# {{{ gw-conova
sompek:
mirror-isc:
address: 149.20.20.7
parents: gw-isc
- hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula
+ hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror
+ mirror-isc2:
+ address: 149.20.20.19
+ parents: mirror-isc
+ hostgroups: secondary-IPs
+ mirror-isc3:
+ # NOTE(review): was 149.20.20.19, identical to mirror-isc2; .20 inferred from the .19/.21 neighbours -- confirm
+ address: 149.20.20.20
+ parents: mirror-isc
+ hostgroups: secondary-IPs
+ mirror-isc-syncproxy:
+ address: 149.20.20.21
+ parents: mirror-isc
+ hostgroups: secondary-IPs
# }}}
# {{{ gw-leaseweb
lw01:
ubc-bl4:
address: 206.12.19.214
parents: sw-ubcece-kais
- hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts
+ hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts
ubc-bl8:
address: 206.12.19.218
parents: sw-ubcece-kais
sonntag:
address: 206.12.19.142
parents: ganeti2
- hostgroups: computers, service, kvmdomains, wheezy, nfs-client, autofs
+ hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs
menotti:
address: 206.12.19.143
parents: ganeti2
mirror-umn:
address: 128.101.240.212
parents: gw-umn
- hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror
+ hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror
+ mirror-umn2:
+ address: 128.101.240.215
+ parents: mirror-umn
+ hostgroups: secondary-IPs
+ mirror-umn3:
+ address: 128.101.240.216
+ parents: mirror-umn
+ hostgroups: secondary-IPs
+ mirror-umn4:
+ address: 128.101.240.217
+ parents: mirror-umn
+ hostgroups: secondary-IPs
# }}}
# {{{ gw-unicamp
prokofiev:
- address: 177.220.10.78
+ address: 177.220.10.140
parents: gw-unicamp
hostgroups: computers, jessie, service
powerpc-unicamp-01:
- address: 177.220.10.79
+ address: 177.220.10.141
parents: prokofiev
hostgroups: computers, hassrvfs, buildd, jessie
ppc64el-unicamp-01:
- address: 177.220.10.80
+ address: 177.220.10.142
parents: prokofiev
hostgroups: computers, hassrvfs, buildd, jessie
plummer:
- address: 177.220.10.81
+ address: 177.220.10.143
parents: prokofiev
hostgroups: computers, porterbox, hassrvfs, jessie
# }}}
general:
alias: general purpose developer accessible machines
+ hpnewraid:
+ alias: new (2015+) machines where we need hpssacli instead of hpacucli
+ private: 1
dl380:
alias: HP DL380 hosts
private: 1
check: "check_ping!350.0,20%!600.0,40%"
hostgroups: pingable
excludehostgroups: layer3-infrastructure, high-RTT
- normal_check_interval: 5
+ check_interval: 5
max_check_attempts: 4
retry_check_interval: 1
-
name: PING
check: "check_ping!600.0,20%!900.0,40%"
hostgroups: high-RTT
- normal_check_interval: 5
+ check_interval: 5
max_check_attempts: 4
retry_check_interval: 1
-
name: PING
check: "check_ping!2000.0,60%!3000.0,80%"
hostgroups: layer3-infrastructure
- normal_check_interval: 5
+ check_interval: 5
max_check_attempts: 4
retry_check_interval: 1
# }}}
name: setup - dsa config
nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
excludehostgroups: alioth
-
name: setup - local hostname etc-hosts
nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
# }}}
# {{{ os health
####
-
name: system - filesystem check
nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems"
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
hostgroups: computers
# }}}
nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup"
hostgroups: computers
excludehosts: backuphost, storace, backuphost
- normal_check_interval: 60
+ check_interval: 60
max_check_attempts: 2
retry_check_interval: 5
-
servicegroups: backup
nrpe: "/usr/lib/nagios/plugins/dsa-check-dabackup-server"
hosts: storace
- normal_check_interval: 60
+ check_interval: 60
max_check_attempts: 2
retry_check_interval: 5
-
runfrom: dinis
hostgroups: computers
excludehostgroups: buildd, porterbox, no-bacula
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
-
name: backup - bacula - last full backup
runfrom: dinis
hostgroups: computers
excludehostgroups: buildd, porterbox, no-bacula
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
-
name: process - bacula-fd
servicegroups: kernel
nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel"
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 5
-
name: apt - security updates
servicegroups: apt
nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt"
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
-
name: unexpected file - apt sources.list
#nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs"
hostgroups: computers
excludehostgroups: freebsd
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
notification_interval: 10080
-
nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/samhain"
hostgroups: computers
depends: process - samhain
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 5
excludehostgroups: brokensamhain
-
check: dsa_check_ssh
hostgroups: computers
depends: process - sshd
- normal_check_interval: 60
+ check_interval: 60
notification_interval: 1440
####
-
###
-
name: process - munin-node
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a '/usr/sbin/munin-node'"
+ nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'"
hostgroups: computers
- excludehostgroups: freebsd, armhf
+ excludehostgroups: freebsd
-
name: process - munin-node
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C perl -a '/usr/bin/perl -wT /usr/sbin/munin-node'"
hostgroups: freebsd
- -
- name: process - munin-node
- nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'"
- hostgroups: wheezy, jessie
- excludehostgroups: freebsd
-
name: network service - munin-node
check: check_tcp!4949
remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
runfrom: rainier
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
excludehostgroups: alioth, broken_mq
-
remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
runfrom: rapoport
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
excludehostgroups: alioth, broken_mq
###
name: local resolver
nrpe: "/usr/lib/nagios/plugins/dsa-check-resolver www.debian.org www.google.com"
hostgroups: computers
- normal_check_interval: 60
+ check_interval: 60
-
name: process - unbound
nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
name: unwanted process - openvpn
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C openvpn"
hostgroups: computers
- normal_check_interval: 120
+ check_interval: 120
-
name: unwanted process - gkrellmd
nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd"
name: HW - hpacucli status
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli"
- normal_check_interval: 120
+ check_interval: 120
hostgroups: dl385, dl380, dl360, bl460, dl180
- excludehosts: schein, rietz, mirror-anu, mirror-isc, mirror-umn
+ excludehosts: schein, rietz
+ excludehostgroups: hpnewraid
-
name: HW - hpacucli status
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-controller-ok --ignore-controller='P700m'"
- normal_check_interval: 120
+ check_interval: 120
hostgroups: bm-bl
-
name: HW - hpacucli status
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-battery"
- normal_check_interval: 120
+ check_interval: 120
hosts: schein, rietz
-
name: HW - hpacucli enclosure status
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli-enclosure 1 1E:1"
- normal_check_interval: 120
+ check_interval: 120
hosts: franck
-
name: HW - hpacucli status
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --ignore-transfer-speed=1I:1:1 --ignore-transfer-speed=1I:1:2"
- normal_check_interval: 120
+ check_interval: 120
hostgroups: dl585
-
name: HW - hpssacli status
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpssacli"
- normal_check_interval: 120
- hosts: mirror-anu, mirror-isc, mirror-umn
+ check_interval: 120
+ hostgroups: hpnewraid
###
# -
# name: HW - edac status
# nrpe: "/usr/lib/nagios/plugins/dsa-check-edac"
-# normal_check_interval: 120
+# check_interval: 120
#hostgroups: computers
#excludehosts: villa, lobos, schein
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm"
- normal_check_interval: 120
+ check_interval: 120
hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl
excludehosts: villa, lobos, schein, storace, mirror-anu
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant"
- normal_check_interval: 120
+ check_interval: 120
hosts: villa
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant --ignore-failed='PS2'"
- normal_check_interval: 120
+ check_interval: 120
hosts: lobos
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant"
- normal_check_interval: 120
+ check_interval: 120
hosts: schein
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present"
- normal_check_interval: 120
+ check_interval: 120
hosts: storace
-
name: HW - hpasmcli status
nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'"
- normal_check_interval: 120
+ check_interval: 120
hosts: mirror-anu
###
-
servicegroups: raid
nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All"
hostgroups: drbd-hosts
+ excludehosts: ubc-bl8
+ -
+ name: RAID - DRBD
+ servicegroups: raid
+ nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All --ok-no-devices"
+ hosts: ubc-bl8
# }}}
# }}}
# {{{ ### mail stuff
nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45"
hostgroups: postfix-hosts
depends: process - postfix - master
- normal_check_interval: 120
+ check_interval: 120
# }}}
# {{{ mail - network service
-
excludehosts: menotti
excludehostgroups: broken_https_default_vhost
depends: "process - apache2 - master"
- normal_check_interval: 120
+ check_interval: 120
-
name: network service - https
check: dsa_check_https_want_auth
hosts: menotti
depends: "process - apache2 - master"
- normal_check_interval: 120
+ check_interval: 120
-
name: network service - https
check: dsa_check_https_any_status
hostgroups: broken_https_default_vhost
depends: "process - apache2 - master"
- normal_check_interval: 120
+ check_interval: 120
-
name: network service - https cert
check: dsa_check_cert!443
hostgroups: apache-https
depends: network service - https
- normal_check_interval: 60
+ check_interval: 60
-
name: unwanted network service - https
check: dsa_check_port_closed!443
hostgroups: apache2-hosts
excludehostgroups: apache-https
- normal_check_interval: 60
+ check_interval: 60
# }}}
# {{{ FTP
-
nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s ZNs -c 0"
hostgroups: buildd
contact_groups: +buildd
- normal_check_interval: 5
+ check_interval: 5
max_check_attempts: 24
retry_check_interval: 5
-
name: mirror sync - packages
check: "dsa_check_mirrorsync_skew!packages.debian.org!Pics/.trace!3600:57600"
hosts: global
- normal_check_interval: 15
+ check_interval: 15
max_check_attempts: 5
retry_check_interval: 5
-
name: mirror sync - snapshot
check: "dsa_check_mirrorsync_skew!snapshot.debian.org!project/trace/snapshot-master.debian.org!3600:28800"
hosts: global
- normal_check_interval: 15
+ check_interval: 15
max_check_attempts: 5
retry_check_interval: 5
# }}}
remotecheck: "/usr/lib/nagios/plugins/check_ping -H $HOSTADDRESS$ -w 50,10% -c 200,30%"
runfrom: ubc-bl8
hosts: giustini
- normal_check_interval: 5
+ check_interval: 5
max_check_attempts: 4
retry_check_interval: 1
-
name: current chroots
nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current"
hostgroups: porterbox
- normal_check_interval: 60
+ check_interval: 60
retry_check_interval: 15
# }}}
# {{{ openstack
name: network service - ldaps cert
check: dsa_check_cert!636
depends: process - slapd
- normal_check_interval: 60
+ check_interval: 60
hosts: draghi
###
-
check: check_tcp!873
hosts: milanollo2
depends: milanollo:process - xinetd
+ -
+ name: network service - rsync
+ check: check_tcp!873
+ hosts: mirror-isc2, mirror-isc-syncproxy
+ depends: mirror-isc:process - xinetd
+ -
+ name: network service - rsync
+ check: check_tcp!873
+ hosts: mirror-umn2, mirror-umn3
+ depends: mirror-umn:process - xinetd
+ -
+ name: network service - rsync
+ check: check_tcp!873
+ hosts: mirror-anu2, mirror-anu3
+ depends: mirror-anu:process - xinetd
###
-
name: process - icinga
-
name: network service - sip-tls cert - 443
check: dsa_check_cert!443
- normal_check_interval: 60
+ check_interval: 60
hosts: vogler
-
name: network service - sip-tls cert - 5061
check: dsa_check_cert!5061
- normal_check_interval: 60
+ check_interval: 60
hosts: vogler
####
-
name: puppetmaster cert
nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem"
hosts: handel
- normal_check_interval: 60
+ check_interval: 60
max_check_attempts: 2
retry_check_interval: 5
# }}}