sonntag on jessie
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index 1b06ac1..dbc8e0a 100644 (file)
@@ -151,7 +151,7 @@ servers:
     parents: gw-ubcece
     hostgroups: layer3-infrastructure
   gw-unicamp:
-    address: 177.220.10.65
+    address: 177.220.10.129
     parents: gw-ubcece
     hostgroups: layer3-infrastructure
   gw-utwente:
@@ -522,7 +522,19 @@ servers:
   mirror-anu:
     address: 150.203.164.39
     parents: gw-cecsit
-    hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula, apache-https
+    hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror, apache-https
+  mirror-anu2:  # presumably an extra address on the mirror-anu machine (hostgroup secondary-IPs) - confirm
+    address: 150.203.164.60
+    parents: mirror-anu  # parent is the primary mirror-anu host entry
+    hostgroups: secondary-IPs
+  mirror-anu3:  # same pattern as mirror-anu2, next address
+    address: 150.203.164.61
+    parents: mirror-anu
+    hostgroups: secondary-IPs
+  mirror-anu4:  # NOTE(review): unlike anu2/anu3, not listed in any "network service - rsync" hosts line - confirm
+    address: 150.203.164.62
+    parents: mirror-anu
+    hostgroups: secondary-IPs
   # }}}
   # {{{ gw-conova
   sompek:
@@ -644,7 +656,19 @@ servers:
   mirror-isc:
     address: 149.20.20.7
     parents: gw-isc
-    hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, no-bacula
+    hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror
+  mirror-isc2:  # presumably an extra address on the mirror-isc machine (hostgroup secondary-IPs) - confirm
+    address: 149.20.20.19
+    parents: mirror-isc  # parent is the primary mirror-isc host entry
+    hostgroups: secondary-IPs
+  mirror-isc3:  # presumably a further extra address on the mirror-isc machine - confirm
+    address: 149.20.20.20  # NOTE(review): was .19, identical to mirror-isc2; .20 assumed from the .19/.21 neighbours - confirm
+    parents: mirror-isc  # parent is the primary mirror-isc host entry
+    hostgroups: secondary-IPs
+  mirror-isc-syncproxy:  # child of mirror-isc; also probed by the "network service - rsync" check for this site
+    address: 149.20.20.21
+    parents: mirror-isc
+    hostgroups: secondary-IPs
   # }}}
   # {{{ gw-leaseweb
   lw01:
@@ -916,7 +940,7 @@ servers:
   ubc-bl4:
     address: 206.12.19.214
     parents: sw-ubcece-kais
-    hostgroups: computers, bl460, acpid-hosts, service, wheezy, drbd-hosts
+    hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts
   ubc-bl8:
     address: 206.12.19.218
     parents: sw-ubcece-kais
@@ -1010,7 +1034,7 @@ servers:
   sonntag:
     address: 206.12.19.142
     parents: ganeti2
-    hostgroups: computers, service, kvmdomains, wheezy, nfs-client, autofs
+    hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs
   menotti:
     address: 206.12.19.143
     parents: ganeti2
@@ -1034,23 +1058,35 @@ servers:
   mirror-umn:
     address: 128.101.240.212
     parents: gw-umn
-    hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror
+    hostgroups: computers, service, apache2-hosts, dl360, hpnewraid, hassrvfs, xinetd-hosts, jessie, security_mirror
+  mirror-umn2:  # presumably an extra address on the mirror-umn machine (hostgroup secondary-IPs) - confirm
+    address: 128.101.240.215
+    parents: mirror-umn  # parent is the primary mirror-umn host entry
+    hostgroups: secondary-IPs
+  mirror-umn3:  # same pattern as mirror-umn2, next address
+    address: 128.101.240.216
+    parents: mirror-umn
+    hostgroups: secondary-IPs
+  mirror-umn4:  # NOTE(review): unlike umn2/umn3, not listed in any "network service - rsync" hosts line - confirm
+    address: 128.101.240.217
+    parents: mirror-umn
+    hostgroups: secondary-IPs
   # }}}
   # {{{ gw-unicamp
   prokofiev:
-    address: 177.220.10.78
+    address: 177.220.10.140
     parents: gw-unicamp
     hostgroups: computers, jessie, service
   powerpc-unicamp-01:
-    address: 177.220.10.79
+    address: 177.220.10.141
     parents: prokofiev
     hostgroups: computers, hassrvfs, buildd, jessie
   ppc64el-unicamp-01:
-    address: 177.220.10.80
+    address: 177.220.10.142
     parents: prokofiev
     hostgroups: computers, hassrvfs, buildd, jessie
   plummer:
-    address: 177.220.10.81
+    address: 177.220.10.143
     parents: prokofiev
     hostgroups: computers, porterbox, hassrvfs, jessie
   # }}}
@@ -1162,6 +1198,9 @@ hostgroups:
   general:
     alias: general purpose developer accessible machines
 
+  hpnewraid:  # selects "HW - hpssacli status"; excluded from the dl*/bl460 "HW - hpacucli status" check
+    alias: new (2015+) machines where we need hpssacli instead of hpacucli
+    private: 1
   dl380:
     alias: HP DL380 hosts
     private: 1
@@ -1368,21 +1407,21 @@ services:
     check: "check_ping!350.0,20%!600.0,40%"
     hostgroups: pingable
     excludehostgroups: layer3-infrastructure, high-RTT
-    normal_check_interval: 5
+    check_interval: 5
     max_check_attempts: 4
     retry_check_interval: 1
   -
     name: PING
     check: "check_ping!600.0,20%!900.0,40%"
     hostgroups: high-RTT
-    normal_check_interval: 5
+    check_interval: 5
     max_check_attempts: 4
     retry_check_interval: 1
   -
     name: PING
     check: "check_ping!2000.0,60%!3000.0,80%"
     hostgroups: layer3-infrastructure
-    normal_check_interval: 5
+    check_interval: 5
     max_check_attempts: 4
     retry_check_interval: 1
   # }}}
@@ -1511,13 +1550,13 @@ services:
     name: setup - dsa config
     nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
     hostgroups: computers
-    normal_check_interval: 60
+    check_interval: 60
     excludehostgroups: alioth
   -
     name: setup - local hostname etc-hosts
     nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
     hostgroups: computers
-    normal_check_interval: 60
+    check_interval: 60
   # }}}
   # {{{ os health
   ####
@@ -1584,7 +1623,7 @@ services:
   -
     name: system - filesystem check
     nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems"
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
     hostgroups: computers
   # }}}
@@ -1595,7 +1634,7 @@ services:
     nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup"
     hostgroups: computers
     excludehosts: backuphost, storace, backuphost
-    normal_check_interval: 60
+    check_interval: 60
     max_check_attempts: 2
     retry_check_interval: 5
   -
@@ -1603,7 +1642,7 @@ services:
     servicegroups: backup
     nrpe: "/usr/lib/nagios/plugins/dsa-check-dabackup-server"
     hosts: storace
-    normal_check_interval: 60
+    check_interval: 60
     max_check_attempts: 2
     retry_check_interval: 5
   -
@@ -1613,7 +1652,7 @@ services:
     runfrom: dinis
     hostgroups: computers
     excludehostgroups: buildd, porterbox, no-bacula
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
   -
     name: backup - bacula - last full backup
@@ -1622,7 +1661,7 @@ services:
     runfrom: dinis
     hostgroups: computers
     excludehostgroups: buildd, porterbox, no-bacula
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
   -
     name: process - bacula-fd
@@ -1648,14 +1687,14 @@ services:
     servicegroups: kernel
     nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel"
     hostgroups: computers
-    normal_check_interval: 60
+    check_interval: 60
     retry_check_interval: 5
   -
     name: apt - security updates
     servicegroups: apt
     nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt"
     hostgroups: computers
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
   -
     name: unexpected file - apt sources.list
@@ -1669,7 +1708,7 @@ services:
     #nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs"
     hostgroups: computers
     excludehostgroups: freebsd
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
     notification_interval: 10080
   -
@@ -1708,7 +1747,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/samhain"
     hostgroups: computers
     depends: process - samhain
-    normal_check_interval: 60
+    check_interval: 60
     retry_check_interval: 5
     excludehostgroups: brokensamhain
   -
@@ -1763,7 +1802,7 @@ services:
     check: dsa_check_ssh
     hostgroups: computers
     depends: process - sshd
-    normal_check_interval:  60
+    check_interval:  60
     notification_interval: 1440
   ####
   -
@@ -1781,18 +1820,13 @@ services:
   ###
   -
     name: process - munin-node
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a '/usr/sbin/munin-node'"
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'"
     hostgroups: computers
-    excludehostgroups: freebsd, armhf
+    excludehostgroups: freebsd
   -
     name: process - munin-node
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C perl -a '/usr/bin/perl -wT /usr/sbin/munin-node'"
     hostgroups: freebsd
-  -
-    name: process - munin-node
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'"
-    hostgroups: wheezy, jessie
-    excludehostgroups: freebsd
   -
     name: network service - munin-node
     check: check_tcp!4949
@@ -1843,7 +1877,7 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
     runfrom: rainier
     hostgroups: computers
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
     excludehostgroups: alioth, broken_mq
   -
@@ -1852,7 +1886,7 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa"
     runfrom: rapoport
     hostgroups: computers
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
     excludehostgroups: alioth, broken_mq
   ###
@@ -1860,7 +1894,7 @@ services:
     name: local resolver
     nrpe: "/usr/lib/nagios/plugins/dsa-check-resolver www.debian.org www.google.com"
     hostgroups: computers
-    normal_check_interval: 60
+    check_interval: 60
   -
     name: process - unbound
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
@@ -1933,7 +1967,7 @@ services:
     name: unwanted process - openvpn
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C openvpn"
     hostgroups: computers
-    normal_check_interval: 120
+    check_interval: 120
   -
     name: unwanted process - gkrellmd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C gkrellmd"
@@ -2000,76 +2034,77 @@ services:
     name: HW - hpacucli status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli"
-    normal_check_interval: 120
+    check_interval: 120
     hostgroups: dl385, dl380, dl360, bl460, dl180
-    excludehosts: schein, rietz, mirror-anu, mirror-isc, mirror-umn
+    excludehosts: schein, rietz
+    excludehostgroups: hpnewraid
   -
     name: HW - hpacucli status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-controller-ok --ignore-controller='P700m'"
-    normal_check_interval: 120
+    check_interval: 120
     hostgroups: bm-bl
   -
     name: HW - hpacucli status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --no-battery"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: schein, rietz
   -
     name: HW - hpacucli enclosure status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli-enclosure 1 1E:1"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: franck
   -
     name: HW - hpacucli status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli --ignore-transfer-speed=1I:1:1 --ignore-transfer-speed=1I:1:2"
-    normal_check_interval: 120
+    check_interval: 120
     hostgroups: dl585
   -
     name: HW - hpssacli status
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpssacli"
-    normal_check_interval: 120
-    hosts: mirror-anu, mirror-isc, mirror-umn
+    check_interval: 120
+    hostgroups: hpnewraid
   ###
 #  -
 #    name: HW - edac status
 #    nrpe: "/usr/lib/nagios/plugins/dsa-check-edac"
-#    normal_check_interval: 120
+#    check_interval: 120
     #hostgroups: computers
     #excludehosts: villa, lobos, schein
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm"
-    normal_check_interval: 120
+    check_interval: 120
     hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl
     excludehosts: villa, lobos, schein, storace, mirror-anu
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: villa
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant  --ignore-failed='PS2'"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: lobos
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: schein
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: storace
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'"
-    normal_check_interval: 120
+    check_interval: 120
     hosts: mirror-anu
   ###
   -
@@ -2095,6 +2130,12 @@ services:
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All"
     hostgroups: drbd-hosts
+    excludehosts: ubc-bl8
+  -
+    name: RAID - DRBD  # ubc-bl8-only variant; the drbd-hosts entry directly above excludes ubc-bl8
+    servicegroups: raid
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All --ok-no-devices"  # same command as above plus --ok-no-devices
+    hosts: ubc-bl8
   # }}}
   # }}}
   # {{{ ### mail stuff
@@ -2302,7 +2343,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_http -H localhost -p 465 -S -C 14 -t 45"
     hostgroups: postfix-hosts
     depends: process - postfix - master
-    normal_check_interval: 120
+    check_interval: 120
   # }}}
   # {{{ mail - network service
   -
@@ -2379,31 +2420,31 @@ services:
     excludehosts: menotti
     excludehostgroups: broken_https_default_vhost
     depends: "process - apache2 - master"
-    normal_check_interval: 120
+    check_interval: 120
   -
     name: network service - https
     check: dsa_check_https_want_auth
     hosts: menotti
     depends: "process - apache2 - master"
-    normal_check_interval: 120
+    check_interval: 120
   -
     name: network service - https
     check: dsa_check_https_any_status
     hostgroups: broken_https_default_vhost
     depends: "process - apache2 - master"
-    normal_check_interval: 120
+    check_interval: 120
   -
     name: network service - https cert
     check: dsa_check_cert!443
     hostgroups: apache-https
     depends: network service - https
-    normal_check_interval: 60
+    check_interval: 60
   -
     name: unwanted network service - https
     check: dsa_check_port_closed!443
     hostgroups: apache2-hosts
     excludehostgroups: apache-https
-    normal_check_interval: 60
+    check_interval: 60
   # }}}
   # {{{ FTP
   -
@@ -2483,7 +2524,7 @@ services:
     nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s ZNs -c 0"
     hostgroups: buildd
     contact_groups: +buildd
-    normal_check_interval: 5
+    check_interval: 5
     max_check_attempts: 24
     retry_check_interval: 5
   -
@@ -2533,14 +2574,14 @@ services:
     name: mirror sync - packages
     check: "dsa_check_mirrorsync_skew!packages.debian.org!Pics/.trace!3600:57600"
     hosts: global
-    normal_check_interval: 15
+    check_interval: 15
     max_check_attempts: 5
     retry_check_interval: 5
   -
     name: mirror sync - snapshot
     check: "dsa_check_mirrorsync_skew!snapshot.debian.org!project/trace/snapshot-master.debian.org!3600:28800"
     hosts: global
-    normal_check_interval: 15
+    check_interval: 15
     max_check_attempts: 5
     retry_check_interval: 5
   # }}}
@@ -2623,7 +2664,7 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/check_ping -H $HOSTADDRESS$ -w 50,10% -c 200,30%"
     runfrom: ubc-bl8
     hosts: giustini
-    normal_check_interval: 5
+    check_interval: 5
     max_check_attempts: 4
     retry_check_interval: 1
   -
@@ -2642,7 +2683,7 @@ services:
     name: current chroots
     nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current"
     hostgroups: porterbox
-    normal_check_interval:  60
+    check_interval:  60
     retry_check_interval: 15
   # }}}
   # {{{ openstack
@@ -2707,7 +2748,7 @@ services:
     name: network service - ldaps cert
     check: dsa_check_cert!636
     depends: process - slapd
-    normal_check_interval: 60
+    check_interval: 60
     hosts: draghi
   ###
   -
@@ -2732,6 +2773,21 @@ services:
     check: check_tcp!873
     hosts: milanollo2
     depends: milanollo:process - xinetd
+  -
+    name: network service - rsync  # TCP port 873 probe on hosts parented to mirror-isc
+    check: check_tcp!873
+    hosts: mirror-isc2, mirror-isc-syncproxy  # NOTE(review): mirror-isc3 is not listed - confirm intentional
+    depends: mirror-isc:process - xinetd  # presumably host:service form, i.e. the xinetd process check on mirror-isc
+  -
+    name: network service - rsync  # TCP port 873 probe on hosts parented to mirror-umn
+    check: check_tcp!873
+    hosts: mirror-umn2, mirror-umn3  # NOTE(review): mirror-umn4 is not listed - confirm intentional
+    depends: mirror-umn:process - xinetd  # presumably host:service form, i.e. the xinetd process check on mirror-umn
+  -
+    name: network service - rsync  # TCP port 873 probe on hosts parented to mirror-anu
+    check: check_tcp!873
+    hosts: mirror-anu2, mirror-anu3  # NOTE(review): mirror-anu4 is not listed - confirm intentional
+    depends: mirror-anu:process - xinetd  # presumably host:service form, i.e. the xinetd process check on mirror-anu
   ###
   -
     name: process - icinga
@@ -2752,19 +2808,19 @@ services:
   -
     name: network service - sip-tls cert - 443
     check: dsa_check_cert!443
-    normal_check_interval: 60
+    check_interval: 60
     hosts: vogler
   -
     name: network service - sip-tls cert - 5061
     check: dsa_check_cert!5061
-    normal_check_interval: 60
+    check_interval: 60
     hosts: vogler
   ####
   -
     name: puppetmaster cert
     nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem"
     hosts: handel
-    normal_check_interval: 60
+    check_interval: 60
     max_check_attempts: 2
     retry_check_interval: 5
   # }}}