fix syntax
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index 9301c30..06ec26d 100644 (file)
@@ -176,6 +176,10 @@ servers:
     address: 130.239.18.121
     parents: gw-accumu
     hostgroups: computers, buildd, hassrvfs, jessie
+  boman:
+    address: 130.239.18.124
+    parents: gw-accumu
+    hostgroups: computers, service, jessie, apache2-hosts, hassrvfs
   # }}}
   # {{{ gw-aql
   eller:
@@ -457,6 +461,10 @@ servers:
     address: 5.153.231.36
     parents: ganeti-bytemark
     hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, systemd-timesyncd
+  bilbao:
+    address: 5.153.231.37
+    parents: ganeti-bytemark
+    hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd, apache2-hosts, hassrvfs
   sor:
     address: 5.153.231.38
     parents: ganeti-bytemark
@@ -481,6 +489,11 @@ servers:
     address: 5.153.231.44
     parents: ganeti-bytemark
     hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, apache2-hosts, apache-https, systemd-timesyncd
+
+  casulana:
+    address: 5.153.231.41
+    parents: gw-bytemark
+    hostgroups: computers, service, stretch, hpnewraid, hassrvfs, dl380
   # }}}
   # {{{ gw-c3sl
   santoro:
@@ -516,6 +529,10 @@ servers:
     address: 217.196.149.228
     parents: gw-conova
     hostgroups: computers, jessie, service, sw-raid
+  mirror-conova:
+    address: 217.196.149.229
+    parents: gw-conova
+    hostgroups: computers, jessie, service
 
   arm-conova-01:
     address: 217.196.149.230
@@ -629,7 +646,7 @@ servers:
   porta:
     address: 194.177.211.207
     parents: ganeti-grnet
-    hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-hosts, xinetd-hosts, systemd-timesyncd
+    hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-systemd-hosts, xinetd-hosts, systemd-timesyncd
   melartin:
     address: 194.177.211.208
     parents: ganeti-grnet
@@ -750,7 +767,7 @@ servers:
   kaufmann:
     address: 82.195.75.107
     parents: ganeti3
-    hostgroups: computers, service, apache2-hosts, rsyncd-hosts, kvmdomains, xinetd-hosts, jessie, apache-https
+    hostgroups: computers, service, apache2-hosts, rsyncd-systemd-hosts, kvmdomains, jessie, apache-https
   stockhausen:
     address: 82.195.75.108
     parents: ganeti3
@@ -843,6 +860,10 @@ servers:
     hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid
   # }}}
   # {{{ gw-sanger
+  sallinen:
+    address: 193.62.202.26
+    parents: gw-sanger
+    hostgroups: computers, service, jessie, dl380, hpnewraid
   sibelius:
     address: 193.62.202.28
     parents: gw-sanger
@@ -1231,6 +1252,8 @@ hostgroups:
     alias: Hosts running jessie
   jessie-freebsd:
     alias: kFreebsd hosts running jessie
+  stretch:
+    alias: Hosts running stretch
 
   kvmdomains:
     alias: Hosts that are KVM domains
@@ -1267,6 +1290,9 @@ hostgroups:
   rsyncd-hosts:
     alias: hosts providing rsync services via xinetd
     private: 1
+  rsyncd-systemd-hosts:
+    alias: hosts providing rsync services via systemd
+    private: 1
   xinetd-hosts:
     alias: hosts providing services via xinetd
     private: 1
@@ -1402,21 +1428,21 @@ services:
     excludehostgroups: layer3-infrastructure, high-RTT
     check_interval: 5
     max_check_attempts: 4
-    retry_check_interval: 1
+    retry_interval: 1
   -
     name: PING
     check: "check_ping!600.0,20%!900.0,40%"
     hostgroups: high-RTT
     check_interval: 5
     max_check_attempts: 4
-    retry_check_interval: 1
+    retry_interval: 1
   -
     name: PING
     check: "check_ping!2000.0,60%!3000.0,80%"
     hostgroups: layer3-infrastructure
     check_interval: 5
     max_check_attempts: 4
-    retry_check_interval: 1
+    retry_interval: 1
   # }}}
   # {{{ ### disk usage
   -
@@ -1444,7 +1470,7 @@ services:
   -
     name: disk usage on /srv
     servicegroups: diskspace
-    nrpe: "/usr/lib/nagios/plugins/check_disk 95 98 /srv"
+    nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /srv"
     hostgroups: hassrvfs
   -
     name: disk usage on /var/lib/postgresql
@@ -1574,11 +1600,11 @@ services:
     name: processes - total
     nrpe: "/usr/lib/nagios/plugins/check_procs 620 700"
     hostgroups: computers
-    excludehosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo
+    excludehosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo, casulana
   -
     name: processes - total
     nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700"
-    hosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo
+    hosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo, casulana
   -
     name: swap usage - percent
     nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%"
@@ -1592,7 +1618,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty"
     hostgroups: computers
     excludehosts: zelenka, zandonai
-    excludehostgroups: jessie
+    excludehostgroups: jessie, stretch
   -
     name: process - getty
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/getty"
@@ -1600,7 +1626,7 @@ services:
   -
     name: process - getty
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty"
-    hostgroups: jessie
+    hostgroups: jessie, stretch
     excludehostgroups: freebsd
 
   -
@@ -1617,7 +1643,7 @@ services:
     name: system - filesystem check
     nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems"
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
     hostgroups: computers
   # }}}
   # {{{ backup
@@ -1629,7 +1655,7 @@ services:
     excludehosts: backuphost, storace
     check_interval: 60
     max_check_attempts: 2
-    retry_check_interval: 5
+    retry_interval: 5
   -
     name: backup server config
     servicegroups: backup
@@ -1637,7 +1663,7 @@ services:
     hosts: storace
     check_interval: 60
     max_check_attempts: 2
-    retry_check_interval: 5
+    retry_interval: 5
   -
     name: backup - bacula - last backup
     servicegroups: backup
@@ -1646,7 +1672,7 @@ services:
     hostgroups: computers
     excludehostgroups: buildd, porterbox, no-bacula
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
   -
     name: backup - bacula - last full backup
     servicegroups: backup
@@ -1655,7 +1681,7 @@ services:
     hostgroups: computers
     excludehostgroups: buildd, porterbox, no-bacula
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
   -
     name: process - bacula-dir
     servicegroups: backup
@@ -1686,14 +1712,14 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel"
     hostgroups: computers
     check_interval: 60
-    retry_check_interval: 5
+    retry_interval: 5
   -
     name: apt - security updates
     servicegroups: apt
     nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt"
     hostgroups: computers
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
   -
     name: unexpected file - apt sources.list
     servicegroups: apt
@@ -1707,7 +1733,7 @@ services:
     hostgroups: computers
     excludehostgroups: freebsd
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
     notification_interval: 10080
   -
     name: installed firewall
@@ -1723,11 +1749,11 @@ services:
     name: process - ulogd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'"
     hostgroups: computers
-    excludehostgroups: freebsd, sparc, jessie
+    excludehostgroups: freebsd, sparc, jessie, stretch
   -
     name: process - ulogd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'"
-    hostgroups: jessie
+    hostgroups: jessie, stretch
     excludehostgroups: freebsd
   -
     name: unexpected process - ulogd
@@ -1746,7 +1772,7 @@ services:
     hostgroups: computers
     depends: process - samhain
     check_interval: 60
-    retry_check_interval: 5
+    retry_interval: 5
     excludehostgroups: brokensamhain
   -
     name: processes - samhain zombies
@@ -1760,7 +1786,7 @@ services:
     name: process - syslog-ng
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng  -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'"
     hostgroups: computers
-    excludehostgroups: freebsd, jessie
+    excludehostgroups: freebsd, jessie, stretch
   -
     name: process - syslog-ng
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng  -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'"
@@ -1768,7 +1794,7 @@ services:
   -
     name: process - syslog-ng
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng  -a '/sbin/syslog-ng -F'"
-    hostgroups: jessie
+    hostgroups: jessie, stretch
     excludehostgroups: freebsd
 
   -
@@ -1813,6 +1839,13 @@ services:
     name: process - nrpe
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d'"
     hostgroups: computers
+    excludehostgroups: stretch
+    max_check_attempts: -1
+    depends: network service - nrpe
+  -
+    name: process - nrpe
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -f'"
+    hostgroups: stretch
     max_check_attempts: -1
     depends: network service - nrpe
   ###
@@ -1880,7 +1913,7 @@ services:
     name: process - monit
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
     hostgroups: computers
-    excludehostgroups: alioth, jessie
+    excludehostgroups: alioth, jessie, stretch
   ###
   -
     name: MQ connection on rainier
@@ -1889,7 +1922,7 @@ services:
     runfrom: rainier
     hostgroups: computers
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
     excludehostgroups: alioth, broken_mq
   -
     name: MQ connection on rapoport
@@ -1898,7 +1931,7 @@ services:
     runfrom: rapoport
     hostgroups: computers
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
     excludehostgroups: alioth, broken_mq
   ###
   -
@@ -1921,11 +1954,11 @@ services:
     name: process - udevd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'"
     hostgroups: computers
-    excludehostgroups: freebsd, jessie
+    excludehostgroups: freebsd, jessie, stretch
   -
     name: process - udevd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'"
-    hostgroups: jessie
+    hostgroups: jessie, stretch
     excludehostgroups: freebsd
   -
     name: unexpected process - udev
@@ -1936,7 +1969,7 @@ services:
     name: process - acpid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'"
     hostgroups: acpid-hosts
-    excludehostgroups: jessie
+    excludehostgroups: jessie, stretch
   -
     name: unexpected process - acpid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid"
@@ -1956,7 +1989,7 @@ services:
   -
     name: process - stunnel4 - puppet-ekeyd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
-    hostgroups: wheezy, jessie
+    hostgroups: wheezy, jessie, stretch
     excludehostgroups: freebsd, alioth
   -
     name: process - stunnel4 - puppet-ekeyd is crazy
@@ -2037,7 +2070,7 @@ services:
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'"
     hostgroups: sw-raid
-    excludehostgroups: jessie
+    excludehostgroups: jessie, stretch
   -
     name: process - mdadm monitor
     servicegroups: raid
@@ -2100,7 +2133,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm"
     check_interval: 120
     hostgroups: dl380, dl360, bl460, bm-bl
-    excludehosts: villa, lobos, storace, mirror-anu
+    excludehosts: villa, lobos, storace, mirror-anu, sallinen
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant"
@@ -2115,7 +2148,7 @@ services:
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present"
     check_interval: 120
-    hosts: storace
+    hosts: storace, sallinen
   -
     name: HW - hpasmcli status
     nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'"
@@ -2182,7 +2215,7 @@ services:
     name: process - clamav - freshclam
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'"
     hostgroups: heavy-exim, heavy-postfix
-    excludehostgroups: jessie
+    excludehostgroups: jessie, stretch
   -
     name: process - clamav - freshclam
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --foreground=true'"
@@ -2205,7 +2238,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
     hostgroups: spamd
     excludehosts: picconi
-    excludehostgroups: jessie
+    excludehostgroups: jessie, stretch
   -
     name: process - spamd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamassassin.pid --create-prefs --max-children 5 --helper-home-dir'"
@@ -2244,7 +2277,7 @@ services:
     name: process - postgrey
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
     hostgroups: heavy-exim
-    excludehostgroups: jessie
+    excludehostgroups: jessie, stretch
   -
     name: process - postgrey
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
@@ -2529,7 +2562,7 @@ services:
     contact_groups: +buildd
     check_interval: 5
     max_check_attempts: 24
-    retry_check_interval: 5
+    retry_interval: 5
   -
     name: processes - lvcreate
     nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'"
@@ -2581,7 +2614,7 @@ services:
     hosts: global
     check_interval: 15
     max_check_attempts: 5
-    retry_check_interval: 5
+    retry_interval: 5
     servicegroups: mirror
   -
     name: mirror sync - snapshot
@@ -2589,7 +2622,7 @@ services:
     hosts: global
     check_interval: 15
     max_check_attempts: 5
-    retry_check_interval: 5
+    retry_interval: 5
     servicegroups: mirror
 
   -
@@ -2702,6 +2735,11 @@ services:
     check: "dsa_check_staticsync_nossl!deb.debian.org"
     hosts: global
     servicegroups: mirror
+  -
+    name: mirror static sync - manpages
+    check: "dsa_check_staticsync!manpages.debian.org"
+    hosts: global
+    servicegroups: mirror
 
   -
     name: mirror static sync - 10years
@@ -2749,8 +2787,8 @@ services:
     hosts: global
     servicegroups: mirror
   -
-    name: mirror static sync - debconf1
-    check: "dsa_check_staticsync!debconf1.debconf.org"
+    name: mirror static sync - debconf16
+    check: "dsa_check_staticsync!debconf16.debconf.org"
     hosts: global
     servicegroups: mirror
   -
@@ -2855,7 +2893,7 @@ services:
     hosts: giustini
     check_interval: 5
     max_check_attempts: 4
-    retry_check_interval: 1
+    retry_interval: 1
   -
     name: Overall Unit Status
     remotecheck: "/usr/lib/nagios/plugins/check_snmp -H $HOSTADDRESS$ -C public -P 2c -o connUnitStatus -n -c 3 -w 3"
@@ -2883,7 +2921,7 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current"
     hostgroups: porterbox
     check_interval:  60
-    retry_check_interval: 15
+    retry_interval: 15
   # }}}
   # {{{ openstack
 #  -
@@ -2931,7 +2969,7 @@ services:
   -
     name: system - all services running
     nrpe: "/usr/bin/sudo /bin/systemctl is-system-running"
-    hostgroups: jessie
+    hostgroups: jessie, stretch
     excludehostgroups: freebsd
   ###
   -
@@ -2951,6 +2989,10 @@ services:
     hosts: draghi
     depends: process - xinetd
   ###
+  -
+    name: network service - rsync
+    check: check_tcp!873
+    hostgroups: rsyncd-systemd-hosts
   -
     name: network service - rsync
     check: check_tcp!873
@@ -3010,7 +3052,7 @@ services:
     hosts: handel
     check_interval: 60
     max_check_attempts: 2
-    retry_check_interval: 5
+    retry_interval: 5
   # }}}
 # }}}