sibelius no longer runs postgresql
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index ca5bdcf..06bfb67 100644 (file)
@@ -139,7 +139,7 @@ servers:
     parents: gw-ubcece
     hostgroups: layer3-infrastructure
   gw-unicamp:
-    address: 177.220.10.129
+    address: 143.106.167.113
     parents: gw-ubcece
     hostgroups: layer3-infrastructure
   gw-utwente:
@@ -238,15 +238,15 @@ servers:
   arm-arm-01:
     address: 217.140.96.58
     parents: gw-arm
-    hostgroups: computers, hassrvfs, buildd, stretch, broken_mq
+    hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid
   arm-arm-03:
     address: 217.140.96.60
     parents: gw-arm
-    hostgroups: computers, hassrvfs, buildd, stretch, broken_mq
+    hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid
   arm-arm-04:
     address: 217.140.96.61
     parents: gw-arm
-    hostgroups: computers, hassrvfs, buildd, stretch, broken_mq
+    hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid
   harris:
     address: 217.140.96.66
     parents: gw-arm
@@ -405,9 +405,7 @@ servers:
   moszumanska:
     address: 5.153.231.21
     parents: ganeti-bytemark
-    contact_groups: alioth-admins
-    hostgroups: computers, general, wheezy, postgres91-hosts, apache2-hosts, acpid-hosts, apache-https, brokensamhain, no-bacula, bind9-hosts, xinetd-hosts, alioth, heavy-exim, spamd
-    no-servicegroups: true
+    hostgroups: secondary-IPs
   dillon:
     address: 5.153.231.22
     parents: ganeti-bytemark
@@ -517,11 +515,11 @@ servers:
   conova-node01:
     address: 217.196.149.227
     parents: gw-conova
-    hostgroups: computers, stretch, service, sw-raid
+    hostgroups: computers, stretch, service, sw-raid, drbd-hosts
   conova-node02:
     address: 217.196.149.228
     parents: gw-conova
-    hostgroups: computers, stretch, service, sw-raid
+    hostgroups: computers, stretch, service, sw-raid, drbd-hosts
   ganeti-conova:
     address: 217.196.149.235
     parents: gw-conova
@@ -593,7 +591,7 @@ servers:
   pkgmirror-csail:
     address: 128.31.0.51
     parents: ganeti-csail
-    hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, no-bacula, apache-https, hassrvfs, systemd-timesyncd
+    hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, no-bacula, apache-https, hassrvfs, systemd-timesyncd, varnish-hosts
   usper:
     address: 128.31.0.69
     parents: ganeti-csail
@@ -683,27 +681,32 @@ servers:
   lw01:
     address: 185.17.185.177
     parents: gw-leaseweb
-    hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+    hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
   lw02:
     address: 185.17.185.178
     parents: gw-leaseweb
-    hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+    hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
   lw03:
     address: 185.17.185.179
     parents: gw-leaseweb
-    hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+    hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
   lw04:
     address: 185.17.185.180
     parents: gw-leaseweb
-    hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts
+    hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts
   lw07:
     address: 185.17.185.187
     parents: gw-leaseweb
-    hostgroups: computers, service, jessie, dl180, nfs-client, autofs, hassrvfs, postgres94-hosts, apache2-hosts
+    hostgroups: computers, service, stretch, dl180, nfs-client, autofs, hassrvfs, postgres96-hosts, apache2-hosts, haproxy-hosts, haproxy-https-host, varnish-hosts
+  lw07-2:
+    address: 185.17.185.185
+    parents: lw07
+    hostgroups: secondary-IPs, https-service
+
   lw08:
     address: 185.17.185.189
     parents: gw-leaseweb
-    hostgroups: computers, service, jessie, dl180, nfs-client, autofs, hassrvfs, apache2-hosts
+    hostgroups: computers, service, stretch, dl180, nfs-client, autofs, hassrvfs, apache2-hosts
   lw09:
     address: 185.17.185.181
     parents: gw-leaseweb
@@ -749,7 +752,7 @@ servers:
   handel:
     address: 82.195.75.104
     parents: ganeti3
-    hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts
+    hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs
   kaufmann:
     address: 82.195.75.107
     parents: ganeti3
@@ -827,29 +830,20 @@ servers:
     address: 140.211.166.196
     parents: pieta
     hostgroups: computers, hassrvfs, buildd, stretch
-  powerpc-osuosl-01:
-    address: 140.211.166.197
-    parents: pieta
-    hostgroups: computers, hassrvfs, buildd, jessie
-  partch:
-    address: 140.211.15.152
-    parents: gw-osuosl
-    hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid
   # }}}
   # {{{ gw-sanger
   sallinen:
     address: 193.62.202.26
     parents: gw-sanger
-    hostgroups: computers, service, stretch, dl380, nfs-client, autofs, postgres96-hosts
+    hostgroups: computers, service, stretch, dl380, nfs-client, autofs, postgres96-hosts, apache2-hosts, haproxy-hosts, haproxy-https-host, varnish-hosts
+  sallinen-2:
+    address: 193.62.202.27
+    parents: sallinen
+    hostgroups: secondary-IPs, https-service
   sibelius:
     address: 193.62.202.28
     parents: gw-sanger
-    hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server
-    contacts: tjrc1, dave
-  smetana:
-    address: 193.62.202.29
-    parents: gw-sanger
-    hostgroups: computers, sw-raid, sparc, wheezy, no-bacula
+    hostgroups: computers, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
     contacts: tjrc1, dave
   # }}}
   # {{{ gw-scanplus
@@ -992,7 +986,7 @@ servers:
   kantuser:
     address: 209.87.16.47
     parents: ubc-gateway
-    hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd
+    hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts
   grabbe:
     address: 209.87.16.48
     parents: ubc-gateway
@@ -1022,19 +1016,15 @@ servers:
   # }}}
   # {{{ gw-unicamp
   prokofiev:
-    address: 177.220.10.140
+    address: 143.106.167.124
     parents: gw-unicamp
     hostgroups: computers, stretch, service, manyprocesses
-  powerpc-unicamp-01:
-    address: 177.220.10.141
-    parents: prokofiev
-    hostgroups: computers, hassrvfs, buildd, jessie
   ppc64el-unicamp-01:
-    address: 177.220.10.142
+    address: 143.106.167.121
     parents: prokofiev
     hostgroups: computers, hassrvfs, buildd, stretch
   plummer:
-    address: 177.220.10.143
+    address: 143.106.167.122
     parents: prokofiev
     hostgroups: computers, porterbox, hassrvfs, stretch
   # }}}
@@ -1100,9 +1090,6 @@ hostgroups:
   armhf:
     alias: armhf
     private: 1
-  sparc:
-    alias: sparc
-    private: 1
 
   porterbox:
     alias: developer accessible porter machines
@@ -1146,8 +1133,6 @@ hostgroups:
     alias: Dell PowerEdge 1950 hosts
     private: 1
 
-  wheezy:
-    alias: Hosts running wheezy
   jessie:
     alias: Hosts running jessie
   stretch:
@@ -1191,12 +1176,6 @@ hostgroups:
   xinetd-hosts:
     alias: hosts providing services via xinetd
     private: 1
-  postgres91-hosts:
-    alias: hosts running postgres91
-    private: 1
-  postgres94-hosts:
-    alias: hosts running postgres94
-    private: 1
   postgres96-hosts:
     alias: hosts running postgres96
     private: 1
@@ -1228,6 +1207,15 @@ hostgroups:
     alias: hosts with lots and lots of (kernel) processes
   crazymanyprocesses:
     alias: hosts with stupidly lots of processes
+  varnish-hosts:
+    alias: hosts running varnish
+    private: 1
+  haproxy-hosts:
+    alias: hosts running haproxy
+    private: 1
+  haproxy-https-host:
+    alias: "host providing https on the standard port via haproxy"
+    private: 1
 
   no-bacula:
     alias: hosts which are not being backed up with bacula
@@ -1279,9 +1267,6 @@ hostgroups:
   high-RTT:
     alias: machines with high round trip times
     private: 1
-  alioth:
-    alias: machines that just are just awkward
-    private: 1
   #openstack-compute:
   #  alias: nodes that run OpenStack compute
   #  private: 1
@@ -1353,7 +1338,6 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw"
     hostgroups: computers
     check_interval: 60
-    excludehostgroups: alioth
   # }}}
   # {{{ ### disk usage
   -
@@ -1483,6 +1467,38 @@ services:
     servicegroups: diskspace
     nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-10"
     hosts: lw10
+
+  -
+    name: disk usage on nfs farm 1
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-1"
+    hosts: lw07
+  -
+    name: disk usage on nfs farm 2
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-2"
+    hosts: lw07
+  -
+    name: disk usage on nfs farm 3
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-3"
+    hosts: lw07
+  -
+    name: disk usage on nfs farm 4
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-4"
+    hosts: lw07
+  -
+    name: disk usage on nfs farm 09
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-09"
+    hosts: lw07
+  -
+    name: disk usage on nfs farm 10
+    servicegroups: diskspace
+    nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-10"
+    hosts: lw07
+
   -
     name: disk usage on /srv/morgue.debian.org/
     servicegroups: diskspace
@@ -1506,7 +1522,6 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
     hostgroups: computers
     check_interval: 60
-    excludehostgroups: alioth
   -
     name: setup - local hostname etc-hosts
     nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
@@ -1553,16 +1568,10 @@ services:
     name: free memory - percent
     nrpe: "/usr/lib/nagios/plugins/dsa-check-memory -m pct"
     hostgroups: computers
-  -
-    name: process - getty
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty"
-    hostgroups: computers
-    excludehosts: zelenka, zandonai
-    excludehostgroups: jessie, stretch
   -
     name: process - getty
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty"
-    hostgroups: jessie, stretch
+    hostgroups: computers
 
   -
     name: processes - zombies
@@ -1626,7 +1635,6 @@ services:
     servicegroups: backup
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'"
     hostgroups: computers
-    excludehostgroups: alioth
 
   -
     name: network backup status - draghi
@@ -1676,19 +1684,10 @@ services:
     name: puppetized firewall
     nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/conf.d/defs.conf"
     hostgroups: computers
-  -
-    name: process - ulogd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'"
-    hostgroups: computers
-    excludehostgroups: sparc, jessie, stretch
   -
     name: process - ulogd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'"
-    hostgroups: jessie, stretch
-  -
-    name: unexpected process - ulogd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd"
-    hostgroups: sparc
+    hostgroups: computers
   ####
   -
     name: process - samhain
@@ -1712,34 +1711,26 @@ services:
     excludehostgroups: brokensamhain
   # }}}
   # {{{ logging
-  -
-    name: process - syslog-ng
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng  -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'"
-    hostgroups: computers
-    excludehostgroups: jessie, stretch
   -
     name: process - syslog-ng
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng  -a '/sbin/syslog-ng -F'"
-    hostgroups: jessie, stretch
+    hostgroups: computers
 
   -
     name: remote logging on lotti
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: lotti
     hostgroups: computers
-    excludehostgroups: alioth
   -
     name: remote logging on lully
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: lully
     hostgroups: computers
-    excludehostgroups: alioth
   -
     name: remote logging on loghost-grnet-01
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: loghost-grnet-01
     hostgroups: computers
-    excludehostgroups: alioth
   # }}}
   # {{{ base service
   -
@@ -1801,7 +1792,7 @@ services:
     name: system time synced
     nrpe: "/usr/lib/nagios/plugins/dsa-check-timedatectl -s"
     hostgroups: computers
-    excludehostgroups: systemd-timesyncd, wheezy
+    excludehostgroups: systemd-timesyncd
     servicegroups: time
   -
     name: system time synced
@@ -1818,7 +1809,7 @@ services:
     name: process - irqbalance
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'"
     hostgroups: computers
-    excludehosts: harris, smetana
+    excludehosts: harris
   ###
   -
     name: process - cron
@@ -1829,13 +1820,6 @@ services:
     name: process - ud-replicated
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'"
     hostgroups: computers
-    excludehostgroups: alioth
-  ###
-  -
-    name: process - monit
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'"
-    hostgroups: computers
-    excludehostgroups: alioth, jessie, stretch
   ###
   -
     name: MQ connection on rainier
@@ -1845,7 +1829,7 @@ services:
     hostgroups: computers
     check_interval:  60
     retry_interval: 15
-    excludehostgroups: alioth, broken_mq
+    excludehostgroups: broken_mq
   -
     name: MQ connection on rapoport
     servicegroups: MQ
@@ -1854,7 +1838,7 @@ services:
     hostgroups: computers
     check_interval:  60
     retry_interval: 15
-    excludehostgroups: alioth, broken_mq
+    excludehostgroups: broken_mq
   ###
   -
     name: local resolver
@@ -1865,28 +1849,21 @@ services:
     name: process - unbound
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
     hostgroups: computers
-    excludehostgroups: alioth
-  ###
   -
-    name: process - uptimed
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'"
+    name: unbound trust anchors
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors"
     hostgroups: computers
+    check_interval: 60
   ###
   -
-    name: process - udevd
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'"
+    name: process - uptimed
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'"
     hostgroups: computers
-    excludehostgroups: jessie, stretch
   -
     name: process - udevd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'"
-    hostgroups: jessie, stretch
+    hostgroups: computers
   ###
-  -
-    name: process - acpid
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'"
-    hostgroups: acpid-hosts
-    excludehostgroups: jessie, stretch
   -
     name: unexpected process - acpid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid"
@@ -1906,13 +1883,11 @@ services:
   -
     name: process - stunnel4 - puppet-ekeyd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
-    hostgroups: wheezy, jessie, stretch
-    excludehostgroups: alioth
+    hostgroups: computers
   -
     name: process - stunnel4 - puppet-ekeyd is crazy
     nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity"
     hostgroups: computers
-    excludehostgroups: alioth
     excludehosts: czerny, grnet-node01, storace
   # }}}
   # {{{ anti-services
@@ -1973,18 +1948,11 @@ services:
     runfrom: handel
   # }}}
   # {{{ HW health/raid
-  -
-    name: process - mdadm monitor
-    servicegroups: raid
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'"
-    hostgroups: sw-raid
-    excludehostgroups: jessie, stretch
   -
     name: process - mdadm monitor
     servicegroups: raid
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --scan'"
     hostgroups: sw-raid
-    excludehostgroups: wheezy
   -
     name: RAID - sw raid
     servicegroups: raid
@@ -2091,6 +2059,15 @@ services:
     name: mail queue
     nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000"
     hostgroups: heavy-exim
+  -
+    name: process - fail2ban
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server"
+    hostgroups: heavy-exim, heavy-postfix
+  -
+    name: unwanted process - fail2ban
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server"
+    hostgroups: computers
+    excludehostgroups: heavy-exim, heavy-postfix
   # }}}
   # {{{ clamav
   -
@@ -2102,16 +2079,10 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl"
     hostgroups: heavy-exim, heavy-postfix
     depends: process - clamav - clamd
-  -
-    name: process - clamav - freshclam
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'"
-    hostgroups: heavy-exim, heavy-postfix
-    excludehostgroups: jessie, stretch
   -
     name: process - clamav - freshclam
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --foreground=true'"
     hostgroups: heavy-exim, heavy-postfix
-    excludehostgroups: wheezy
   -
     name: unwanted process - clamav
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C clamd"
@@ -2124,18 +2095,11 @@ services:
     excludehostgroups: heavy-exim, heavy-postfix
   # }}}
   # {{{ anti-spam
-  -
-    name: process - spamd - master
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'"
-    hostgroups: spamd
-    excludehosts: picconi
-    excludehostgroups: jessie, stretch
   -
     name: process - spamd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamd.pid --create-prefs --max-children 5 --helper-home-dir'"
     hostgroups: spamd
     excludehosts: picconi
-    excludehostgroups: wheezy
   -
     name: process - spamd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamd.pid --create-prefs --max-children 20 --min-spare=5 --helper-home-dir'"
@@ -2164,16 +2128,10 @@ services:
     hostgroups: computers
 
   ###
-  -
-    name: process - postgrey
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
-    hostgroups: heavy-exim
-    excludehostgroups: jessie, stretch
   -
     name: process - postgrey
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'"
     hostgroups: heavy-exim
-    excludehostgroups: wheezy
   -
     name: process - postgrey
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --inet=127.0.0.1:60000'"
@@ -2204,23 +2162,23 @@ services:
   -
     name: process - weightd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'"
-    hostgroups: heavy-postfix, alioth
+    hostgroups: heavy-postfix
   -
     name: process - weightd - cache
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'"
-    hostgroups: heavy-postfix, alioth
+    hostgroups: heavy-postfix
     depends: process - weightd - master
   -
     name: process - weightd - child
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'"
-    hostgroups: heavy-postfix, alioth
+    hostgroups: heavy-postfix
     depends: process - weightd - master
   ###
   -
     name: unwanted process - policyd-weight
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight"
     hostgroups: computers
-    excludehostgroups: heavy-postfix, alioth
+    excludehostgroups: heavy-postfix
   # }}}
   # {{{ postfix
   ###
@@ -2372,15 +2330,52 @@ services:
   -
     name: network service - https cert
     check: dsa_check_cert!443
-    hostgroups: apache-https, https-service
+    hostgroups: apache-https, https-service, haproxy-https-host
     depends: network service - https
     check_interval: 60
   -
     name: unwanted network service - https
     check: dsa_check_port_closed!443
     hostgroups: apache2-hosts
-    excludehostgroups: apache-https
+    excludehostgroups: apache-https, haproxy-https-host
     check_interval: 60
+
+  ###
+  -
+    name: process - haproxy - master
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -a '/usr/sbin/haproxy-systemd-wrapper'"
+    hostgroups: haproxy-hosts
+  -
+    name: process - haproxy - worker
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1:15 -u haproxy -a '/usr/sbin/haproxy '"
+    hostgroups: haproxy-hosts
+    depends: process - haproxy - master
+  -
+    name: network service - https
+    check: check_https
+    hostgroups: haproxy-https-host
+    depends: "process - haproxy - master"
+    check_interval: 120
+
+  -
+    name: unwanted process - haproxy
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C haproxy"
+    hostgroups: computers
+    excludehostgroups: haproxy-hosts
+
+  ###
+  -
+    name: process - varnish
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1:15 -u vcache -a '/usr/sbin/varnishd -j unix,user=vcache -F -a '"
+    hostgroups: varnish-hosts
+    excludehostgroups: jessie
+  -
+    name: unwanted process - varnish
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C varnishd"
+    hostgroups: computers
+    excludehostgroups: varnish-hosts
+
+
   # }}}
   # {{{ FTP
   -
@@ -2393,19 +2388,11 @@ services:
     name: unwanted process - postgresql
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres"
     hostgroups: computers
-    excludehostgroups: postgres91-hosts, postgres94-hosts, postgres96-hosts
+    excludehostgroups: postgres96-hosts
   -
     name: unwanted process - postgresql 9.0
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'"
     hostgroups: computers
-  -
-    name: process - postgresql91 - master
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'"
-    hostgroups: postgres91-hosts
-  -
-    name: process - postgresql94 - master
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'"
-    hostgroups: postgres94-hosts
   -
     name: process - postgresql96 - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'"
@@ -2828,7 +2815,7 @@ services:
   -
     name: system - all services running
     nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-systemd-services"
-    hostgroups: jessie, stretch
+    hostgroups: computers
   ###
   -
     name: process - slapd
@@ -2895,9 +2882,23 @@ services:
     name: puppet - agent check
     nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/puppet-agent"
     hostgroups: computers
-    excludehosts: moszumanska
     check_interval:  60
     retry_interval: 15
+  ####
+  -
+    name: ping peer on mgmt network
+    nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%"
+    hosts: conova-node01
+    check_interval: 5
+    max_check_attempts: 4
+    retry_interval: 1
+  -
+    name: ping peer on mgmt network
+    nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%"
+    hosts: conova-node02
+    check_interval: 5
+    max_check_attempts: 4
+    retry_interval: 1
   # }}}
 # }}}