Fix typos in previous commits
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index e9719f7..65f096e 100644 (file)
@@ -402,10 +402,6 @@ servers:
     address: 5.153.231.20
     parents: ganeti-bytemark
     hostgroups: computers, general, kvmdomains, stretch, nfs-client, autofs, systemd-timesyncd
-  moszumanska:
-    address: 5.153.231.21
-    parents: ganeti-bytemark
-    hostgroups: secondary-IPs
   dillon:
     address: 5.153.231.22
     parents: ganeti-bytemark
@@ -515,11 +511,11 @@ servers:
   conova-node01:
     address: 217.196.149.227
     parents: gw-conova
-    hostgroups: computers, stretch, service, sw-raid
+    hostgroups: computers, stretch, service, sw-raid, drbd-hosts
   conova-node02:
     address: 217.196.149.228
     parents: gw-conova
-    hostgroups: computers, stretch, service, sw-raid
+    hostgroups: computers, stretch, service, sw-raid, drbd-hosts
   ganeti-conova:
     address: 217.196.149.235
     parents: gw-conova
@@ -528,22 +524,6 @@ servers:
     address: 217.196.149.229
     parents: gw-conova
     hostgroups: computers, stretch, service, apache2-hosts
-  mirror-conova-debian:
-    address: 217.196.149.232
-    hostgroups: secondary-IPs
-    parents: mirror-conova
-  mirror-conova-security:
-    address: 217.196.149.233
-    hostgroups: secondary-IPs, rsyncd-hosts, security_mirror
-    parents: mirror-conova
-  mirror-conova-archive:
-    address: 217.196.149.234
-    hostgroups: secondary-IPs, rsyncd-hosts
-    parents: mirror-conova
-  mirror-conova-syncproxy4-eu:
-    address: 217.196.149.237
-    hostgroups: secondary-IPs, rsyncd-hosts, https-service
-    parents: mirror-conova
 
   arm-conova-01:
     address: 217.196.149.230
@@ -557,6 +537,27 @@ servers:
     address: 217.196.149.236
     parents: ganeti-conova
     hostgroups: computers, hassrvfs, porterbox, stretch
+
+  schmelzer:
+    address: 185.69.161.161
+    parents: gw-conova
+    hostgroups: computers, service, stretch, r540, manyprocesses, apache2-hosts, apache-https, systemd-timesyncd
+  schmelzer-debian:
+    address: 217.196.149.232
+    hostgroups: secondary-IPs
+    parents: schmelzer
+  schmelzer-security:
+    address: 217.196.149.233
+    hostgroups: secondary-IPs, rsyncd-hosts, security_mirror
+    parents: schmelzer
+  schmelzer-archive:
+    address: 217.196.149.234
+    hostgroups: secondary-IPs, rsyncd-hosts
+    parents: schmelzer
+  schmelzer-syncproxy4-eu:
+    address: 217.196.149.237
+    hostgroups: secondary-IPs, rsyncd-hosts, https-service
+    parents: schmelzer
   # }}}
   # {{{ gw-csail
   csail-node01:
@@ -579,7 +580,7 @@ servers:
   x86-csail-01:
     address: 128.31.0.50
     parents: ganeti-csail
-    hostgroups: computers, buildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd
+    hostgroups: computers, buildd, hassrvfs, kvmdomains, buster, systemd-timesyncd
   x86-csail-02:
     address: 128.31.0.68
     parents: ganeti-csail
@@ -720,11 +721,19 @@ servers:
   czerny:
     address: 82.195.75.109
     parents: gw-manda
-    hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses
+    hostgroups: computers, service, dl380, acpid-hosts, stretch, manyprocesses
   clementi:
     address: 82.195.75.103
     parents: gw-manda
-    hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses
+    hostgroups: computers, service, dl380, acpid-hosts, stretch, manyprocesses
+  manda-node03:
+    address: 82.195.75.69
+    parents: gw-manda
+    hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses
+  manda-node04:
+    address: 82.195.75.70
+    parents: gw-manda
+    hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses
   bendel:
     address: 82.195.75.100
     parents: ganeti3
@@ -737,14 +746,10 @@ servers:
     address: 82.195.75.98
     parents: ganeti3
     hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, heavy-exim
-  lully:
-    address: 82.195.75.99
-    parents: ganeti3
-    hostgroups: computers, service, hasbootfs, kvmdomains, stretch, hasvarlogfs
   draghi:
     address: 82.195.75.106
     parents: ganeti3
-    hostgroups: computers, service, hasbootfs, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch
+    hostgroups: computers, service, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch
   geo1:
     address: 82.195.75.105
     parents: ganeti3
@@ -752,7 +757,7 @@ servers:
   handel:
     address: 82.195.75.104
     parents: ganeti3
-    hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts
+    hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs
   kaufmann:
     address: 82.195.75.107
     parents: ganeti3
@@ -822,13 +827,22 @@ servers:
     parents: byrd
     hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, hassrvfs, rsyncd-hosts, apache-https
 
+  pijper:
+    address: 140.211.166.194
+    parents: gw-osuosl
+    hostgroups: computers, stretch, service, manyprocesses
+  loghost-osuosl-01:
+    address: 140.211.166.202
+    parents: pijper
+    hostgroups: computers, service, kvmdomains, stretch, hassrvfs, systemd-timesyncd
+
   pieta:
     address: 140.211.166.195
     parents: gw-osuosl
     hostgroups: computers, stretch, service, manyprocesses
   ppc64el-osuosl-01:
     address: 140.211.166.196
-    parents: pieta
+    parents: pijper
     hostgroups: computers, hassrvfs, buildd, stretch
   # }}}
   # {{{ gw-sanger
@@ -843,7 +857,7 @@ servers:
   sibelius:
     address: 193.62.202.28
     parents: gw-sanger
-    hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
+    hostgroups: computers, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts
     contacts: tjrc1, dave
   # }}}
   # {{{ gw-scanplus
@@ -983,14 +997,14 @@ servers:
     address: 209.87.16.46
     parents: ubc-gateway
     hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https, broken_https_default_vhost
-  kantuser:
-    address: 209.87.16.47
-    parents: ubc-gateway
-    hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts
   grabbe:
     address: 209.87.16.48
     parents: ubc-gateway
     hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https
+  trabaci:
+    address: 209.87.16.49
+    parents: ubc-gateway
+    hostgroups: computers, service, kvmdomains, stretch, hassrvfs, systemd-timesyncd
   # }}}
   # {{{ gw-umn
   #saens:
@@ -1045,6 +1059,10 @@ servers:
     address: 130.89.148.14
     parents: klecker
     hostgroups: secondary-IPs
+  smit:
+    address: 130.89.148.78
+    parents: gw-utwente
+    hostgroups: computers, service, stretch, r540, manyprocesses, incomingmailrelayed2025
   # }}}
   # {{{ gw-ynic
   henze:
@@ -1132,11 +1150,16 @@ hostgroups:
   pe1950:
     alias: Dell PowerEdge 1950 hosts
     private: 1
+  r540:
+    alias: Dell PowerEdge R540 hosts
+    private: 1
 
   jessie:
     alias: Hosts running jessie
   stretch:
     alias: Hosts running stretch
+  buster:
+    alias: Hosts running buster
 
   kvmdomains:
     alias: Hosts that are KVM domains
@@ -1176,9 +1199,6 @@ hostgroups:
   xinetd-hosts:
     alias: hosts providing services via xinetd
     private: 1
-  postgres94-hosts:
-    alias: hosts running postgres94
-    private: 1
   postgres96-hosts:
     alias: hosts running postgres96
     private: 1
@@ -1270,9 +1290,6 @@ hostgroups:
   high-RTT:
     alias: machines with high round trip times
     private: 1
-  alioth:
-    alias: machines that just are just awkward
-    private: 1
   #openstack-compute:
   #  alias: nodes that run OpenStack compute
   #  private: 1
@@ -1344,7 +1361,6 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw"
     hostgroups: computers
     check_interval: 60
-    excludehostgroups: alioth
   # }}}
   # {{{ ### disk usage
   -
@@ -1529,7 +1545,6 @@ services:
     nrpe: "/usr/lib/nagios/plugins/dsa-check-config"
     hostgroups: computers
     check_interval: 60
-    excludehostgroups: alioth
   -
     name: setup - local hostname etc-hosts
     nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi'
@@ -1643,7 +1658,6 @@ services:
     servicegroups: backup
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'"
     hostgroups: computers
-    excludehostgroups: alioth
 
   -
     name: network backup status - draghi
@@ -1730,19 +1744,16 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
     runfrom: lotti
     hostgroups: computers
-    excludehostgroups: alioth
   -
-    name: remote logging on lully
+    name: remote logging on loghost-grnet-01
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
-    runfrom: lully
+    runfrom: loghost-grnet-01
     hostgroups: computers
-    excludehostgroups: alioth
   -
-    name: remote logging on loghost-grnet-01
+    name: remote logging on loghost-osuosl-01
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$"
-    runfrom: loghost-grnet-01
+    runfrom: loghost-osuosl-01
     hostgroups: computers
-    excludehostgroups: alioth
   # }}}
   # {{{ base service
   -
@@ -1767,13 +1778,13 @@ services:
     name: process - nrpe
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d'"
     hostgroups: computers
-    excludehostgroups: stretch
+    excludehostgroups: stretch, buster
     max_check_attempts: -1
     depends: network service - nrpe
   -
     name: process - nrpe
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -f'"
-    hostgroups: stretch
+    hostgroups: stretch, buster
     max_check_attempts: -1
     depends: network service - nrpe
   ###
@@ -1832,7 +1843,6 @@ services:
     name: process - ud-replicated
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'"
     hostgroups: computers
-    excludehostgroups: alioth
   ###
   -
     name: MQ connection on rainier
@@ -1842,7 +1852,7 @@ services:
     hostgroups: computers
     check_interval:  60
     retry_interval: 15
-    excludehostgroups: alioth, broken_mq
+    excludehostgroups: broken_mq
   -
     name: MQ connection on rapoport
     servicegroups: MQ
@@ -1851,7 +1861,7 @@ services:
     hostgroups: computers
     check_interval:  60
     retry_interval: 15
-    excludehostgroups: alioth, broken_mq
+    excludehostgroups: broken_mq
   ###
   -
     name: local resolver
@@ -1862,7 +1872,11 @@ services:
     name: process - unbound
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'"
     hostgroups: computers
-    excludehostgroups: alioth
+  -
+    name: unbound trust anchors
+    nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors"
+    hostgroups: computers
+    check_interval: 60
   ###
   -
     name: process - uptimed
@@ -1893,12 +1907,10 @@ services:
     name: process - stunnel4 - puppet-ekeyd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'"
     hostgroups: computers
-    excludehostgroups: alioth
   -
     name: process - stunnel4 - puppet-ekeyd is crazy
     nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity"
     hostgroups: computers
-    excludehostgroups: alioth
     excludehosts: czerny, grnet-node01, storace
   # }}}
   # {{{ anti-services
@@ -1923,7 +1935,7 @@ services:
   -
     name: unwanted process - rpc.statd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C rpc.statd"
-    hostgroups: stretch
+    hostgroups: stretch, buster
     excludehosts: storace
   -
     name: unwanted process - inetd
@@ -2041,7 +2053,12 @@ services:
   -
     name: HW - OpenManage status
     nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage"
-    hostgroups: pe1950
+    hostgroups: pe1950, r540
+    excludehosts: wieck, schumann
+  -
+    name: HW - OpenManage status
+    nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage -b bp=0 -b bat_charge=0:0"
+    hosts: wieck, schumann
   # }}}
   # }}}
   # {{{ ### mail stuff
@@ -2070,6 +2087,15 @@ services:
     name: mail queue
     nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000"
     hostgroups: heavy-exim
+  -
+    name: process - fail2ban
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server"
+    hostgroups: heavy-exim, heavy-postfix
+  -
+    name: unwanted process - fail2ban
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server"
+    hostgroups: computers
+    excludehostgroups: heavy-exim, heavy-postfix
   # }}}
   # {{{ clamav
   -
@@ -2164,23 +2190,23 @@ services:
   -
     name: process - weightd - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'"
-    hostgroups: heavy-postfix, alioth
+    hostgroups: heavy-postfix
   -
     name: process - weightd - cache
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'"
-    hostgroups: heavy-postfix, alioth
+    hostgroups: heavy-postfix
     depends: process - weightd - master
   -
     name: process - weightd - child
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'"
-    hostgroups: heavy-postfix, alioth
+    hostgroups: heavy-postfix
     depends: process - weightd - master
   ###
   -
     name: unwanted process - policyd-weight
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight"
     hostgroups: computers
-    excludehostgroups: heavy-postfix, alioth
+    excludehostgroups: heavy-postfix
   # }}}
   # {{{ postfix
   ###
@@ -2390,15 +2416,11 @@ services:
     name: unwanted process - postgresql
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres"
     hostgroups: computers
-    excludehostgroups: postgres94-hosts, postgres96-hosts
+    excludehostgroups: postgres96-hosts
   -
     name: unwanted process - postgresql 9.0
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'"
     hostgroups: computers
-  -
-    name: process - postgresql94 - master
-    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'"
-    hostgroups: postgres94-hosts
   -
     name: process - postgresql96 - master
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'"
@@ -2435,7 +2457,7 @@ services:
     name: process - statd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u statd -C rpc.statd -a '/sbin/rpc.statd'"
     hostgroups: nfs-client, nfs-server
-    excludehostgroups: stretch
+    excludehostgroups: stretch, buster
   -
     name: process - nfsd
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u root -C nfsd -a '[nfsd]'"
@@ -2681,6 +2703,16 @@ services:
     check: "dsa_check_staticsync!miniconf10.debconf.org"
     hosts: global
     servicegroups: mirror
+  -
+    name: mirror static sync - wiki
+    check: "dsa_check_staticsync!wiki.debconf.org"
+    hosts: global
+    servicegroups: mirror
+  -
+    name: mirror static sync - www
+    check: "dsa_check_staticsync!www.debconf.org"
+    hosts: global
+    servicegroups: mirror
   # }}}
   # {{{ DNS
   -
@@ -2877,6 +2909,14 @@ services:
     check_interval: 60
     hosts: vogler
   ####
+  -
+    name: Debian SMTP CA
+    nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /srv/puppet.debian.org/ca/ca.crt"
+    hosts: handel
+    check_interval: 60
+    max_check_attempts: 2
+    retry_interval: 5
+  ####
   -
     name: puppetmaster cert
     nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem"
@@ -2890,6 +2930,37 @@ services:
     hostgroups: computers
     check_interval:  60
     retry_interval: 15
+  ####
+  -
+    name: ping peer on mgmt network
+    nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%"
+    hosts: conova-node01
+    check_interval: 5
+    max_check_attempts: 4
+    retry_interval: 1
+  -
+    name: ping peer on mgmt network
+    nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%"
+    hosts: conova-node02
+    check_interval: 5
+    max_check_attempts: 4
+    retry_interval: 1
+
+  -
+    name: ping peer on mgmt network
+    nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.14 -w 50,10% -c 200,30%"
+    hosts: manda-node03
+    check_interval: 5
+    max_check_attempts: 4
+    retry_interval: 1
+  -
+    name: ping peer on mgmt network
+    nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.13 -w 50,10% -c 200,30%"
+    hosts: manda-node04
+    check_interval: 5
+    max_check_attempts: 4
+    retry_interval: 1
+  # }}}
   # }}}
 # }}}