X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=e43e16dddf638d8bd429e5974e1a9e4743a27a52;hb=921d5435e35bb7a8ffbfdb2ab2e751f17e6384b9;hp=aefb247ff0ef9d26f71b3ba4fd5022f57216af00;hpb=dd34b6e1a1aa98d58b3f98e9612f2d381202252f;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index aefb247..e43e16d 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -139,7 +139,7 @@ servers: parents: gw-ubcece hostgroups: layer3-infrastructure gw-unicamp: - address: 177.220.10.129 + address: 143.106.167.113 parents: gw-ubcece hostgroups: layer3-infrastructure gw-utwente: @@ -216,7 +216,7 @@ servers: mipsel-aql-01: address: 141.170.6.152 parents: gw-aql - hostgroups: computers, buildd, stretch, hassrvfs, hasbootfs, sw-raid + hostgroups: computers, buildd, buster, hassrvfs, hasbootfs, sw-raid mipsel-aql-02: address: 141.170.6.153 parents: gw-aql @@ -238,15 +238,15 @@ servers: arm-arm-01: address: 217.140.96.58 parents: gw-arm - hostgroups: computers, hassrvfs, buildd, stretch, broken_mq + hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid arm-arm-03: address: 217.140.96.60 parents: gw-arm - hostgroups: computers, hassrvfs, buildd, stretch, broken_mq + hostgroups: computers, hassrvfs, buildd, stretch, broken_mq, sw-raid arm-arm-04: address: 217.140.96.61 parents: gw-arm - hostgroups: computers, hassrvfs, buildd, stretch, broken_mq + hostgroups: computers, hassrvfs, buildd, buster, broken_mq, sw-raid harris: address: 217.140.96.66 parents: gw-arm @@ -254,7 +254,7 @@ servers: hartmann: address: 217.140.96.67 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, stretch, armhf, buildd, broken_mq + hostgroups: computers, hasbootfs, hassrvfs, buster, armhf, buildd, broken_mq hoiby: address: 217.140.96.71 parents: gw-arm @@ -402,12 +402,6 @@ servers: address: 5.153.231.20 parents: ganeti-bytemark hostgroups: computers, general, kvmdomains, stretch, nfs-client, autofs, systemd-timesyncd - moszumanska: - address: 5.153.231.21 - parents: ganeti-bytemark - contact_groups: alioth-admins - hostgroups: computers, general, wheezy, postgres91-hosts, apache2-hosts, acpid-hosts, apache-https, brokensamhain, no-bacula, bind9-hosts, xinetd-hosts, alioth, heavy-exim, spamd - no-servicegroups: true dillon: address: 5.153.231.22 parents: ganeti-bytemark @@ -455,7 +449,7 @@ servers: lindsay: address: 5.153.231.36 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, stretch, autofs, nfs-client, systemd-timesyncd + hostgroups: computers, service, kvmdomains, buster, autofs, nfs-client, systemd-timesyncd sor: address: 5.153.231.38 parents: ganeti-bytemark @@ -517,35 +511,15 @@ servers: conova-node01: address: 217.196.149.227 parents: gw-conova - hostgroups: computers, stretch, service, sw-raid + hostgroups: computers, stretch, service, sw-raid, drbd-hosts conova-node02: address: 217.196.149.228 parents: gw-conova - hostgroups: computers, stretch, service, sw-raid + hostgroups: computers, stretch, service, sw-raid, drbd-hosts ganeti-conova: address: 217.196.149.235 parents: gw-conova hostgroups: notacomputer - mirror-conova: - address: 217.196.149.229 - parents: gw-conova - hostgroups: computers, stretch, service, apache2-hosts - mirror-conova-debian: - address: 217.196.149.232 - hostgroups: secondary-IPs - parents: mirror-conova - mirror-conova-security: - address: 217.196.149.233 - hostgroups: secondary-IPs, rsyncd-hosts, security_mirror - parents: mirror-conova - mirror-conova-archive: - address: 217.196.149.234 - hostgroups: secondary-IPs, rsyncd-hosts - parents: mirror-conova - mirror-conova-syncproxy4-eu: - address: 217.196.149.237 - hostgroups: secondary-IPs, rsyncd-hosts, https-service - parents: mirror-conova arm-conova-01: address: 217.196.149.230 @@ -559,6 +533,27 @@ servers: address: 217.196.149.236 parents: ganeti-conova hostgroups: computers, hassrvfs, porterbox, stretch + + schmelzer: + address: 185.69.161.161 + parents: gw-conova + hostgroups: computers, service, stretch, r540, manyprocesses, apache2-hosts, apache-https, systemd-timesyncd + schmelzer-debian: + address: 217.196.149.232 + hostgroups: secondary-IPs + parents: schmelzer + schmelzer-security: + address: 217.196.149.233 + hostgroups: secondary-IPs, rsyncd-hosts, security_mirror + parents: schmelzer + schmelzer-archive: + address: 217.196.149.234 + hostgroups: secondary-IPs, rsyncd-hosts + parents: schmelzer + schmelzer-syncproxy4-eu: + address: 217.196.149.237 + hostgroups: secondary-IPs, rsyncd-hosts, https-service + parents: schmelzer # }}} # {{{ gw-csail csail-node01: @@ -581,7 +576,7 @@ servers: x86-csail-01: address: 128.31.0.50 parents: ganeti-csail - hostgroups: computers, buildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd + hostgroups: computers, buildd, hassrvfs, kvmdomains, buster, systemd-timesyncd x86-csail-02: address: 128.31.0.68 parents: ganeti-csail @@ -722,67 +717,71 @@ servers: czerny: address: 82.195.75.109 parents: gw-manda - hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses + hostgroups: computers, service, dl380, acpid-hosts, stretch, manyprocesses clementi: address: 82.195.75.103 parents: gw-manda - hostgroups: computers, service, dl380, acpid-hosts, stretch, drbd-hosts, manyprocesses + hostgroups: computers, service, dl380, acpid-hosts, stretch, manyprocesses + manda-node03: + address: 82.195.75.69 + parents: gw-manda + hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses + manda-node04: + address: 82.195.75.70 + parents: gw-manda + hostgroups: computers, service, stretch, r540, drbd-hosts, manyprocesses bendel: address: 82.195.75.100 - parents: ganeti3 - hostgroups: computers, service, hasbootfs, kvmdomains, hassrvfs, apache2-hosts, stretch, postfix-hosts, heavy-postfix, apache-https, amavis-hosts, hasvarlogfs + parents: ganeti-manda + hostgroups: computers, service, hasbootfs, kvmdomains, hassrvfs, apache2-hosts, stretch, postfix-hosts, heavy-postfix, apache-https, amavis-hosts, hasvarlogfs, systemd-timesyncd master: address: 82.195.75.110 - parents: ganeti3 - hostgroups: computers, service, kvmdomains, stretch, hassrvfs, spamd, heavy-exim, highload + parents: ganeti-manda + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, spamd, heavy-exim, highload, systemd-timesyncd vento: address: 82.195.75.98 - parents: ganeti3 - hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, heavy-exim - lully: - address: 82.195.75.99 - parents: ganeti3 - hostgroups: computers, service, hasbootfs, kvmdomains, stretch, hasvarlogfs + parents: ganeti-manda + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, heavy-exim, systemd-timesyncd draghi: address: 82.195.75.106 - parents: ganeti3 - hostgroups: computers, service, hasbootfs, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch + parents: ganeti-manda + hostgroups: computers, service, hassrvfs, apache2-hosts, spamd, heavy-exim, kvmdomains, xinetd-hosts, apache-https, stretch, systemd-timesyncd geo1: address: 82.195.75.105 - parents: ganeti3 - hostgroups: computers, service, bind9-hosts, kvmdomains, stretch + parents: ganeti-manda + hostgroups: computers, service, bind9-hosts, kvmdomains, stretch, systemd-timesyncd handel: address: 82.195.75.104 - parents: ganeti3 - hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts + parents: ganeti-manda + hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts, hassrvfs, systemd-timesyncd kaufmann: address: 82.195.75.107 - parents: ganeti3 - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, kvmdomains, stretch, apache-https - ganeti3: - address: 82.195.75.111 + parents: ganeti-manda + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, kvmdomains, stretch, apache-https, systemd-timesyncd + ganeti-manda: + address: 82.195.75.71 parents: gw-manda hostgroups: notacomputer wilder: address: 82.195.75.112 - parents: ganeti3 - hostgroups: computers, service, hassrvfs, apache2-hosts, kvmdomains, stretch, apache-https, rsyncd-hosts + parents: ganeti-manda + hostgroups: computers, service, hassrvfs, apache2-hosts, kvmdomains, stretch, apache-https, rsyncd-hosts, systemd-timesyncd mailly: address: 82.195.75.114 - parents: ganeti3 - hostgroups: computers, service, kvmdomains, stretch, spamd, heavy-exim, mail-relay + parents: ganeti-manda + hostgroups: computers, service, kvmdomains, stretch, spamd, heavy-exim, mail-relay, systemd-timesyncd denis: address: 82.195.75.91 - parents: ganeti3 - hostgroups: computers, service, kvmdomains, stretch, bind9-hosts + parents: ganeti-manda + hostgroups: computers, service, kvmdomains, stretch, bind9-hosts, systemd-timesyncd vogler: address: 82.195.75.92 - parents: ganeti3 - hostgroups: computers, service, kvmdomains, stretch + parents: ganeti-manda + hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd wolkenstein: address: 82.195.75.65 - parents: ganeti3 - hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, apache2-hosts, stretch, apache-https + parents: ganeti-manda + hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, apache2-hosts, stretch, apache-https, systemd-timesyncd mips-manda-01: address: 82.195.75.66 parents: gw-manda @@ -801,18 +800,18 @@ servers: hostgroups: computers, buildd, stretch, hassrvfs seger: address: 82.195.75.93 - parents: ganeti3 - hostgroups: computers, service, apache2-hosts, hassrvfs, rsyncd-hosts, kvmdomains, apache-https, postgres96-hosts, stretch + parents: ganeti-manda + hostgroups: computers, service, apache2-hosts, hassrvfs, rsyncd-hosts, kvmdomains, apache-https, postgres96-hosts, stretch, systemd-timesyncd suchon: address: 82.195.75.68 - parents: ganeti3 + parents: ganeti-manda hostgroups: computers, service, kvmdomains, stretch, uploadqueue, queued, systemd-timesyncd # }}} # {{{ gw-marist zani: address: 148.100.88.22 parents: gw-marist - hostgroups: computers, pybuildd, hassrvfs, stretch, incomingmailrelayed + hostgroups: computers, pybuildd, hassrvfs, buster, incomingmailrelayed # }}} # {{{ gw-osuosl byrd: @@ -824,22 +823,23 @@ servers: parents: byrd hostgroups: computers, service, kvmdomains, stretch, apache2-hosts, hassrvfs, rsyncd-hosts, apache-https + pijper: + address: 140.211.166.194 + parents: gw-osuosl + hostgroups: computers, stretch, service, manyprocesses + loghost-osuosl-01: + address: 140.211.166.202 + parents: pijper + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, systemd-timesyncd + pieta: address: 140.211.166.195 parents: gw-osuosl hostgroups: computers, stretch, service, manyprocesses ppc64el-osuosl-01: address: 140.211.166.196 - parents: pieta - hostgroups: computers, hassrvfs, buildd, stretch - powerpc-osuosl-01: - address: 140.211.166.197 - parents: pieta - hostgroups: computers, hassrvfs, buildd, jessie - partch: - address: 140.211.15.152 - parents: gw-osuosl - hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid + parents: pijper + hostgroups: computers, hassrvfs, buildd, buster # }}} # {{{ gw-sanger sallinen: @@ -853,12 +853,7 @@ servers: sibelius: address: 193.62.202.28 parents: gw-sanger - hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts - contacts: tjrc1, dave - smetana: - address: 193.62.202.29 - parents: gw-sanger - hostgroups: computers, sw-raid, sparc, wheezy, no-bacula + hostgroups: computers, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, hasvarlogfs, multipath-hosts, nfs-server, varnish-hosts contacts: tjrc1, dave # }}} # {{{ gw-scanplus @@ -879,7 +874,7 @@ servers: mips-sil-01: address: 86.59.118.146 parents: gw-sil - hostgroups: computers, buildd, stretch, hassrvfs + hostgroups: computers, buildd, buster, hassrvfs mipsel-sil-01: address: 86.59.118.147 parents: gw-sil @@ -994,18 +989,22 @@ servers: address: 209.87.16.44 parents: ubc-gateway hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, systemd-timesyncd, postfix-hosts, postgres96-hosts, crazymanyprocesses + godard-pages: + address: 209.87.16.45 + parents: godard + hostgroups: notacomputer debussy: address: 209.87.16.46 parents: ubc-gateway hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https, broken_https_default_vhost - kantuser: - address: 209.87.16.47 - parents: ubc-gateway - hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts grabbe: address: 209.87.16.48 parents: ubc-gateway hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https + trabaci: + address: 209.87.16.49 + parents: ubc-gateway + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, systemd-timesyncd # }}} # {{{ gw-umn #saens: @@ -1031,19 +1030,15 @@ servers: # }}} # {{{ gw-unicamp prokofiev: - address: 177.220.10.140 + address: 143.106.167.124 parents: gw-unicamp hostgroups: computers, stretch, service, manyprocesses - powerpc-unicamp-01: - address: 177.220.10.141 - parents: prokofiev - hostgroups: computers, hassrvfs, buildd, jessie ppc64el-unicamp-01: - address: 177.220.10.142 + address: 143.106.167.121 parents: prokofiev hostgroups: computers, hassrvfs, buildd, stretch plummer: - address: 177.220.10.143 + address: 143.106.167.122 parents: prokofiev hostgroups: computers, porterbox, hassrvfs, stretch # }}} @@ -1064,6 +1059,10 @@ servers: address: 130.89.148.14 parents: klecker hostgroups: secondary-IPs + smit: + address: 130.89.148.78 + parents: gw-utwente + hostgroups: computers, service, stretch, r540, manyprocesses, incomingmailrelayed2025 # }}} # {{{ gw-ynic henze: @@ -1109,9 +1108,6 @@ hostgroups: armhf: alias: armhf private: 1 - sparc: - alias: sparc - private: 1 porterbox: alias: developer accessible porter machines @@ -1154,13 +1150,16 @@ hostgroups: pe1950: alias: Dell PowerEdge 1950 hosts private: 1 + r540: + alias: Dell PowerEdge R540 hosts + private: 1 - wheezy: - alias: Hosts running wheezy jessie: alias: Hosts running jessie stretch: alias: Hosts running stretch + buster: + alias: Hosts running buster kvmdomains: alias: Hosts that are KVM domains @@ -1200,12 +1199,6 @@ hostgroups: xinetd-hosts: alias: hosts providing services via xinetd private: 1 - postgres91-hosts: - alias: hosts running postgres91 - private: 1 - postgres94-hosts: - alias: hosts running postgres94 - private: 1 postgres96-hosts: alias: hosts running postgres96 private: 1 @@ -1297,9 +1290,6 @@ hostgroups: high-RTT: alias: machines with high round trip times private: 1 - alioth: - alias: machines that just are just awkward - private: 1 #openstack-compute: # alias: nodes that run OpenStack compute # private: 1 @@ -1371,7 +1361,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw" hostgroups: computers check_interval: 60 - excludehostgroups: alioth # }}} # {{{ ### disk usage - @@ -1501,6 +1490,38 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-10" hosts: lw10 + + - + name: disk usage on nfs farm 1 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-1" + hosts: lw07 + - + name: disk usage on nfs farm 2 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-2" + hosts: lw07 + - + name: disk usage on nfs farm 3 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-3" + hosts: lw07 + - + name: disk usage on nfs farm 4 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-4" + hosts: lw07 + - + name: disk usage on nfs farm 09 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-09" + hosts: lw07 + - + name: disk usage on nfs farm 10 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /auto.dsa/snapshot-10" + hosts: lw07 + - name: disk usage on /srv/morgue.debian.org/ servicegroups: diskspace @@ -1524,7 +1545,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers check_interval: 60 - excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' @@ -1571,16 +1591,10 @@ services: name: free memory - percent nrpe: "/usr/lib/nagios/plugins/dsa-check-memory -m pct" hostgroups: computers - - - name: process - getty - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" - hostgroups: computers - excludehosts: zelenka, zandonai - excludehostgroups: jessie, stretch - name: process - getty nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty" - hostgroups: jessie, stretch + hostgroups: computers - name: processes - zombies @@ -1637,14 +1651,13 @@ services: - name: process - bacula-dir servicegroups: backup - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-dir -a '/usr/sbin/bacula-dir -f -c /etc/bacula/bacula-dir.conf'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-dir -a '/usr/sbin/bacula-dir -fP -c /etc/bacula/bacula-dir.conf'" hosts: dinis - name: process - bacula-fd servicegroups: backup nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: computers - excludehostgroups: alioth - name: network backup status - draghi @@ -1694,19 +1707,10 @@ services: name: puppetized firewall nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/conf.d/defs.conf" hostgroups: computers - - - name: process - ulogd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'" - hostgroups: computers - excludehostgroups: sparc, jessie, stretch - name: process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'" - hostgroups: jessie, stretch - - - name: unexpected process - ulogd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd" - hostgroups: sparc + hostgroups: computers #### - name: process - samhain @@ -1730,34 +1734,26 @@ services: excludehostgroups: brokensamhain # }}} # {{{ logging - - - name: process - syslog-ng - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" - hostgroups: computers - excludehostgroups: jessie, stretch - name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -F'" - hostgroups: jessie, stretch + hostgroups: computers - name: remote logging on lotti remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lotti hostgroups: computers - excludehostgroups: alioth - - name: remote logging on lully + name: remote logging on loghost-grnet-01 remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" - runfrom: lully + runfrom: loghost-grnet-01 hostgroups: computers - excludehostgroups: alioth - - name: remote logging on loghost-grnet-01 + name: remote logging on loghost-osuosl-01 remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" - runfrom: loghost-grnet-01 + runfrom: loghost-osuosl-01 hostgroups: computers - excludehostgroups: alioth # }}} # {{{ base service - @@ -1782,13 +1778,13 @@ services: name: process - nrpe nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d'" hostgroups: computers - excludehostgroups: stretch + excludehostgroups: stretch, buster max_check_attempts: -1 depends: network service - nrpe - name: process - nrpe nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -f'" - hostgroups: stretch + hostgroups: stretch, buster max_check_attempts: -1 depends: network service - nrpe ### @@ -1819,7 +1815,7 @@ services: name: system time synced nrpe: "/usr/lib/nagios/plugins/dsa-check-timedatectl -s" hostgroups: computers - excludehostgroups: systemd-timesyncd, wheezy + excludehostgroups: systemd-timesyncd servicegroups: time - name: system time synced @@ -1836,7 +1832,7 @@ services: name: process - irqbalance nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'" hostgroups: computers - excludehosts: harris, smetana + excludehosts: harris ### - name: process - cron @@ -1847,13 +1843,6 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: alioth - ### - - - name: process - monit - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" - hostgroups: computers - excludehostgroups: alioth, jessie, stretch ### - name: MQ connection on rainier @@ -1863,7 +1852,7 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 - excludehostgroups: alioth, broken_mq + excludehostgroups: broken_mq - name: MQ connection on rapoport servicegroups: MQ @@ -1872,7 +1861,7 @@ services: hostgroups: computers check_interval: 60 retry_interval: 15 - excludehostgroups: alioth, broken_mq + excludehostgroups: broken_mq ### - name: local resolver @@ -1883,28 +1872,21 @@ services: name: process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" hostgroups: computers - excludehostgroups: alioth - ### - - name: process - uptimed - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'" + name: unbound trust anchors + nrpe: "/usr/lib/nagios/plugins/dsa-check-unbound-anchors" hostgroups: computers + check_interval: 60 ### - - name: process - udevd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'" + name: process - uptimed + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C uptimed -a '/usr/sbin/uptimed'" hostgroups: computers - excludehostgroups: jessie, stretch - name: process - udevd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'" - hostgroups: jessie, stretch + hostgroups: computers ### - - - name: process - acpid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'" - hostgroups: acpid-hosts - excludehostgroups: jessie, stretch - name: unexpected process - acpid nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid" @@ -1924,13 +1906,11 @@ services: - name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" - hostgroups: wheezy, jessie, stretch - excludehostgroups: alioth + hostgroups: computers - name: process - stunnel4 - puppet-ekeyd is crazy nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity" hostgroups: computers - excludehostgroups: alioth excludehosts: czerny, grnet-node01, storace # }}} # {{{ anti-services @@ -1955,7 +1935,7 @@ services: - name: unwanted process - rpc.statd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C rpc.statd" - hostgroups: stretch + hostgroups: stretch, buster excludehosts: storace - name: unwanted process - inetd @@ -1980,6 +1960,10 @@ services: name: "host SSL cert - debian client" nrpe: "if [ -e /etc/ssl/debian/certs/thishost.crt ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/debian/certs/thishost.crt; else echo 'No thishost.crt on this host.'; fi" hostgroups: computers + - + name: "host SSL cert - CA" + nrpe: "sudo -u puppet /usr/lib/nagios/plugins/dsa-check-cert-expire /srv/puppet.debian.org/ca/ca.crt" + hosts: handel - name: "sso CRL" nrpe: "if [ -e /var/lib/dsa/sso/ca.crl ]; then /usr/lib/nagios/plugins/dsa-check-crl-expire -w 129600 -c 86400 /var/lib/dsa/sso/ca.crl; else echo 'No sso/ca.crl on this host.'; fi" @@ -1991,18 +1975,11 @@ services: runfrom: handel # }}} # {{{ HW health/raid - - - name: process - mdadm monitor - servicegroups: raid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'" - hostgroups: sw-raid - excludehostgroups: jessie, stretch - name: process - mdadm monitor servicegroups: raid nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --scan'" hostgroups: sw-raid - excludehostgroups: wheezy - name: RAID - sw raid servicegroups: raid @@ -2080,7 +2057,12 @@ services: - name: HW - OpenManage status nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage" - hostgroups: pe1950 + hostgroups: pe1950, r540 + excludehosts: wieck, schumann + - + name: HW - OpenManage status + nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage -b bp=0 -b bat_charge=0:0" + hosts: wieck, schumann # }}} # }}} # {{{ ### mail stuff @@ -2109,6 +2091,15 @@ services: name: mail queue nrpe: "/usr/lib/nagios/plugins/check_mailq -M exim -w 1000 -c 2000" hostgroups: heavy-exim + - + name: process - fail2ban + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -C fail2ban-server" + hostgroups: heavy-exim, heavy-postfix + - + name: unwanted process - fail2ban + nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C fail2ban-server" + hostgroups: computers + excludehostgroups: heavy-exim, heavy-postfix # }}} # {{{ clamav - @@ -2120,16 +2111,10 @@ services: nrpe: "/usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl" hostgroups: heavy-exim, heavy-postfix depends: process - clamav - clamd - - - name: process - clamav - freshclam - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'" - hostgroups: heavy-exim, heavy-postfix - excludehostgroups: jessie, stretch - name: process - clamav - freshclam nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --foreground=true'" hostgroups: heavy-exim, heavy-postfix - excludehostgroups: wheezy - name: unwanted process - clamav nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C clamd" @@ -2142,18 +2127,11 @@ services: excludehostgroups: heavy-exim, heavy-postfix # }}} # {{{ anti-spam - - - name: process - spamd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" - hostgroups: spamd - excludehosts: picconi - excludehostgroups: jessie, stretch - name: process - spamd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamd.pid --create-prefs --max-children 5 --helper-home-dir'" hostgroups: spamd excludehosts: picconi - excludehostgroups: wheezy - name: process - spamd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamd.pid --create-prefs --max-children 20 --min-spare=5 --helper-home-dir'" @@ -2182,16 +2160,10 @@ services: hostgroups: computers ### - - - name: process - postgrey - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'" - hostgroups: heavy-exim - excludehostgroups: jessie, stretch - name: process - postgrey nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'" hostgroups: heavy-exim - excludehostgroups: wheezy - name: process - postgrey nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --inet=127.0.0.1:60000'" @@ -2222,23 +2194,23 @@ services: - name: process - weightd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix - name: process - weightd - cache nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix depends: process - weightd - master - name: process - weightd - child nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" - hostgroups: heavy-postfix, alioth + hostgroups: heavy-postfix depends: process - weightd - master ### - name: unwanted process - policyd-weight nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" hostgroups: computers - excludehostgroups: heavy-postfix, alioth + excludehostgroups: heavy-postfix # }}} # {{{ postfix ### @@ -2428,6 +2400,7 @@ services: name: process - varnish nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:2 -c 1:15 -u vcache -a '/usr/sbin/varnishd -j unix,user=vcache -F -a '" hostgroups: varnish-hosts + excludehostgroups: jessie - name: unwanted process - varnish nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C varnishd" @@ -2447,19 +2420,11 @@ services: name: unwanted process - postgresql nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres" hostgroups: computers - excludehostgroups: postgres91-hosts, postgres94-hosts, postgres96-hosts + excludehostgroups: postgres96-hosts - name: unwanted process - postgresql 9.0 nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'" hostgroups: computers - - - name: process - postgresql91 - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'" - hostgroups: postgres91-hosts - - - name: process - postgresql94 - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'" - hostgroups: postgres94-hosts - name: process - postgresql96 - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'" @@ -2496,7 +2461,7 @@ services: name: process - statd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u statd -C rpc.statd -a '/sbin/rpc.statd'" hostgroups: nfs-client, nfs-server - excludehostgroups: stretch + excludehostgroups: stretch, buster - name: process - nfsd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u root -C nfsd -a '[nfsd]'" @@ -2742,6 +2707,16 @@ services: check: "dsa_check_staticsync!miniconf10.debconf.org" hosts: global servicegroups: mirror + - + name: mirror static sync - wiki + check: "dsa_check_staticsync!wiki.debconf.org" + hosts: global + servicegroups: mirror + - + name: mirror static sync - www + check: "dsa_check_staticsync!www.debconf.org" + hosts: global + servicegroups: mirror # }}} # {{{ DNS - @@ -2882,7 +2857,7 @@ services: - name: system - all services running nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-systemd-services" - hostgroups: jessie, stretch + hostgroups: computers ### - name: process - slapd @@ -2949,9 +2924,39 @@ services: name: puppet - agent check nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/puppet-agent" hostgroups: computers - excludehosts: moszumanska check_interval: 60 retry_interval: 15 + #### + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.12 -w 50,10% -c 200,30%" + hosts: conova-node01 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.184.11 -w 50,10% -c 200,30%" + hosts: conova-node02 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.14 -w 50,10% -c 200,30%" + hosts: manda-node03 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + - + name: ping peer on mgmt network + nrpe: "/usr/lib/nagios/plugins/check_ping -H 172.29.182.13 -w 50,10% -c 200,30%" + hosts: manda-node04 + check_interval: 5 + max_check_attempts: 4 + retry_interval: 1 + # }}} # }}} # }}}