X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=2ab5be1639f8a0a5f1207fefd063b87bdd2e4fae;hb=f913890f6683f0806c85dcdd989e79111e64cdf3;hp=74a098754cbb1b5d30521d76386e436e65393edb;hpb=763d7b1509ab7c4c30ab7972971e00c4814d5c6c;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 74a0987..2ab5be1 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -85,10 +85,6 @@ servers: address: 72.52.94.70 parents: gw-ubcece hostgroups: layer3-infrastructure - gw-karlsruhe: - address: 129.143.59.214 - parents: gw-ubcece - hostgroups: layer3-infrastructure gw-leaseweb: address: 185.17.185.190 parents: gw-ubcece @@ -170,17 +166,19 @@ servers: schumann: address: 212.227.126.54 parents: gw-1und1-sec - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, stretch, security_mirror, hassrvfs + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, stretch, security_mirror, hassrvfs, pe1950 wieck: address: 195.20.242.89 parents: gw-1und1-sec - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, stretch, security_mirror, hasvarlogfs, no-bacula + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, stretch, security_mirror, hasvarlogfs, no-bacula, pe1950 # }}} # {{{ gw-accumu pettersson: address: 130.239.18.123 parents: gw-accumu hostgroups: computers, hasbootfs, aacraid, nfs-client, service, apache2-hosts, stretch, autofs, sw-raid + contacts: zobel, tfheen, lfilipoz, zumbi, jcristau, pabs, aurel32, dsa-nsa + contact_groups: "" mirror-accumu: address: 130.242.6.199 parents: gw-accumu2 @@ -445,7 +443,7 @@ servers: x86-bm-01: address: 5.153.231.32 parents: ganeti-bytemark - hostgroups: computers, kvmdomains, stretch, no-bacula, systemd-timesyncd + hostgroups: computers, pybuildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd tate: address: 5.153.231.33 parents: ganeti-bytemark @@ -458,18 +456,6 @@ servers: address: 5.153.231.36 parents: ganeti-bytemark 
hostgroups: computers, service, kvmdomains, stretch, autofs, nfs-client, systemd-timesyncd - mirror-bytemark: - address: 5.153.231.37 - parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, hassrvfs - mirror-bytemark-debian: - address: 5.153.231.45 - hostgroups: secondary-IPs - parents: mirror-bytemark - mirror-bytemark-security: - address: 5.153.231.46 - hostgroups: secondary-IPs - parents: mirror-bytemark sor: address: 5.153.231.38 parents: ganeti-bytemark @@ -499,6 +485,8 @@ servers: address: 5.153.231.41 parents: gw-bytemark hostgroups: computers, service, stretch, hassrvfs, dl380, manyprocesses, apache2-hosts + contacts: zobel, tfheen, lfilipoz, zumbi, jcristau, pabs, aurel32, dsa-nsa + contact_groups: "" # }}} # {{{ gw-c3sl santoro: @@ -583,7 +571,7 @@ servers: hostgroups: computers, service, dl360, stretch, drbd-hosts ganeti-csail: address: 128.31.0.49 - parents: gw-bytemark + parents: gw-csail hostgroups: notacomputer mirror-csail: @@ -651,7 +639,7 @@ servers: x86-grnet-01: address: 194.177.211.203 parents: ganeti-grnet - hostgroups: computers, buildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd + hostgroups: computers, pybuildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd vittoria: address: 194.177.211.205 parents: ganeti-grnet @@ -695,19 +683,19 @@ servers: lw01: address: 185.17.185.177 parents: gw-leaseweb - hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts + hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts lw02: address: 185.17.185.178 parents: gw-leaseweb - hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts + hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts lw03: address: 185.17.185.179 parents: gw-leaseweb - hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts + hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts lw04: address: 185.17.185.180 parents: 
gw-leaseweb - hostgroups: computers, service, jessie, dl180, nfs-server, rsyncd-hosts + hostgroups: computers, service, stretch, dl180, nfs-server, rsyncd-hosts lw07: address: 185.17.185.187 parents: gw-leaseweb @@ -725,13 +713,6 @@ servers: parents: gw-leaseweb hostgroups: computers, service, stretch, dl180 # }}} - # {{{ gw-karlsruhe - zemlinsky: - address: 129.143.160.6 - parents: gw-karlsruhe - hostgroups: computers, buildd, stretch - contacts: pkern - # }}} # {{{ gw-manda czerny: address: 82.195.75.109 @@ -826,7 +807,7 @@ servers: zani: address: 148.100.88.22 parents: gw-marist - hostgroups: computers, buildd, hassrvfs, stretch, incomingmailrelayed + hostgroups: computers, pybuildd, hassrvfs, stretch, incomingmailrelayed # }}} # {{{ gw-osuosl byrd: @@ -859,7 +840,7 @@ servers: sallinen: address: 193.62.202.26 parents: gw-sanger - hostgroups: computers, service, stretch, dl380, nfs-client, autofs, postgres96-hosts + hostgroups: computers, service, stretch, dl380, nfs-client, autofs, postgres96-hosts, apache2-hosts sibelius: address: 193.62.202.28 parents: gw-sanger @@ -1003,11 +984,19 @@ servers: godard: address: 209.87.16.44 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, systemd-timesyncd, postfix-hosts, postgres96-hosts, manyprocesses + hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, systemd-timesyncd, postfix-hosts, postgres96-hosts, crazymanyprocesses debussy: address: 209.87.16.46 parents: ubc-gateway hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https, broken_https_default_vhost + kantuser: + address: 209.87.16.47 + parents: ubc-gateway + hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts + grabbe: + address: 209.87.16.48 + parents: ubc-gateway + hostgroups: computers, service, kvmdomains, stretch, systemd-timesyncd, apache2-hosts, apache-https # }}} # {{{ gw-umn 
#saens: @@ -1103,13 +1092,8 @@ hostgroups: computers: alias: computers private: 1 - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: https://db.debian.org/machines.cgi?host=%s layer3-infrastructure: alias: Layer 3 Devices - extinfo-icon_image: base/switch40.png - extinfo-icon_image_alt: router notacomputer: alias: Systems that are not really systems. Yeah :) private: 1 @@ -1126,6 +1110,8 @@ hostgroups: alias: machines running services buildd: alias: buildd systems + pybuildd: + alias: buildd systems running pybuildd general: alias: general purpose developer accessible machines @@ -1156,6 +1142,9 @@ hostgroups: aacraid: alias: Hosts with Adaptec AACraid private: 1 + pe1950: + alias: Dell PowerEdge 1950 hosts + private: 1 wheezy: alias: Hosts running wheezy @@ -1237,6 +1226,8 @@ hostgroups: private: 1 manyprocesses: alias: hosts with lots and lots of (kernel) processes + crazymanyprocesses: + alias: hosts with stupidly lots of processes no-bacula: alias: hosts which are not being backed up with bacula @@ -1357,6 +1348,12 @@ services: check_interval: 5 max_check_attempts: 4 retry_interval: 1 + - + name: network - v6 gw + nrpe: "/usr/lib/nagios/plugins/dsa-check-ipv6-default-gw" + hostgroups: computers + check_interval: 60 + excludehostgroups: alioth # }}} # {{{ ### disk usage - @@ -1414,42 +1411,42 @@ services: - name: disk usage on /srv/farm-snapshot/farm-2017-0 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-0" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-0" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-1 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-1" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-1" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-2 servicegroups: diskspace - nrpe: 
"/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-2" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-2" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-3 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-3" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-3" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-4 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-4" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-4" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-5 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-5" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-5" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-6 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-6" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-6" hosts: sibelius - name: disk usage on /srv/farm-snapshot/farm-2017-7 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2017-7" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /srv/farm-snapshot/farm-2017-7" hosts: sibelius - name: disk usage on /srv/ftp-master.debian.org @@ -1459,37 +1456,47 @@ services: - name: disk usage on /storage/snapshot-farm-1 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-1" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-1" hosts: lw01 - name: disk usage on /storage/snapshot-farm-2 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-2" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 
/storage/snapshot-farm-2" hosts: lw02 - name: disk usage on /storage/snapshot-farm-3 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-3" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-3" hosts: lw03 - name: disk usage on /storage/snapshot-farm-4 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-4" + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-4" hosts: lw04 + - + name: disk usage on /storage/snapshot-farm-09 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-09" + hosts: lw09 + - + name: disk usage on /storage/snapshot-farm-10 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 97 /storage/snapshot-farm-10" + hosts: lw10 - name: disk usage on /srv/morgue.debian.org/ servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 95 90 /srv/morgue.debian.org" + nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /srv/morgue.debian.org" hosts: lw03 - name: disk usage on /srv/QNAP-big/ servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 90 80 /srv/QNAP-big" + nrpe: "/usr/lib/nagios/plugins/check_disk 80 90 /srv/QNAP-big" hosts: storace - name: disk usage on /srv/QNAP-tiny servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 90 80 /srv/QNAP-tiny" + nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /srv/QNAP-tiny" hosts: storace # }}} # {{{ ### system @@ -1529,11 +1536,15 @@ services: name: processes - total nrpe: "/usr/lib/nagios/plugins/check_procs 620 700" hostgroups: computers - excludehostgroups: manyprocesses + excludehostgroups: manyprocesses, crazymanyprocesses - name: processes - total hostgroups: manyprocesses nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700" + - + name: processes - total + hostgroups: crazymanyprocesses + nrpe: "/usr/lib/nagios/plugins/check_procs 15000 25000" - name: free memory - mb nrpe:
"/usr/lib/nagios/plugins/dsa-check-memory -m mb" @@ -1593,7 +1604,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-bacula $HOSTNAME$.debian.org" runfrom: dinis hostgroups: computers - excludehostgroups: buildd, porterbox, no-bacula + excludehostgroups: buildd, pybuildd, porterbox, no-bacula check_interval: 60 retry_interval: 15 - @@ -1602,7 +1613,7 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-bacula -w 1080 -c 1560 $HOSTNAME$.debian.org F" runfrom: dinis hostgroups: computers - excludehostgroups: buildd, porterbox, no-bacula + excludehostgroups: buildd, pybuildd, porterbox, no-bacula check_interval: 60 retry_interval: 15 - @@ -1955,11 +1966,6 @@ services: name: "sso CRL" nrpe: "if [ -e /var/lib/dsa/sso/ca.crl ]; then /usr/lib/nagios/plugins/dsa-check-crl-expire -w 129600 -c 86400 /var/lib/dsa/sso/ca.crl; else echo 'No sso/ca.crl on this host.'; fi" hostgroups: computers - - - name: SSL certs - puppet - hosts: global - remotecheck: "/usr/lib/nagios/plugins/dsa-check-cert-expire-dir /etc/puppet/modules/ssl/files/servicecerts" - runfrom: handel - name: SSL certs - LE hosts: global @@ -2053,6 +2059,10 @@ services: servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-drbd -d All" hostgroups: drbd-hosts + - + name: HW - OpenManage status + nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-openmanage" + hostgroups: pe1950 # }}} # }}} # {{{ ### mail stuff @@ -2412,19 +2422,20 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:5 -u buildd -C buildd -a '/usr/bin/buildd'" hostgroups: buildd contact_groups: buildd + - + name: process - buildd + servicegroups: buildd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:5 -u buildd -C python3 -a 'buildd.py'" + hostgroups: pybuildd + contact_groups: buildd - name: processes - zombie schroot nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s 
ZNs -c 0" - hostgroups: buildd + hostgroups: buildd, pybuildd contact_groups: +buildd check_interval: 5 max_check_attempts: 24 retry_interval: 5 - - - name: processes - lvcreate - nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'" - hostgroups: buildd - contact_groups: +buildd # }}} # {{{ NFS Stuff - @@ -2710,10 +2721,6 @@ services: name: DNS SOA sync - 144-28.118.59.86.in-addr.arpa check: "dsa_check_soas_add!denis.debian.org!144-28.118.59.86.in-addr.arpa" hosts: global - - - name: DNS SOA sync - alioth.debian.org - check: "dsa_check_soas_add!denis.debian.org!alioth.debian.org" - hosts: global - name: DNS SOA sync - debconf.net check: "dsa_check_soas_add!denis.debian.org!debconf.net" @@ -2871,6 +2878,11 @@ services: check: dsa_check_cert!5061 check_interval: 60 hosts: vogler + - + name: freeradius process + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u freerad -C freeradius -a '/usr/sbin/freeradius -xx'" + check_interval: 60 + hosts: vogler #### - name: puppetmaster cert