X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=ae2b7769fb2c610d4114133cdedda37ef1034c97;hb=8f1abbea638f862c05f8da453a22b3b98bd37d81;hp=6604987b82d7bc241716989abbe893f3cc5623e7;hpb=a6e2fbb9aee594b38998038896cdeaa404dbe827;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 6604987..ae2b776 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -159,7 +159,7 @@ servers: pkgmirror-1and1: address: 213.165.95.4 parents: powell - hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, no-bacula + hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, no-bacula, apache-https babin: address: 213.165.95.6 parents: powell @@ -290,7 +290,7 @@ servers: picconi: address: 5.153.231.3 parents: gw-bytemark - hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs, heavy-exim, spamd + hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs, heavy-exim, spamd, apache-https senfter: address: 5.153.231.4 parents: gw-bytemark @@ -368,7 +368,7 @@ servers: dillon: address: 5.153.231.22 parents: ganeti-bytemark - hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs + hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs, hassrvfs ticharich: address: 5.153.231.23 parents: ganeti-bytemark @@ -428,18 +428,18 @@ servers: hostgroups: computers, sw-raid, hassrvfs, wheezy # }}} # {{{ gw-ftcollins - alkman: - address: 192.25.206.63 - parents: gw-ftcollins - hostgroups: computers, buildd, acpid-hosts, wheezy - merulo: - address: 192.25.206.58 - parents: gw-ftcollins - hostgroups: computers, porterbox, hasusrfs, wheezy - mundy: - address: 192.25.206.62 - parents: gw-ftcollins - hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy + #alkman: + # address: 192.25.206.63 + # parents: gw-ftcollins + # hostgroups: computers, buildd, acpid-hosts, wheezy + #merulo: + # address: 192.25.206.58 + # parents: gw-ftcollins + # hostgroups: computers, porterbox, hasusrfs, wheezy + #mundy: + # address: 192.25.206.62 + # parents: gw-ftcollins + # hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy spohr: address: 192.25.206.33 parents: gw-ftcollins @@ -665,6 +665,10 @@ servers: address: 86.59.118.152 parents: gw-sil hostgroups: computers, buildd, wheezy + eberlin: + address: 86.59.118.155 + parents: gw-sil + hostgroups: computers, buildd, wheezy # }}} # {{{ gw-ubcece sw-ubcece: @@ -741,10 +745,6 @@ servers: address: 206.12.19.13 parents: sw-ubcece-kais hostgroups: computers, hashomefs, sw-raid, rsyncd-hosts, apache2-hosts, xinetd-hosts, service, nfs-server, squeeze, hassrvfs - paganini: - address: 206.12.19.10 - parents: sw-ubcece-kais - hostgroups: computers, hasbootfs, aacraid, hassrvfs, nfs-client, service, squeeze, autofs respighi: address: 206.12.19.11 parents: sw-ubcece-kais @@ -814,7 +814,7 @@ servers: nono: address: 206.12.19.123 parents: traetta - hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, xinetd-hosts, apache2-hosts, apache-https + hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, xinetd-hosts, apache2-hosts, apache-https, broken_https_default_vhost reger: address: 206.12.19.124 parents: ganeti2 @@ -854,7 +854,7 @@ servers: diabelli: address: 206.12.19.136 parents: traetta - hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, apache2-hosts, apache-https + hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, apache2-hosts, apache-https, broken_https_default_vhost bizet: address: 206.12.19.137 parents: ganeti2 @@ -883,10 +883,6 @@ servers: address: 206.12.19.143 parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, apache-https - stanley: - address: 206.12.19.145 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, no-bacula muffat: address: 206.12.19.146 parents: ganeti2 @@ -1121,6 +1117,9 @@ hostgroups: apache-https: alias: hosts with https services private: 1 + broken_https_default_vhost: + alias: https default vhost does not say 200 OK + private: 1 no-bacula: alias: hosts which are not being backed up with bacula @@ -1178,10 +1177,6 @@ hostgroups: # i.e. no port 25 private: 1 - ntpsuckers: - alias: "hosts who's ntp offset is often unknown" - private: 1 - brokensamhain: alias: machines that can not run samhain private: 1 @@ -1248,6 +1243,7 @@ services: ############ Disk Usage ############ #### + - name: disk usage - all servicegroups: diskspace @@ -1347,9 +1343,9 @@ services: nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-misc" hosts: sibelius - - name: disk usage on /var/lib/postgresql/9.1/dak + name: disk usage on /var/lib/postgresql/9.1 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /var/lib/postgresql/9.1/dak" + nrpe: "/usr/lib/nagios/plugins/check_disk 75 85 /var/lib/postgresql/9.1" hosts: franck - name: disk usage on /srv/ftp-master.debian.org @@ -1539,6 +1535,12 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-uptime" hostgroups: computers #### + - + name: processes - samhain zombies + nrpe: "/usr/lib/nagios/plugins/check_procs 3 6 -s Z -u root -a samhain" + event_handler: dsa_event_handler_restart_samhain + hostgroups: computers + excludehostgroups: brokensamhain - name: processes - zombies nrpe: "/usr/lib/nagios/plugins/check_procs 5 10 -s Z" @@ -1621,7 +1623,7 @@ services: hostgroups: computers depends: process - ntpd excludehosts: ancina - excludehostgroups: ntpsuckers, deadslow + excludehostgroups: deadslow servicegroups: time # - @@ -1664,6 +1666,20 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lully hostgroups: computers + - + name: MQ connection on rainier + remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" + runfrom: rainier + hostgroups: computers + normal_check_interval: 60 + retry_check_interval: 15 + - + name: MQ connection on rapoport + remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" + runfrom: rapoport + hostgroups: computers + normal_check_interval: 60 + retry_check_interval: 15 ### MAIL STUFF ### - @@ -1911,10 +1927,6 @@ services: nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' hostgroups: computers normal_check_interval: 60 - - - name: setup - ud-ldap freshness - nrpe: "/usr/lib/nagios/plugins/dsa-check-udldap-freshness" - hostgroups: computers - name: system - available entropy nrpe: "/usr/lib/nagios/plugins/dsa-check-entropy" @@ -1986,10 +1998,6 @@ services: name: "host SSL cert" nrpe: "if [ -e /etc/ssl/certs/thishost.pem ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/certs/thishost.pem; else echo 'No thishost.pem on this host.'; fi" hostgroups: computers - - - name: "pg SSL cert" - nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/certs/pg-ubcece.debian.org-chained.pem" - hosts: danzi ############ Processes/Services that only run on some computers ############ #### @@ -2023,7 +2031,11 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: squeeze + excludehostgroups: squeeze,freebsd + - + name: process - ud-replicated + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'" + hostgroups: freebsd - name: process - monit nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/sbin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" @@ -2264,6 +2276,7 @@ services: check: check_https hostgroups: apache-https excludehosts: handel,menotti + excludehostgroups: broken_https_default_vhost depends: "process - apache2 - master" normal_check_interval: 120 - @@ -2272,6 +2285,12 @@ services: hosts: handel,menotti depends: "process - apache2 - master" normal_check_interval: 120 + - + name: network service - https + check: dsa_check_https_any_status + hostgroups: broken_https_default_vhost + depends: "process - apache2 - master" + normal_check_interval: 120 - name: network service - https cert check: dsa_check_cert!443 @@ -2375,7 +2394,7 @@ services: hostgroups: computers - name: process - postgresql91 - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:4 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'" hostgroups: postgres91-hosts - name: postgresql backups @@ -2488,6 +2507,13 @@ services: ############ MISC OTHER Stuff ############ ##### + - + name: puppetmaster cert + nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" + hosts: handel + normal_check_interval: 60 + max_check_attempts: 2 + retry_check_interval: 5 - name: mirror sync - bugs check: "dsa_check_mirrorsync_skew!bugs.debian.org!project/trace/bugs-master.debian.org!120:600" @@ -2574,5 +2600,12 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=7778 $HOSTADDRESS$ public" runfrom: dijkstra hosts: giustini + ############ + - + name: current chroots + nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" + hostgroups: porterbox + normal_check_interval: 60 + retry_check_interval: 15 # vim: set ts=2 sw=2 et ai si fdm=marker: