X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=099bc2a9ecb7071d42db726dd0a83b42b930db18;hb=efb918076aed02fe22eee2737fa52c67b033a69b;hp=4fe424de8a5e15080512579b43c0dd327ccc1e0b;hpb=8aacbf35628622f1e9bf3bd7b3bf22fb674800e5;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 4fe424d..099bc2a 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -90,6 +90,10 @@ servers: address: 129.143.57.177 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-leaseweb: + address: 185.17.185.190 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-man-da: address: 82.195.75.126 parents: gw-ubcece @@ -202,10 +206,6 @@ servers: address: 217.140.96.56 parents: gw-arm hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, deadslow - alain: - address: 217.140.96.58 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow alwyn: address: 217.140.96.59 parents: gw-arm @@ -278,6 +278,10 @@ servers: address: 5.153.231.248 parents: gw-bytemark hostgroups: computers, bm-bl, acpid-hosts, service, wheezy + bm-bl9: + address: 5.153.231.249 + parents: gw-bytemark + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy milanollo: address: 5.153.231.2 @@ -333,20 +337,16 @@ servers: address: 5.153.231.13 parents: ganeti-bytemark hostgroups: computers, hassrvfs, kvmdomains, wheezy, apache2-hosts - petrova: - address: 5.153.231.25 - parents: ganeti-bytemark - hostgroups: computers, kvmdomains, wheezy, apache2-hosts couper: address: 5.153.231.14 parents: ganeti-bytemark hostgroups: computers, hassrvfs, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs rainier: - address: 5.153.231.15 + address: 5.153.231.16 parents: ganeti-bytemark hostgroups: computers, kvmdomains, wheezy, no-bacula rapoport: - address: 5.153.231.16 + address: 5.153.231.15 parents: ganeti-bytemark hostgroups: computers, kvmdomains, wheezy, no-bacula delfin: @@ -365,6 +365,12 @@ servers: address: 5.153.231.20 parents: ganeti-bytemark hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs + moszumanska: + address: 5.153.231.21 + parents: ganeti-bytemark + contact_groups: alioth-admins + hostgroups: computers, general, wheezy, postgres91-hosts, apache2-hosts, acpid-hosts, apache-https, brokensamhain, no-bacula, bind9-hosts, xinetd-hosts, alioth, heavy-exim, spamd + no-servicegroups: true dillon: address: 5.153.231.22 parents: ganeti-bytemark @@ -377,6 +383,18 @@ servers: address: 5.153.231.24 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, wheezy, bind9-hosts, no-bacula + petrova: + address: 5.153.231.25 + parents: ganeti-bytemark + hostgroups: computers, kvmdomains, wheezy, apache2-hosts + oyens: + address: 5.153.231.26 + parents: ganeti-bytemark + hostgroups: computers, kvmdomains, wheezy + barriere: + address: 5.153.231.27 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, porterbox # }}} # {{{ gw-c3sl santoro: @@ -428,18 +446,18 @@ servers: hostgroups: computers, sw-raid, hassrvfs, wheezy # }}} # {{{ gw-ftcollins - alkman: - address: 192.25.206.63 - parents: gw-ftcollins - hostgroups: computers, buildd, acpid-hosts, wheezy - merulo: - address: 192.25.206.58 - parents: gw-ftcollins - hostgroups: computers, porterbox, hasusrfs, wheezy - mundy: - address: 192.25.206.62 - parents: gw-ftcollins - hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy + #alkman: + # address: 192.25.206.63 + # parents: gw-ftcollins + # hostgroups: computers, buildd, acpid-hosts, wheezy + #merulo: + # address: 192.25.206.58 + # parents: gw-ftcollins + # hostgroups: computers, porterbox, hasusrfs, wheezy + #mundy: + # address: 192.25.206.62 + # parents: gw-ftcollins + # hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy spohr: address: 192.25.206.33 parents: gw-ftcollins @@ -477,6 +495,32 @@ servers: parents: gw-isc hostgroups: computers, service, apache2-hosts, rsyncd-hosts, acpid-hosts, dl360, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula # }}} + # {{{ gw-leaseweb + lw01: + address: 185.17.185.177 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw02: + address: 185.17.185.178 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw03: + address: 185.17.185.179 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw04: + address: 185.17.185.180 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw05: + address: 185.17.185.181 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid + lw06: + address: 185.17.185.182 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid + # }}} # {{{ gw-karlsruhe zemlinsky: address: 129.143.160.6 @@ -581,7 +625,7 @@ servers: zani: address: 148.100.88.22 parents: gw-marist - hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed + hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed, ping-suckers # }}} # {{{ gw-osuosl busoni: @@ -665,6 +709,10 @@ servers: address: 86.59.118.152 parents: gw-sil hostgroups: computers, buildd, wheezy + eberlin: + address: 86.59.118.155 + parents: gw-sil + hostgroups: computers, buildd, wheezy # }}} # {{{ gw-ubcece sw-ubcece: @@ -810,7 +858,7 @@ servers: nono: address: 206.12.19.123 parents: traetta - hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, xinetd-hosts, apache2-hosts, apache-https + hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, xinetd-hosts, apache2-hosts, apache-https, broken_https_default_vhost reger: address: 206.12.19.124 parents: ganeti2 @@ -843,14 +891,10 @@ servers: address: 206.12.19.134 parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy, xinetd-hosts, nfs-client, autofs - barriere: - address: 206.12.19.135 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, porterbox diabelli: address: 206.12.19.136 parents: traetta - hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, apache2-hosts, apache-https + hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, apache2-hosts, apache-https, broken_https_default_vhost bizet: address: 206.12.19.137 parents: ganeti2 @@ -916,10 +960,6 @@ servers: hostgroups: secondary-IPs # }}} # {{{ gw-ynic - hildegard: - address: 144.32.168.74 - parents: gw-ynic - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, deadslow, buildd howells: address: 144.32.168.75 parents: gw-ynic @@ -1022,6 +1062,12 @@ hostgroups: dl585: alias: HP DL385 hosts private: 1 + dl180: + alias: HP DL180 + private: 1 + dl120: + alias: HP DL120 + private: 1 sw-raid: alias: Hosts with Linux software raid private: 1 @@ -1113,6 +1159,9 @@ hostgroups: apache-https: alias: hosts with https services private: 1 + broken_https_default_vhost: + alias: https default vhost does not say 200 OK + private: 1 no-bacula: alias: hosts which are not being backed up with bacula @@ -1170,15 +1219,17 @@ hostgroups: # i.e. no port 25 private: 1 - ntpsuckers: - alias: "hosts who's ntp offset is often unknown" - private: 1 - brokensamhain: alias: machines that can not run samhain private: 1 high-RTT: - alias: machines with hight round trip times + alias: machines with high round trip times + private: 1 + ping-suckers: + alias: machines that just suck at icmp + private: 1 + alioth: + alias: machines that just are just awkward private: 1 security_mirror: @@ -1216,7 +1267,7 @@ services: name: PING check: "check_ping!350.0,20%!600.0,40%" hostgroups: pingable - excludehostgroups: layer3-infrastructure, high-RTT + excludehostgroups: layer3-infrastructure, high-RTT, ping-suckers normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 @@ -1227,6 +1278,13 @@ services: normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 + - + name: PING + check: "check_ping!600.0,90%!900.0,95%" + hostgroups: ping-suckers + normal_check_interval: 5 + max_check_attempts: 4 + retry_check_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" @@ -1425,7 +1483,7 @@ services: servicegroups: backup nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: computers - excludehostgroups: freebsd + excludehostgroups: freebsd, alioth - name: process - bacula-fd servicegroups: backup @@ -1620,7 +1678,7 @@ services: hostgroups: computers depends: process - ntpd excludehosts: ancina - excludehostgroups: ntpsuckers, deadslow + excludehostgroups: deadslow servicegroups: time # - @@ -1658,11 +1716,13 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lotti hostgroups: computers + excludehostgroups: alioth - name: remote logging on lully remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lully hostgroups: computers + excludehostgroups: alioth - name: MQ connection on rainier remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" @@ -1670,6 +1730,7 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 + excludehostgroups: alioth - name: MQ connection on rapoport remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" @@ -1677,6 +1738,7 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 + excludehostgroups: alioth ### MAIL STUFF ### - @@ -1812,23 +1874,23 @@ services: - name: process - weightd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" - hostgroups: heavy-postfix + hostgroups: heavy-postfix, alioth - name: process - weightd - cache nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" - hostgroups: heavy-postfix + hostgroups: heavy-postfix, alioth depends: process - weightd - master - name: process - weightd - child nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" - hostgroups: heavy-postfix + hostgroups: heavy-postfix, alioth depends: process - weightd - master ### - name: unwanted process - policyd-weight nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" hostgroups: computers - excludehostgroups: heavy-postfix, deadslow + excludehostgroups: heavy-postfix, deadslow, alioth ### @@ -1919,6 +1981,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers normal_check_interval: 60 + excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' @@ -1946,6 +2009,7 @@ services: name: process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" hostgroups: unbound-hosts, squeeze, wheezy + excludehostgroups: alioth ### - name: process - uptimed @@ -2028,7 +2092,7 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: squeeze,freebsd + excludehostgroups: squeeze, freebsd, alioth - name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'" @@ -2041,13 +2105,13 @@ services: name: process - monit nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" hostgroups: computers - excludehostgroups: squeeze + excludehostgroups: squeeze, alioth - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli" normal_check_interval: 120 - hostgroups: dl385, dl380, dl360, bl460 + hostgroups: dl385, dl380, dl360, bl460, dl180 excludehosts: schein, rietz - name: HW - hpacucli status @@ -2074,6 +2138,13 @@ services: normal_check_interval: 120 hostgroups: dl585 ### + - + name: HW - edac status + nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" + normal_check_interval: 120 + hosts: lw05, lw06 + #hostgroups: computers + #excludehosts: villa, lobos, senfl, schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" @@ -2273,6 +2344,7 @@ services: check: check_https hostgroups: apache-https excludehosts: handel,menotti + excludehostgroups: broken_https_default_vhost depends: "process - apache2 - master" normal_check_interval: 120 - @@ -2281,6 +2353,12 @@ services: hosts: handel,menotti depends: "process - apache2 - master" normal_check_interval: 120 + - + name: network service - https + check: dsa_check_https_any_status + hostgroups: broken_https_default_vhost + depends: "process - apache2 - master" + normal_check_interval: 120 - name: network service - https cert check: dsa_check_cert!443 @@ -2384,7 +2462,7 @@ services: hostgroups: computers - name: process - postgresql91 - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:4 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.1/bin/postgres'" hostgroups: postgres91-hosts - name: postgresql backups @@ -2397,7 +2475,7 @@ services: name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" hostgroups: squeeze, wheezy - excludehostgroups: freebsd + excludehostgroups: freebsd, alioth #### - name: process - UPS - nut usbhid-ups - ups1 @@ -2497,6 +2575,13 @@ services: ############ MISC OTHER Stuff ############ ##### + - + name: puppetmaster cert + nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem" + hosts: handel + normal_check_interval: 60 + max_check_attempts: 2 + retry_check_interval: 5 - name: mirror sync - bugs check: "dsa_check_mirrorsync_skew!bugs.debian.org!project/trace/bugs-master.debian.org!120:600" @@ -2583,5 +2668,12 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=7778 $HOSTADDRESS$ public" runfrom: dijkstra hosts: giustini + ############ + - + name: current chroots + nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" + hostgroups: porterbox + normal_check_interval: 60 + retry_check_interval: 15 # vim: set ts=2 sw=2 et ai si fdm=marker: