X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=e0a3fbb23116a57dd432f4098f8cafd8bcfe5400;hb=d4b04d6bb4e86596fb23fd1a412038a38e41aa92;hp=25de02c29cefe4931c708178b71dea861652d7f7;hpb=a9f9473d0921da8f828fdf0bb79f4d44278bf14d;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 25de02c..e0a3fbb 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -78,6 +78,10 @@ servers: address: 192.25.206.1 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-gatech: + address: 128.61.240.1 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-grnet: address: 194.177.211.193 parents: gw-ubcece @@ -205,35 +209,43 @@ servers: abel: address: 217.140.96.56 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy + hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, broken_mq antheil: address: 217.140.96.60 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq arnold: address: 217.140.96.57 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq + arm-arm-01: + address: 217.140.96.58 + parents: gw-arm + hostgroups: hasbootfs, hassrvfs, buildd, wheezy, broken_mq + arm-arm-02: + address: 217.140.96.59 + parents: gw-arm + hostgroups: hasbootfs, hassrvfs, buildd, wheezy, broken_mq harris: address: 217.140.96.66 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, porterbox + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, porterbox, broken_mq hartmann: address: 217.140.96.67 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq hasse: address: 217.140.96.68 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq henze: address: 217.140.96.70 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq hoiby: address: 217.140.96.71 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd + hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd, broken_mq # }}} # {{{ gw-brown franck: @@ -277,19 +289,19 @@ servers: bm-bl9: address: 5.153.231.249 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl10: address: 5.153.231.250 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl11: address: 5.153.231.251 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl12: address: 5.153.231.252 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq milanollo: address: 5.153.231.2 @@ -336,7 +348,7 @@ servers: coccia: address: 5.153.231.11 parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client + hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client, uploadqueue, xinetd-hosts backuphost: address: 5.153.231.12 parents: ganeti-bytemark @@ -387,10 +399,6 @@ servers: address: 5.153.231.23 parents: ganeti-bytemark hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs, apache2-hosts, apache-https, service - diamond: - address: 5.153.231.24 - parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, bind9-hosts, no-bacula petrova: address: 5.153.231.25 parents: ganeti-bytemark @@ -398,7 +406,7 @@ servers: oyens: address: 5.153.231.26 parents: ganeti-bytemark - hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller, apache-https + hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller, apache-https, broken_mq barriere: address: 5.153.231.27 parents: ganeti-bytemark @@ -406,11 +414,15 @@ servers: quantz: address: 5.153.231.28 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs, apache-https portman: address: 5.153.231.29 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts + paradis: + address: 5.153.231.30 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, apache-https # }}} # {{{ gw-c3sl santoro: @@ -449,27 +461,23 @@ servers: senfl: address: 128.31.0.51 parents: gw-csail - hostgroups: computers, service, dl360, acpid-hosts, hassrvfs, apache2-hosts, rsyncd-hosts, bind9-hosts, xinetd-hosts, squeeze, apache-https + hostgroups: computers, service, dl360, acpid-hosts, hassrvfs, apache2-hosts, rsyncd-hosts, xinetd-hosts, squeeze, apache-https steffani: address: 128.31.0.36 parents: gw-csail hostgroups: computers, service, apache2-hosts, rsyncd-hosts, sw-raid, acpid-hosts, hasbootfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula # }}} # {{{ gw-dgi - argento: - address: 93.94.130.160 + storace: + address: 93.94.130.161 parents: gw-dgi - hostgroups: computers, sw-raid, hassrvfs, wheezy + hostgroups: computers, acpid-hosts, wheezy, dl380, nfs-client, hassrvfs # }}} # {{{ gw-ftcollins #alkman: # address: 192.25.206.63 # parents: gw-ftcollins # hostgroups: computers, buildd, acpid-hosts, wheezy - #merulo: - # address: 192.25.206.58 - # parents: gw-ftcollins - # hostgroups: computers, porterbox, hasusrfs, wheezy #mundy: # address: 192.25.206.62 # parents: gw-ftcollins @@ -479,6 +487,12 @@ servers: parents: gw-ftcollins hostgroups: computers, service, dl380, apache2-hosts, wheezy, no-bacula # }}} + # {{{ gw-gatech + sechter: + address: 128.61.240.73 + parents: gw-gatech + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, sw-raid, acpid-hosts, hasbootfs, hassrvfs, xinetd-hosts, wheezy, security_mirror + # }}} # {{{ gw-grnet barber: address: 194.177.211.203 @@ -507,27 +521,28 @@ servers: lw01: address: 185.17.185.177 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw02: address: 185.17.185.178 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw03: address: 185.17.185.179 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw04: address: 185.17.185.180 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 - lw05: - address: 185.17.185.181 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server + lw07: + address: 185.17.185.187 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid - lw06: - address: 185.17.185.182 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-client, autofs, hassrvfs, postgres91-hosts, apache2-hosts + lw08: + address: 185.17.185.189 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-client, autofs, hassrvfs + #, apache2-hosts # }}} # {{{ gw-karlsruhe zemlinsky: @@ -558,6 +573,14 @@ servers: address: 82.195.75.110 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, spamd, heavy-exim, highload + mipsel-manda-01: + address: 82.195.75.72 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + mipsel-manda-02: + address: 82.195.75.74 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy fils: address: 82.195.75.89 parents: ganeti3 @@ -624,6 +647,14 @@ servers: address: 82.195.75.92 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy + mipsel-manda-01: + address: 82.195.75.72 + parents: gw-man-da + hostgroups: computers, buildd, wheezy, hassrvfs, sw-raid + mipsel-manda-02: + address: 82.195.75.74 + parents: gw-man-da + hostgroups: computers, buildd, wheezy, hassrvfs, sw-raid # }}} # {{{ gw-marist zani: @@ -653,6 +684,10 @@ servers: # address: 140.211.166.58 # parents: gw-osuosl # hostgroups: computers, buildd, hasbootfs + merulo: + address: 140.211.166.46 + parents: gw-osuosl + hostgroups: computers, porterbox, hasusrfs, wheezy parry: address: 140.211.15.153 parents: gw-osuosl @@ -744,7 +779,7 @@ servers: ravel: address: 206.12.19.5 parents: sw-ubcece-kais - hostgroups: computers, general, dl385, apache2-hosts, acpid-hosts, hasbootfs, nfs-client, rsyncd-hosts, bind9-hosts, uploadqueue, hasorgfs, xinetd-hosts, wheezy, autofs + hostgroups: computers, general, dl385, apache2-hosts, acpid-hosts, hasbootfs, nfs-client, hasorgfs, wheezy, autofs dijkstra: address: 206.12.19.218 parents: sw-ubcece-kais @@ -793,10 +828,6 @@ servers: address: 206.12.19.13 parents: sw-ubcece-kais hostgroups: computers, hashomefs, sw-raid, rsyncd-hosts, apache2-hosts, xinetd-hosts, service, nfs-server, squeeze, hassrvfs - respighi: - address: 206.12.19.11 - parents: sw-ubcece-kais - hostgroups: computers, hasbootfs, aacraid, hassrvfs, service, apache2-hosts, squeeze # MSA 2000 (2012i) giustini: address: 192.168.2.6 @@ -1215,6 +1246,10 @@ hostgroups: security_mirror: alias: hosts that are security mirrors private: 1 + + broken_mq: + alias: hosts whose MQ is broken + private: 1 # }}} # {{{ ############################# servicegroups ############################# servicegroups: @@ -1237,8 +1272,10 @@ servicegroups: security: alias: security servicegroup_members: apt, kernel, samhain + MQ: + alias: rabbitMQ stuff # }}} -# {{{ ############################# # services ############################# +# {{{ ############################# services ############################# services: # {{{ ### basic networking - @@ -1339,7 +1376,7 @@ services: name: disk usage on /var/lib/postgresql servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var/lib/postgresql" - hosts: sibelius, busoni, buxtehude + hosts: sibelius, busoni, buxtehude, lw07 - name: disk usage on /var/log servicegroups: diskspace @@ -1400,6 +1437,31 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/bacula" hosts: beethoven + - + name: disk usage on /storage/snapshot-farm-1 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-1" + hosts: lw01 + - + name: disk usage on /storage/snapshot-farm-2 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-2" + hosts: lw02 + - + name: disk usage on /storage/snapshot-farm-3 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-3" + hosts: lw03 + - + name: disk usage on /storage/snapshot-farm-4 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-4" + hosts: lw04 + - + name: disk usage on /srv/morgue.debian.org/ + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 95 90 /srv/morgue.debian.org" + hosts: lw03 # }}} # {{{ ### system # {{{ setup @@ -1701,20 +1763,22 @@ services: ### - name: MQ connection on rainier + servicegroups: MQ remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rainier hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 - excludehostgroups: alioth + excludehostgroups: alioth, broken_mq - name: MQ connection on rapoport + servicegroups: MQ remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rapoport hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 - excludehostgroups: alioth + excludehostgroups: alioth, broken_mq ### - name: local resolver @@ -1859,11 +1923,10 @@ services: normal_check_interval: 120 hostgroups: dl585 ### - - - name: HW - edac status - nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" - normal_check_interval: 120 - hosts: lw05, lw06 +# - +# name: HW - edac status +# nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" +# normal_check_interval: 120 #hostgroups: computers #excludehosts: villa, lobos, senfl, schein - @@ -1871,17 +1934,27 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" normal_check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl - excludehosts: villa, lobos, senfl, schein + excludehosts: villa, lobos, senfl, schein, storace - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" normal_check_interval: 120 - hosts: villa, lobos + hosts: villa + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant --ignore-failed='PS2'" + normal_check_interval: 120 + hosts: lobos - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant" normal_check_interval: 120 hosts: senfl, schein + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" + normal_check_interval: 120 + hosts: storace ### - name: RAID - 3ware @@ -2240,28 +2313,28 @@ services: hosts: backuphost # }}} # {{{ power - - - name: process - UPS - nut usbhid-ups - ups1 - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C usbhid-ups -a '/lib/nut/usbhid-ups -a ups1'" - hosts: franck - - - name: process - UPS - nut upsd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsd -a '/sbin/upsd'" - hosts: franck - - - name: process - UPS - nut upsmon master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C upsmon -a '/sbin/upsmon'" - hosts: franck - - - name: process - UPS - nut upsmon worker - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsmon -a '/sbin/upsmon'" - hosts: franck - depends: process - UPS - nut upsmon master - - - name: UPS - on line power - nrpe: "/usr/lib/nagios/plugins/dsa-check-ups" - hosts: franck - depends: process - UPS - nut upsd +# - +# name: process - UPS - nut usbhid-ups - ups1 +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C usbhid-ups -a '/lib/nut/usbhid-ups -a ups1'" +# hosts: franck +# - +# name: process - UPS - nut upsd +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsd -a '/sbin/upsd'" +# hosts: franck +# - +# name: process - UPS - nut upsmon master +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C upsmon -a '/sbin/upsmon'" +# hosts: franck +# - +# name: process - UPS - nut upsmon worker +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsmon -a '/sbin/upsmon'" +# hosts: franck +# depends: process - UPS - nut upsmon master +# - +# name: UPS - on line power +# nrpe: "/usr/lib/nagios/plugins/dsa-check-ups" +# hosts: franck +# depends: process - UPS - nut upsd # }}} # {{{ buildd - @@ -2377,7 +2450,7 @@ services: hosts: global - name: DNS SOA sync - alioth.debian.org - check: "dsa_check_soas_add!alioth.debian.org!alioth.debian.org" + check: "dsa_check_soas_add!denis.debian.org!alioth.debian.org" hosts: global - name: DNS - delegation and signature expiry @@ -2416,7 +2489,7 @@ services: hosts: giustini - name: event log - remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=8867 $HOSTADDRESS$ public" + remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=9966 $HOSTADDRESS$ public" runfrom: dijkstra hosts: giustini # }}}