X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=b5f8011e525917cfb25b6c307e5747e23627adc5;hb=40389fe7ad8028bedd698e6d804e9474e1156594;hp=25de02c29cefe4931c708178b71dea861652d7f7;hpb=a9f9473d0921da8f828fdf0bb79f4d44278bf14d;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 25de02c..cf8594c 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -33,6 +33,10 @@ servers: address: 130.239.18.97 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-aql: + address: 141.170.2.17 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-bytemark: address: 89.16.160.116 parents: gw-ubcece @@ -74,8 +78,8 @@ servers: address: 62.104.23.249 parents: gw-ubcece hostgroups: layer3-infrastructure - gw-ftcollins: - address: 192.25.206.1 + gw-gatech: + address: 128.61.240.1 parents: gw-ubcece hostgroups: layer3-infrastructure gw-grnet: @@ -94,6 +98,10 @@ servers: address: 185.17.185.190 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-linaro: + address: 64.28.108.36 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-man-da: address: 82.195.75.126 parents: gw-ubcece @@ -110,6 +118,10 @@ servers: address: 193.201.200.129 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-sakura: + address: 133.242.99.65 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-sanger: address: 193.62.202.20 parents: gw-ubcece @@ -139,6 +151,10 @@ servers: address: 128.101.240.222 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-unicamp: + address: 143.106.167.234 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-utwente: address: 130.89.149.1 parents: gw-ubcece @@ -168,6 +184,11 @@ servers: address: 213.165.95.6 parents: powell hostgroups: computers, buildd, hassrvfs, kvmdomains, wheezy + piu-slave-1und1-01: + address: 213.165.95.7 + parents: powell + hostgroups: computers, service, kvmdomains, wheezy + contacts: holger # }}} # {{{ gw-1und1-sec schumann: @@ -178,10 +199,6 @@ servers: address: 195.20.242.124 parents: schumann hostgroups: computers, service, apache2-hosts, hassrvfs, hasbootfs, rsyncd-hosts, uploadqueue, kvmdomains, xinetd-hosts, apache-https, postgres91-hosts, wheezy - geo3: - address: 195.20.242.125 - parents: schumann - hostgroups: computers, service, hasbootfs, bind9-hosts, kvmdomains, wheezy soler: address: 195.20.242.126 parents: schumann @@ -195,45 +212,71 @@ servers: pettersson: address: 130.239.18.123 parents: gw-accumu - hostgroups: computers, hasbootfs, aacraid, nfs-client, acpid-hosts, service, apache2-hosts, wheezy, autofs + hostgroups: computers, hasbootfs, aacraid, nfs-client, acpid-hosts, service, apache2-hosts, jessie, autofs, sw-raid praetorius: address: 130.239.18.121 parents: gw-accumu hostgroups: computers, buildd, hassrvfs, wheezy # }}} + # {{{ gw-aql + mips-aql-01: + address: 141.170.6.149 + parents: gw-aql + hostgroups: computers, buildd, jessie, nfs-client + mips-aql-02: + address: 141.170.6.150 + parents: gw-aql + hostgroups: computers, buildd, wheezy, nfs-client + minkus: + address: 141.170.6.151 + parents: gw-aql + hostgroups: computers, porterbox, jessie, nfs-client + mipsel-aql-01: + address: 141.170.6.152 + parents: gw-aql + hostgroups: computers, buildd, jessie, hassrvfs, hasbootfs, sw-raid + mipsel-aql-02: + address: 141.170.6.153 + parents: gw-aql + hostgroups: computers, buildd, jessie, hassrvfs, hasbootfs, sw-raid + # }}} # {{{ gw-arm abel: address: 217.140.96.56 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy - antheil: - address: 217.140.96.60 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, broken_mq arnold: address: 217.140.96.57 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie, broken_mq + arm-arm-01: + address: 217.140.96.58 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie, broken_mq + arm-arm-02: + address: 217.140.96.59 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie, broken_mq + arm-arm-03: + address: 217.140.96.60 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie, broken_mq harris: address: 217.140.96.66 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, porterbox + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, porterbox, broken_mq hartmann: address: 217.140.96.67 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd - hasse: - address: 217.140.96.68 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd - henze: - address: 217.140.96.70 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd + hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq hoiby: address: 217.140.96.71 parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd + hostgroups: computers, hasbootfs, hassrvfs, armhf, jessie, buildd, broken_mq + ia64-arm-01: + address: 217.140.96.61 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, wheezy, buildd, broken_mq, sw-raid, acpid-hosts # }}} # {{{ gw-brown franck: @@ -277,19 +320,19 @@ servers: bm-bl9: address: 5.153.231.249 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl10: address: 5.153.231.250 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl11: address: 5.153.231.251 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq bm-bl12: address: 5.153.231.252 parents: gw-bytemark - hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute, broken_mq milanollo: address: 5.153.231.2 @@ -336,11 +379,7 @@ servers: coccia: address: 5.153.231.11 parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client - backuphost: - address: 5.153.231.12 - parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, wheezy + hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client, uploadqueue, xinetd-hosts, apache-https, apache2-hosts philp: address: 5.153.231.13 parents: ganeti-bytemark @@ -368,7 +407,7 @@ servers: dinis: address: 5.153.231.19 parents: ganeti-bytemark - hostgroups: computers, general, kvmdomains, wheezy + hostgroups: computers, general, kvmdomains, wheezy, hassrvfs donizetti: address: 5.153.231.20 parents: ganeti-bytemark @@ -386,11 +425,7 @@ servers: ticharich: address: 5.153.231.23 parents: ganeti-bytemark - hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs, apache2-hosts, apache-https, service - diamond: - address: 5.153.231.24 - parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, bind9-hosts, no-bacula + hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs, apache2-hosts, apache-https, service, broken_https_default_vhost petrova: address: 5.153.231.25 parents: ganeti-bytemark @@ -398,25 +433,56 @@ servers: oyens: address: 5.153.231.26 parents: ganeti-bytemark - hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller, apache-https + hostgroups: computers, kvmdomains, jessie, apache2-hosts, openstack-controller, apache-https, broken_mq barriere: address: 5.153.231.27 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, porterbox + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, porterbox quantz: address: 5.153.231.28 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, nfs-client, xinetd-hosts, heavy-exim, apache2-hosts, autofs, apache-https portman: address: 5.153.231.29 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts + paradis: + address: 5.153.231.30 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, apache-https + x86-bm-01: + address: 5.153.231.32 + parents: ganeti-bytemark + hostgroups: computers, kvmdomains, wheezy, acpid-hosts, no-bacula + gideon: + address: 5.153.231.34 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, apache-https + httpredir-bm-01: + address: 5.153.231.35 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts + lindsay: + address: 5.153.231.36 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client + fede: + address: 5.153.231.37 + hostgroups: computers, service, kvmdomains, jessie, hassrvfs + sor: + address: 5.153.231.38 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, autofs, nfs-client + jerea: + address: 5.153.231.39 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts # }}} # {{{ gw-c3sl santoro: address: 200.17.202.197 parents: gw-c3sl - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, xinetd-hosts, hassrvfs, wheezy, high-RTT, security_mirror, no-bacula, apache-https + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, xinetd-hosts, hassrvfs, jessie, high-RTT, security_mirror, no-bacula, apache-https contacts: faw # }}} # {{{ gw-carnet @@ -446,56 +512,90 @@ servers: hostgroups: computers, buildd, hassrvfs, sw-raid, wheezy, sparc # }}} # {{{ gw-csail - senfl: - address: 128.31.0.51 - parents: gw-csail - hostgroups: computers, service, dl360, acpid-hosts, hassrvfs, apache2-hosts, rsyncd-hosts, bind9-hosts, xinetd-hosts, squeeze, apache-https steffani: address: 128.31.0.36 parents: gw-csail hostgroups: computers, service, apache2-hosts, rsyncd-hosts, sw-raid, acpid-hosts, hasbootfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula + + csail-node01: + address: 128.31.0.16 + parents: gw-csail + hostgroups: computers, service, dl360, acpid-hosts, wheezy, drbd-hosts + csail-node02: + address: 128.31.0.46 + parents: gw-csail + hostgroups: computers, service, dl360, acpid-hosts, wheezy, drbd-hosts + ganeti-csail: + address: 128.31.0.49 + parents: gw-bytemark + hostgroups: notacomputer + + falla: + address: 128.31.0.65 + parents: ganeti-csail + hostgroups: computers, freebsd, hassrvfs, porterbox, wheezy + fischer: + address: 128.31.0.35 + parents: ganeti-csail + hostgroups: computers, freebsd, hassrvfs, porterbox, wheezy + mirror-csail: + address: 128.31.0.62 + parents: ganeti-csail + hostgroups: computers, service, hassrvfs, kvmdomains, wheezy, apache2-hosts, rsyncd-hosts, xinetd-hosts, apache-https + x86-csail-01: + address: 128.31.0.50 + parents: ganeti-csail + hostgroups: computers, buildd, hassrvfs, kvmdomains, wheezy + httpredir-csail-01: + address: 128.31.0.66 + parents: ganeti-csail + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts # }}} # {{{ gw-dgi - argento: - address: 93.94.130.160 + storace: + address: 93.94.130.161 parents: gw-dgi - hostgroups: computers, sw-raid, hassrvfs, wheezy - # }}} - # {{{ gw-ftcollins - #alkman: - # address: 192.25.206.63 - # parents: gw-ftcollins - # hostgroups: computers, buildd, acpid-hosts, wheezy - #merulo: - # address: 192.25.206.58 - # parents: gw-ftcollins - # hostgroups: computers, porterbox, hasusrfs, wheezy - #mundy: - # address: 192.25.206.62 - # parents: gw-ftcollins - # hostgroups: computers, buildd, hassrvfs, sw-raid, acpid-hosts, wheezy - spohr: - address: 192.25.206.33 - parents: gw-ftcollins - hostgroups: computers, service, dl380, apache2-hosts, wheezy, no-bacula + hostgroups: computers, acpid-hosts, wheezy, dl380, nfs-client, hassrvfs + # }}} + # {{{ gw-gatech + sechter: + address: 128.61.240.73 + parents: gw-gatech + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, sw-raid, acpid-hosts, hasbootfs, hassrvfs, xinetd-hosts, wheezy, security_mirror # }}} # {{{ gw-grnet - barber: - address: 194.177.211.203 + ganeti-grnet: + address: 194.177.211.194 parents: gw-grnet - hostgroups: computers, acpid-hosts, buildd, hassrvfs, mptraid, wheezy - biber: - address: 194.177.211.204 + hostgroups: notacomputer + grnet-node01: + address: 194.177.211.195 parents: gw-grnet - hostgroups: computers, acpid-hosts, buildd, hassrvfs, mptraid, wheezy + hostgroups: computers, service, dl380, acpid-hosts, wheezy, drbd-hosts + grnet-node02: + address: 194.177.211.196 + parents: gw-grnet + hostgroups: computers, service, dl380, acpid-hosts, wheezy, drbd-hosts rautavaara: address: 194.177.211.199 parents: gw-grnet - hostgroups: computers, acpid-hosts, megaraid, service, squeeze, nfs-server - vitry: - address: 194.177.211.206 + hostgroups: computers, acpid-hosts, megaraid, service, wheezy + loghost-grnet-01: + address: 194.177.211.200 parents: gw-grnet - hostgroups: computers, acpid-hosts, mptraid, no-bacula, wheezy + hostgroups: computers, service, kvmdomains, jessie, hassrvfs + geo3: + address: 194.177.211.201 + parents: gw-grnet + hostgroups: computers, service, bind9-hosts, kvmdomains, jessie + cgi-grnet-01: + address: 194.177.211.202 + parents: gw-grnet + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, apache-https + x86-grnet-01: + address: 194.177.211.203 + parents: ganeti-grnet + hostgroups: computers, buildd, hassrvfs, kvmdomains, jessie # }}} # {{{ gw-isc schein: @@ -507,27 +607,42 @@ servers: lw01: address: 185.17.185.177 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw02: address: 185.17.185.178 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw03: address: 185.17.185.179 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server lw04: address: 185.17.185.180 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl180 - lw05: - address: 185.17.185.181 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-server + lw07: + address: 185.17.185.187 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid - lw06: - address: 185.17.185.182 + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-client, autofs, hassrvfs, postgres91-hosts, apache2-hosts + lw08: + address: 185.17.185.189 parents: gw-leaseweb - hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid + hostgroups: computers, service, acpid-hosts, wheezy, dl180, nfs-client, autofs, hassrvfs + #, apache2-hosts + # }}} + # {{{ gw-linaro + arm-linaro-01: + address: 64.28.108.83 + parents: gw-linaro + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq + arm-linaro-03: + address: 64.28.108.85 + parents: gw-linaro + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq + asachi: + address: 64.28.108.84 + parents: gw-linaro + hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, broken_mq # }}} # {{{ gw-karlsruhe zemlinsky: @@ -540,7 +655,7 @@ servers: ball: address: 82.195.75.70 parents: gw-man-da - hostgroups: computers, buildd, hasbootfs, wheezy + hostgroups: computers, buildd, hasbootfs, wheezy, sw-raid # bartok TODO czerny: address: 82.195.75.109 @@ -558,6 +673,14 @@ servers: address: 82.195.75.110 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, spamd, heavy-exim, highload + mipsel-manda-01: + address: 82.195.75.72 + parents: gw-man-da + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + mipsel-manda-02: + address: 82.195.75.74 + parents: gw-man-da + hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy fils: address: 82.195.75.89 parents: ganeti3 @@ -576,14 +699,6 @@ servers: address: 82.195.75.99 parents: ganeti3 hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, hasvarlogfs - rem: - address: 82.195.75.68 - parents: gw-man-da - hostgroups: computers, buildd, hasbootfs, hassrvfs, wheezy - unger: - address: 82.195.75.102 - parents: gw-man-da - hostgroups: computers, service, dl360, acpid-hosts, wheezy draghi: address: 82.195.75.106 parents: ganeti3 @@ -591,7 +706,7 @@ servers: geo1: address: 82.195.75.105 parents: ganeti3 - hostgroups: computers, service, bind9-hosts, kvmdomains, wheezy + hostgroups: computers, service, bind9-hosts, kvmdomains, jessie handel: address: 82.195.75.104 parents: ganeti3 @@ -603,7 +718,7 @@ servers: stockhausen: address: 82.195.75.108 parents: ganeti3 - hostgroups: computers, service, kvmdomains, wheezy, acpid-hosts, jetty-hosts + hostgroups: computers, service, kvmdomains, jessie, acpid-hosts ganeti3: address: 82.195.75.111 parents: gw-man-da @@ -624,12 +739,24 @@ servers: address: 82.195.75.92 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy + wolkenstein: + address: 82.195.75.65 + parents: ganeti3 + hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, rsyncd-hosts, apache2-hosts, wheezy + mipsel-manda-01: + address: 82.195.75.72 + parents: gw-man-da + hostgroups: computers, buildd, wheezy, hassrvfs, sw-raid + mipsel-manda-02: + address: 82.195.75.74 + parents: gw-man-da + hostgroups: computers, buildd, wheezy, hassrvfs, sw-raid # }}} # {{{ gw-marist zani: address: 148.100.88.22 parents: gw-marist - hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed, ping-suckers + hostgroups: computers, buildd, hassrvfs, jessie, incomingmailrelayed # }}} # {{{ gw-osuosl busoni: @@ -643,7 +770,7 @@ servers: buxtehude: address: 140.211.166.26 parents: byrd - hostgroups: computers, service, hassrvfs, acpid-hosts, apache2-hosts, heavy-exim, postgres91-hosts, wheezy, hasvarlogfs, apache-https + hostgroups: computers, service, hassrvfs, acpid-hosts, apache2-hosts, heavy-exim, postgres91-hosts, wheezy, hasvarlogfs, apache-https, spamd # malo TODO mayer: address: 140.211.166.78 @@ -653,14 +780,18 @@ servers: # address: 140.211.166.58 # parents: gw-osuosl # hostgroups: computers, buildd, hasbootfs + merulo: + address: 140.211.166.46 + parents: gw-osuosl + hostgroups: computers, porterbox, hasusrfs, wheezy parry: address: 140.211.15.153 parents: gw-osuosl - hostgroups: computers, wheezy, buildd, hassrvfs + hostgroups: computers, wheezy, buildd, hassrvfs, sw-raid partch: address: 140.211.15.152 parents: gw-osuosl - hostgroups: computers, wheezy, hassrvfs, porterbox + hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid rietz: address: 140.211.166.43 parents: gw-osuosl @@ -682,7 +813,7 @@ servers: sibelius: address: 193.62.202.28 parents: gw-sanger - hostgroups: computers, acpid-hosts, postgres91-hosts, service, apache2-hosts, sw-raid, squeeze, rsyncd-hosts, xinetd-hosts, hasvarlogfs + hostgroups: computers, acpid-hosts, postgres91-hosts, service, apache2-hosts, sw-raid, wheezy, rsyncd-hosts, xinetd-hosts, hasvarlogfs contacts: tjrc1, dave smetana: address: 193.62.202.29 @@ -698,13 +829,9 @@ servers: villa: address: 212.211.132.32 parents: gw-scanplus-villa - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl380, hasvarfs, hasusrfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, acpid-hosts # }}} # {{{ gw-sil - beethoven: - address: 86.59.118.146 - parents: gw-sil - hostgroups: computers, hasbootfs, aacraid, service, acpid-hosts, wheezy eder: address: 86.59.118.151 parents: gw-sil @@ -716,7 +843,7 @@ servers: eberlin: address: 86.59.118.155 parents: gw-sil - hostgroups: computers, buildd, wheezy + hostgroups: computers, buildd, wheezy, sw-raid # }}} # {{{ gw-ubcece sw-ubcece: @@ -744,7 +871,7 @@ servers: ravel: address: 206.12.19.5 parents: sw-ubcece-kais - hostgroups: computers, general, dl385, apache2-hosts, acpid-hosts, hasbootfs, nfs-client, rsyncd-hosts, bind9-hosts, uploadqueue, hasorgfs, xinetd-hosts, wheezy, autofs + hostgroups: computers, general, dl385, apache2-hosts, acpid-hosts, hasbootfs, nfs-client, hasorgfs, wheezy, autofs dijkstra: address: 206.12.19.218 parents: sw-ubcece-kais @@ -777,10 +904,6 @@ servers: address: 206.12.19.118 parents: ganeti2 hostgroups: computers, general, apache2-hosts, hasbootfs, kvmdomains, apache-https, wheezy - wolkenstein: - address: 206.12.19.116 - parents: ganeti2 - hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, rsyncd-hosts, apache2-hosts, wheezy brahms: address: 206.12.19.115 parents: ganeti2 @@ -789,26 +912,14 @@ servers: address: 206.12.19.113 parents: ganeti2 hostgroups: computers, service, bind9-hosts, kvmdomains, wheezy - stabile: - address: 206.12.19.13 - parents: sw-ubcece-kais - hostgroups: computers, hashomefs, sw-raid, rsyncd-hosts, apache2-hosts, xinetd-hosts, service, nfs-server, squeeze, hassrvfs - respighi: - address: 206.12.19.11 - parents: sw-ubcece-kais - hostgroups: computers, hasbootfs, aacraid, hassrvfs, service, apache2-hosts, squeeze # MSA 2000 (2012i) giustini: address: 192.168.2.6 - parents: dijkstra + parents: sw-ubcece-kais hostgroups: notacomputer # unless we implement runfrom for host alive checks pingable: false check_command: dsa_check_always_ok - falla: - address: 206.12.19.117 - parents: ganeti2 - hostgroups: computers, freebsd, hassrvfs, porterbox, wheezy fano: address: 206.12.19.110 parents: ganeti2 @@ -819,10 +930,6 @@ servers: parents: ganeti2 hostgroups: computers, freebsd, wheezy, buildd, hassrvfs contacts: christoph - fischer: - address: 206.12.19.112 - parents: ganeti2 - hostgroups: computers, freebsd, hassrvfs, porterbox, wheezy gabrielli: address: 206.12.19.17 parents: sw-ubcece-kais @@ -835,14 +942,6 @@ servers: address: 206.12.19.15 parents: sw-ubcece-kais hostgroups: computers, buildd, wheezy - rossini: - address: 206.12.19.19 - parents: sw-ubcece-kais - hostgroups: computers, dl585, acpid-hosts, service, wheezy - salieri: - address: 206.12.19.20 - parents: sw-ubcece-kais - hostgroups: computers, dl585, acpid-hosts, service, wheezy traetta: address: 206.12.19.21 parents: sw-ubcece-kais @@ -853,11 +952,11 @@ servers: # hostgroups: computers, service, hasbootfs, kvmdomains, squeeze, hasvicepa lotti: address: 206.12.19.121 - parents: sw-ubcece-kais + parents: ganeti2 hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, hassrvfs nono: address: 206.12.19.123 - parents: traetta + parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, xinetd-hosts, apache2-hosts, apache-https, broken_https_default_vhost reger: address: 206.12.19.124 @@ -869,7 +968,7 @@ servers: hostgroups: computers, service, kvmdomains, wheezy, postgres91-hosts, xinetd-hosts glinka: address: 206.12.19.126 - parents: traetta + parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs, xinetd-hosts tye: address: 206.12.19.129 @@ -877,7 +976,7 @@ servers: hostgroups: computers, service, kvmdomains, wheezy, heavy-exim, apache2-hosts, nfs-client, autofs, hassrvfs elgar: address: 206.12.19.130 - parents: salieri + parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy gombert: address: 206.12.19.132 @@ -885,7 +984,7 @@ servers: hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts jenkins: address: 206.12.19.133 - parents: salieri + parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy blavet: address: 206.12.19.134 @@ -893,16 +992,12 @@ servers: hostgroups: computers, service, kvmdomains, wheezy, xinetd-hosts, nfs-client, autofs diabelli: address: 206.12.19.136 - parents: traetta + parents: ganeti2 hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, apache2-hosts, apache-https, broken_https_default_vhost bizet: address: 206.12.19.137 parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, no-bacula - lilburn: - address: 206.12.19.138 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, apache2-hosts, nfs-client, autofs popov: address: 206.12.19.119 parents: ganeti2 @@ -929,11 +1024,6 @@ servers: hostgroups: computers, service, kvmdomains, wheezy, spamd, heavy-exim, mail-relay # }}} # {{{ gw-ugent - ancina: - address: 157.193.39.13 - parents: gw-ugent - hostgroups: computers, buildd, hassrvfs, hasbootfs, incomingmailrelayed2025, xinetd-hosts, wheezy - contacts: luk # }}} # {{{ gw-umn saens: @@ -941,6 +1031,28 @@ servers: parents: gw-umn hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl380, hasvarfs, hasusrfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula # }}} + # {{{ gw-unicamp + asgard: + address: 143.106.167.145 + parents: gw-unicamp + hostgroups: layer3-infrastructure + prokofiev: + address: 143.106.167.147 + parents: gw-unicamp + hostgroups: computers, jessie, service + powerpc-unicamp-01: + address: 143.106.167.149 + parents: prokofiev + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie + ppc64el-unicamp-01: + address: 143.106.167.135 + parents: prokofiev + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie + plummer: + address: 143.106.167.146 + parents: prokofiev + hostgroups: computers, porterbox, hassrvfs, jessie + # }}} # {{{ gw-utwente klecker: address: 130.89.148.10 @@ -960,11 +1072,15 @@ servers: hostgroups: secondary-IPs # }}} # {{{ gw-ynic - howells: + henze: + address: 144.32.168.74 + parents: gw-ynic + hostgroups: computers, hasbootfs, hassrvfs, armhf, jessie, buildd + hasse: address: 144.32.168.75 parents: gw-ynic hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd - hummel: + antheil: address: 144.32.168.76 parents: gw-ynic hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd @@ -976,6 +1092,18 @@ servers: address: 144.32.168.77 parents: gw-ynic hostgroups: computers, buildd, sw-raid, hassrvfs, wheezy + #antheil: + # address: 217.140.96.60 + # parents: gw-arm + # hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, broken_mq + #hasse: + # address: 217.140.96.68 + # parents: gw-arm + # hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq + #henze: + # address: 217.140.96.70 + # parents: gw-arm + # hostgroups: computers, hasbootfs, hassrvfs, wheezy, armhf, buildd, broken_mq # }}} # {{{ gw-zivit zandonai: @@ -987,6 +1115,11 @@ servers: parents: gw-zivit hostgroups: computers, porterbox, hassrvfs, wheezy # }}} + # {{{ gw-sakura + setoguchi: + address: 133.242.99.74 + parents: gw-sakura + hostgroups: computers, service, wheezy, no-bacula, hassrvfs, apache2-hosts, rsyncd-hosts, xinetd-hosts, security_mirror, acpid-hosts # }}} # {{{ ############################# host groups ############################# @@ -994,6 +1127,9 @@ hostgroups: computers: alias: computers private: 1 + extinfo-icon_image: base/debian.png + extinfo-icon_image_alt: Debian GNU/Linux + extinfo-notes_url: https://db.debian.org/machines.cgi?host=%s layer2-infrastructure: alias: Layer 2 Devices extinfo-icon_image: base/switch40.png @@ -1008,6 +1144,7 @@ hostgroups: freebsd: alias: freebsd private: 1 + extinfo-icon_image_alt: Debian GNU/kFreeBSD armhf: alias: armhf private: 1 @@ -1017,24 +1154,12 @@ hostgroups: porterbox: alias: developer accessible porter machines - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s service: alias: machines running services - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s buildd: alias: buildd systems - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s general: alias: general purpose developer accessible machines - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s dl380: alias: HP DL380 hosts @@ -1069,14 +1194,11 @@ hostgroups: megaraid: alias: Hosts with LSI Logic MegaRAID private: 1 - mptraid: - alias: Hosts with LSI Logic Fusion-MPT - private: 1 - squeeze: - alias: Hosts running squeeze wheezy: alias: Hosts running wheezy + jessie: + alias: Hosts running jessie kvmdomains: alias: Hosts that are KVM domains @@ -1102,9 +1224,6 @@ hostgroups: apache2-hosts: alias: hosts running apache2 private: 1 - jetty-hosts: - alias: hosts running jetty - private: 1 bind9-hosts: alias: hosts running bind9 private: 1 @@ -1171,9 +1290,6 @@ hostgroups: hasorgfs: alias: hosts with a /org filesystem private: 1 - hashomefs: - alias: hosts with a /home filesystem - private: 1 hasvarfs: alias: hosts with a /var filesystem private: 1 @@ -1199,9 +1315,6 @@ hostgroups: high-RTT: alias: machines with high round trip times private: 1 - ping-suckers: - alias: machines that just suck at icmp - private: 1 alioth: alias: machines that just are just awkward private: 1 @@ -1215,6 +1328,10 @@ hostgroups: security_mirror: alias: hosts that are security mirrors private: 1 + + broken_mq: + alias: hosts whose MQ is broken + private: 1 # }}} # {{{ ############################# servicegroups ############################# servicegroups: @@ -1237,15 +1354,17 @@ servicegroups: security: alias: security servicegroup_members: apt, kernel, samhain + MQ: + alias: rabbitMQ stuff # }}} -# {{{ ############################# # services ############################# +# {{{ ############################# services ############################# services: # {{{ ### basic networking - name: PING check: "check_ping!350.0,20%!600.0,40%" hostgroups: pingable - excludehostgroups: layer3-infrastructure, high-RTT, ping-suckers + excludehostgroups: layer3-infrastructure, high-RTT normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 @@ -1256,13 +1375,6 @@ services: normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 - - - name: PING - check: "check_ping!600.0,90%!900.0,95%" - hostgroups: ping-suckers - normal_check_interval: 5 - max_check_attempts: 4 - retry_check_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" @@ -1277,16 +1389,11 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk -w 5% -c 2% -A -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs --ignore-eregi-path='/home/buildd/build-tr|/var/lib/schroot/mount'" hostgroups: computers - excludehosts: sibelius,stabile - - - name: disk usage - all - servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk -w 5% -c 3% -A -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs --ignore-eregi-path=/srv/snapshot.debian.org" - hosts: stabile + excludehosts: sibelius - name: disk usage - all servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs 95 98" + nrpe: "/usr/lib/nagios/plugins/check_disk -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs -x /srv/farm-snapshot/farm-misc 95 98" hosts: sibelius - @@ -1294,12 +1401,6 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /" hostgroups: computers - excludehosts: spohr - - - name: disk usage on / - servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 90 93 /" - hosts: spohr - name: disk usage on /boot servicegroups: diskspace @@ -1330,16 +1431,11 @@ services: servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /usr" hostgroups: hasusrfs - - - name: disk usage on /home - servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /home" - hostgroups: hashomefs - name: disk usage on /var/lib/postgresql servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 75 90 /var/lib/postgresql" - hosts: sibelius, busoni, buxtehude + hosts: sibelius, busoni, buxtehude, lw07 - name: disk usage on /var/log servicegroups: diskspace @@ -1354,21 +1450,31 @@ services: name: disk usage on /srv/mirrors servicegroups: diskspace nrpe: "/usr/lib/nagios/plugins/check_disk 95 98 /srv/mirrors" - hosts: beethoven, sibelius + hosts: sibelius - name: disk usage on /srv/snapshot.debian.org servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 98 99 /srv/snapshot.debian.org" - hosts: stabile + nrpe: "/usr/lib/nagios/plugins/check_disk 90 94 /srv/snapshot.debian.org" + hosts: sibelius - - name: disk usage on /srv/snapshot.debian.org + name: disk usage on /srv/farm-snapshot/farm-1 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 90 94 /srv/snapshot.debian.org" + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-1" + hosts: sibelius + - + name: disk usage on /srv/farm-snapshot/farm-2 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2" hosts: sibelius - - name: disk usage on /srv/farm-snapshot/farm-misc + name: disk usage on /srv/farm-snapshot/farm-3 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-misc" + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-3" + hosts: sibelius + - + name: disk usage on /srv/farm-snapshot/farm-4 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-4" hosts: sibelius - name: disk usage on /var/lib/postgresql/9.1 @@ -1381,25 +1487,30 @@ services: nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/ftp-master.debian.org" hosts: franck - - name: disk usage on /srv/pgbackup + name: disk usage on /storage/snapshot-farm-1 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-1" + hosts: lw01 + - + name: disk usage on /storage/snapshot-farm-2 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/pgbackup" - hosts: beethoven + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-2" + hosts: lw02 - - name: disk usage on /srv/git-backup + name: disk usage on /storage/snapshot-farm-3 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/git-backup" - hosts: beethoven + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-3" + hosts: lw03 - - name: disk usage on /srv/da-backup + name: disk usage on /storage/snapshot-farm-4 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/da-backup" - hosts: beethoven + nrpe: "/usr/lib/nagios/plugins/check_disk 98 92 /storage/snapshot-farm-4" + hosts: lw04 - - name: disk usage on /srv/bacula + name: disk usage on /srv/morgue.debian.org/ servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 85 95 /srv/bacula" - hosts: beethoven + nrpe: "/usr/lib/nagios/plugins/check_disk 95 90 /srv/morgue.debian.org" + hosts: lw03 # }}} # {{{ ### system # {{{ setup @@ -1438,6 +1549,11 @@ services: name: processes - total nrpe: "/usr/lib/nagios/plugins/check_procs 620 700" hostgroups: computers + excludehosts: prokofiev + - + name: processes - total + nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700" + hosts: prokofiev - name: swap usage - percent nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%" @@ -1451,6 +1567,12 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" hostgroups: computers excludehosts: zelenka, zandonai + excludehostgroups: jessie + - + name: process - getty + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty" + hostgroups: computers + hostgroups: jessie - name: processes - zombies nrpe: "/usr/lib/nagios/plugins/check_procs 5 10 -s Z" @@ -1474,7 +1596,7 @@ services: servicegroups: backup nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup" hostgroups: computers - excludehosts: backuphost + excludehosts: storace normal_check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 @@ -1482,7 +1604,7 @@ services: name: backup server config servicegroups: backup nrpe: "/usr/lib/nagios/plugins/dsa-check-dabackup-server" - hosts: beethoven, backuphost + hosts: storace normal_check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 @@ -1537,6 +1659,16 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 + - + name: upgraded libraries + servicegroups: security + nrpe: "sudo /usr/local/sbin/dsa-check-libs" + #nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs" + hostgroups: computers + excludehostgroups: freebsd + normal_check_interval: 60 + retry_check_interval: 15 + notification_interval: 10080 - name: installed firewall nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/ferm.conf" @@ -1551,7 +1683,12 @@ services: name: process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'" hostgroups: computers - excludehostgroups: freebsd, sparc + excludehostgroups: freebsd, sparc, jessie + - + name: process - ulogd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'" + hostgroups: jessie + excludehostgroups: freebsd - name: unexpected process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd" @@ -1583,12 +1720,16 @@ services: name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" hostgroups: computers - excludehostgroups: freebsd + excludehostgroups: freebsd, jessie - name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" hostgroups: freebsd + - + name: process - syslog-ng + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -F'" + hostgroups: jessie - name: remote logging on lotti @@ -1602,6 +1743,12 @@ services: runfrom: lully hostgroups: computers excludehostgroups: alioth + - + name: remote logging on loghost-grnet-01 + remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" + runfrom: loghost-grnet-01 + hostgroups: computers + excludehostgroups: alioth # }}} # {{{ base service - @@ -1641,7 +1788,7 @@ services: - name: process - munin-node nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'" - hostgroups: wheezy + hostgroups: wheezy, jessie excludehostgroups: freebsd - name: network service - munin-node @@ -1659,14 +1806,6 @@ services: check: dsa_check_ntp hostgroups: computers depends: process - ntpd - excludehosts: ancina - servicegroups: time - ### - - - name: network service - time - check: dsa_check_time - hosts: ancina - depends: process - xinetd servicegroups: time ### - @@ -1683,38 +1822,36 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: squeeze, freebsd, alioth + excludehostgroups: freebsd, alioth - name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: freebsd ### - - - name: process - monit - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/sbin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" - hostgroups: squeeze - name: process - monit nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" hostgroups: computers - excludehostgroups: squeeze, alioth + excludehostgroups: alioth, jessie ### - name: MQ connection on rainier + servicegroups: MQ remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rainier hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 - excludehostgroups: alioth + excludehostgroups: alioth, broken_mq - name: MQ connection on rapoport + servicegroups: MQ remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" runfrom: rapoport hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 - excludehostgroups: alioth + excludehostgroups: alioth, broken_mq ### - name: local resolver @@ -1736,7 +1873,11 @@ services: name: process - udevd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'" hostgroups: computers - excludehostgroups: freebsd + excludehostgroups: freebsd, jessie + - + name: process - udevd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'" + hostgroups: jessie - name: unexpected process - udev nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C udevd" @@ -1754,7 +1895,7 @@ services: ### - name: process - xinetd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C xinetd -a '/usr/sbin/xinetd '" hostgroups: xinetd-hosts - name: unwanted process - xinetd @@ -1765,8 +1906,14 @@ services: - name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" - hostgroups: squeeze, wheezy + hostgroups: wheezy, jessie excludehostgroups: freebsd, alioth + - + name: process - stunnel4 - puppet-ekeyd is crazy + nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-stunnel-sanity" + hostgroups: computers + excludehostgroups: freebsd, alioth + excludehosts: czerny, grnet-node01, storace # }}} # {{{ anti-services - @@ -1806,26 +1953,39 @@ services: name: "host SSL cert" nrpe: "if [ -e /etc/ssl/certs/thishost.pem ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/certs/thishost.pem; else echo 'No thishost.pem on this host.'; fi" hostgroups: computers + - + name: "host SSL cert - debian server" + nrpe: "if [ -e /etc/ssl/debian/certs/thishost-server.crt ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/debian/certs/thishost-server.crt; else echo 'No thishost-server.crt on this host.'; fi" + hostgroups: computers + - + name: "host SSL cert - debian client" + nrpe: "if [ -e /etc/ssl/debian/certs/thishost.crt ]; then /usr/lib/nagios/plugins/dsa-check-cert-expire /etc/ssl/debian/certs/thishost.crt; else echo 'No thishost.crt on this host.'; fi" + hostgroups: computers # }}} # {{{ HW health/raid - name: process - mdadm monitor servicegroups: raid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /var/run/mdadm/monitor.pid --daemonise --scan'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'" hostgroups: sw-raid - excludehostgroups: wheezy + excludehostgroups: jessie - - # wheezy: name: process - mdadm monitor servicegroups: raid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --scan'" hostgroups: sw-raid - excludehostgroups: squeeze + excludehostgroups: wheezy - name: RAID - sw raid servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-sw" hostgroups: sw-raid + - + name: RAID - unexpected sw raid + servicegroups: raid + nrpe: "if [ -e /proc/mdstat ]; then echo 'Found /proc/mdstat'; exit 1; else echo 'No /proc/mdstat on this host.'; fi" + hostgroups: computers + excludehostgroups: sw-raid ### - name: HW - hpacucli status @@ -1859,29 +2019,38 @@ services: normal_check_interval: 120 hostgroups: dl585 ### - - - name: HW - edac status - nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" - normal_check_interval: 120 - hosts: lw05, lw06 +# - +# name: HW - edac status +# nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" +# normal_check_interval: 120 #hostgroups: computers - #excludehosts: villa, lobos, senfl, schein + #excludehosts: villa, lobos, schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" normal_check_interval: 120 hostgroups: dl385, dl380, dl360, bl460, dl585, bm-bl - excludehosts: villa, lobos, senfl, schein + excludehosts: villa, lobos, schein, storace - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" normal_check_interval: 120 - hosts: villa, lobos + hosts: villa + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant --ignore-failed='PS2'" + normal_check_interval: 120 + hosts: lobos - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-no-redundant" normal_check_interval: 120 - hosts: senfl, schein + hosts: schein + - + name: HW - hpasmcli status + nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" + normal_check_interval: 120 + hosts: storace ### - name: RAID - 3ware @@ -1901,12 +2070,6 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-megaraid" hostgroups: megaraid ### - - - name: RAID - MPT - servicegroups: raid - nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-mpt" - hostgroups: mptraid - ### - name: RAID - DRBD servicegroups: raid @@ -1987,21 +2150,12 @@ services: hostgroups: spamd depends: process - spamd - master # - - - name: process - spamd - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u debbugs -C spamd -a '/usr/sbin/spamd -d '" - hosts: buxtehude - - - name: process - spamd - child - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u debbugs -C spamd -a 'spamd child'" - hosts: buxtehude - # - name: unwanted process - spamd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C spamd" hostgroups: computers excludehostgroups: spamd - excludehosts: bendel, busoni, buxtehude + excludehosts: bendel, busoni - name: unwanted process - greylistd @@ -2237,31 +2391,31 @@ services: - name: postgresql backups nrpe: "/usr/bin/sudo -u debbackup /usr/lib/nagios/plugins/dsa-check-backuppg" - hosts: backuphost + hosts: storace # }}} # {{{ power - - - name: process - UPS - nut usbhid-ups - ups1 - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C usbhid-ups -a '/lib/nut/usbhid-ups -a ups1'" - hosts: franck - - - name: process - UPS - nut upsd - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsd -a '/sbin/upsd'" - hosts: franck - - - name: process - UPS - nut upsmon master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C upsmon -a '/sbin/upsmon'" - hosts: franck - - - name: process - UPS - nut upsmon worker - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsmon -a '/sbin/upsmon'" - hosts: franck - depends: process - UPS - nut upsmon master - - - name: UPS - on line power - nrpe: "/usr/lib/nagios/plugins/dsa-check-ups" - hosts: franck - depends: process - UPS - nut upsd +# - +# name: process - UPS - nut usbhid-ups - ups1 +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C usbhid-ups -a '/lib/nut/usbhid-ups -a ups1'" +# hosts: franck +# - +# name: process - UPS - nut upsd +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsd -a '/sbin/upsd'" +# hosts: franck +# - +# name: process - UPS - nut upsmon master +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C upsmon -a '/sbin/upsmon'" +# hosts: franck +# - +# name: process - UPS - nut upsmon worker +# nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u nut -C upsmon -a '/sbin/upsmon'" +# hosts: franck +# depends: process - UPS - nut upsmon master +# - +# name: UPS - on line power +# nrpe: "/usr/lib/nagios/plugins/dsa-check-ups" +# hosts: franck +# depends: process - UPS - nut upsd # }}} # {{{ buildd - @@ -2346,7 +2500,7 @@ services: # {{{ DNS - name: process - named - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:15 -c 1: -u bind -C named -a '/usr/sbin/named -u bind'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:15 -c 1: -u bind -C named -a '/usr/sbin/named '" hostgroups: bind9-hosts - name: network service - dns @@ -2377,7 +2531,7 @@ services: hosts: global - name: DNS SOA sync - alioth.debian.org - check: "dsa_check_soas_add!alioth.debian.org!alioth.debian.org" + check: "dsa_check_soas_add!denis.debian.org!alioth.debian.org" hosts: global - name: DNS - delegation and signature expiry @@ -2416,7 +2570,7 @@ services: hosts: giustini - name: event log - remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=8867 $HOSTADDRESS$ public" + remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=9966 $HOSTADDRESS$ public" runfrom: dijkstra hosts: giustini # }}} @@ -2431,7 +2585,7 @@ services: # {{{ openstack - name: process - openstack - memcached - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nobody -C memcached -a '/usr/bin/memcached -m 64 -p 11211 -u nobody -l 127.0.0.1'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nobody -C memcached -a '/usr/bin/memcached -m 128 -p 11211 -u nobody -l 0.0.0.0'" hostgroups: openstack-controller - name: process - openstack - glance-registry @@ -2471,6 +2625,14 @@ services: hostgroups: openstack-controller # }}} # {{{ misc +# - +# Disable this check until logind and binfmt_misc issues are fixed +# something unknown is triggering mount of binfmt_misc +# https://bugs.debian.org/772700 +# name: system - all services running +# nrpe: "/usr/bin/sudo /bin/systemctl is-system-running" +# hostgroups: jessie + ### - name: process - rngd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C rngd -a '/usr/sbin/rngd -r /dev/hwrng'" @@ -2515,16 +2677,6 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:30 -c 1: -u nagios -C icinga -a '/usr/sbin/icinga -d /etc/icinga/icinga.cfg'" hosts: tchaikovsky ### - - - name: process - jetty - master - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -a 'jsvc.exec'" - hostgroups: jetty-hosts - - - name: process - jetty - worker - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:50 -c 1:100 -u jetty -a 'jsvc.exec -user jetty'" - hostgroups: jetty-hosts - depends: process - jetty - master - ### - name: process - debianqueued nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u dak-unpriv -C debianqueued" @@ -2536,6 +2688,17 @@ services: hosts: gombert contact_groups: gobby #### + - + name: network service - sip-tls cert - 443 + check: dsa_check_cert!443 + normal_check_interval: 60 + hosts: vogler + - + name: network service - sip-tls cert - 5061 + check: dsa_check_cert!5061 + normal_check_interval: 60 + hosts: vogler + #### - name: puppetmaster cert nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem"