X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=625c471bc310866e8dd6814189d955047a58759c;hb=33010000c54cdee9cff1fbd8a1876238c9173498;hp=871091768d4e613678e050ef18ad2675c59a34a7;hpb=586f144d8e7c5cf370f34c69c80526d25af0d6d4;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 8710917..625c471 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -90,6 +90,10 @@ servers: address: 129.143.57.177 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-leaseweb: + address: 185.17.185.190 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-man-da: address: 82.195.75.126 parents: gw-ubcece @@ -202,10 +206,6 @@ servers: address: 217.140.96.56 parents: gw-arm hostgroups: computers, hasbootfs, hassrvfs, porterbox, wheezy, deadslow - alain: - address: 217.140.96.58 - parents: gw-arm - hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy, deadslow alwyn: address: 217.140.96.59 parents: gw-arm @@ -278,6 +278,14 @@ servers: address: 5.153.231.248 parents: gw-bytemark hostgroups: computers, bm-bl, acpid-hosts, service, wheezy + bm-bl9: + address: 5.153.231.249 + parents: gw-bytemark + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute + bm-bl10: + address: 5.153.231.250 + parents: gw-bytemark + hostgroups: computers, bm-bl, acpid-hosts, service, wheezy, openstack-compute milanollo: address: 5.153.231.2 @@ -333,20 +341,16 @@ servers: address: 5.153.231.13 parents: ganeti-bytemark hostgroups: computers, hassrvfs, kvmdomains, wheezy, apache2-hosts - petrova: - address: 5.153.231.25 - parents: ganeti-bytemark - hostgroups: computers, kvmdomains, wheezy, apache2-hosts couper: address: 5.153.231.14 parents: ganeti-bytemark hostgroups: computers, hassrvfs, kvmdomains, wheezy, apache2-hosts, nfs-client, autofs rainier: - address: 5.153.231.15 + address: 5.153.231.16 parents: ganeti-bytemark hostgroups: computers, kvmdomains, wheezy, no-bacula 
rapoport: - address: 5.153.231.16 + address: 5.153.231.15 parents: ganeti-bytemark hostgroups: computers, kvmdomains, wheezy, no-bacula delfin: @@ -365,6 +369,12 @@ servers: address: 5.153.231.20 parents: ganeti-bytemark hostgroups: computers, general, kvmdomains, wheezy, nfs-client, autofs + moszumanska: + address: 5.153.231.21 + parents: ganeti-bytemark + contact_groups: alioth-admins + hostgroups: computers, general, wheezy, postgres91-hosts, apache2-hosts, acpid-hosts, apache-https, brokensamhain, no-bacula, bind9-hosts, xinetd-hosts, alioth, heavy-exim, spamd + no-servicegroups: true dillon: address: 5.153.231.22 parents: ganeti-bytemark @@ -377,6 +387,18 @@ servers: address: 5.153.231.24 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, wheezy, bind9-hosts, no-bacula + petrova: + address: 5.153.231.25 + parents: ganeti-bytemark + hostgroups: computers, kvmdomains, wheezy, apache2-hosts + oyens: + address: 5.153.231.26 + parents: ganeti-bytemark + hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller + barriere: + address: 5.153.231.27 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, porterbox # }}} # {{{ gw-c3sl santoro: @@ -477,6 +499,32 @@ servers: parents: gw-isc hostgroups: computers, service, apache2-hosts, rsyncd-hosts, acpid-hosts, dl360, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula # }}} + # {{{ gw-leaseweb + lw01: + address: 185.17.185.177 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw02: + address: 185.17.185.178 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw03: + address: 185.17.185.179 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw04: + address: 185.17.185.180 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl180 + lw05: + address: 185.17.185.181 + parents: gw-leaseweb + hostgroups: computers, 
service, acpid-hosts, wheezy, dl120, sw-raid + lw06: + address: 185.17.185.182 + parents: gw-leaseweb + hostgroups: computers, service, acpid-hosts, wheezy, dl120, sw-raid + # }}} # {{{ gw-karlsruhe zemlinsky: address: 129.143.160.6 @@ -581,7 +629,7 @@ servers: zani: address: 148.100.88.22 parents: gw-marist - hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed + hostgroups: computers, buildd, hassrvfs, wheezy, incomingmailrelayed, ping-suckers # }}} # {{{ gw-osuosl busoni: @@ -847,10 +895,6 @@ servers: address: 206.12.19.134 parents: ganeti2 hostgroups: computers, service, kvmdomains, wheezy, xinetd-hosts, nfs-client, autofs - barriere: - address: 206.12.19.135 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs, porterbox diabelli: address: 206.12.19.136 parents: traetta @@ -920,10 +964,6 @@ servers: hostgroups: secondary-IPs # }}} # {{{ gw-ynic - hildegard: - address: 144.32.168.74 - parents: gw-ynic - hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, deadslow, buildd howells: address: 144.32.168.75 parents: gw-ynic @@ -1026,6 +1066,12 @@ hostgroups: dl585: alias: HP DL385 hosts private: 1 + dl180: + alias: HP DL180 + private: 1 + dl120: + alias: HP DL120 + private: 1 sw-raid: alias: Hosts with Linux software raid private: 1 @@ -1177,15 +1223,23 @@ hostgroups: # i.e. 
no port 25 private: 1 - ntpsuckers: - alias: "hosts who's ntp offset is often unknown" - private: 1 - brokensamhain: alias: machines that can not run samhain private: 1 high-RTT: - alias: machines with hight round trip times + alias: machines with high round trip times + private: 1 + ping-suckers: + alias: machines that just suck at icmp + private: 1 + alioth: + alias: machines that are just awkward + private: 1 + openstack-compute: + alias: nodes that run OpenStack compute + private: 1 + openstack-controller: + alias: nodes that run OpenStack controller private: 1 security_mirror: @@ -1223,7 +1277,7 @@ services: name: PING check: "check_ping!350.0,20%!600.0,40%" hostgroups: pingable - excludehostgroups: layer3-infrastructure, high-RTT + excludehostgroups: layer3-infrastructure, high-RTT, ping-suckers normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 @@ -1234,6 +1288,13 @@ services: normal_check_interval: 5 max_check_attempts: 4 retry_check_interval: 1 + - + name: PING + check: "check_ping!600.0,90%!900.0,95%" + hostgroups: ping-suckers + normal_check_interval: 5 + max_check_attempts: 4 + retry_check_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" @@ -1432,7 +1493,7 @@ services: servicegroups: backup nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u bacula -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: computers - excludehostgroups: freebsd + excludehostgroups: freebsd, alioth - name: process - bacula-fd servicegroups: backup @@ -1627,7 +1688,7 @@ services: hostgroups: computers depends: process - ntpd excludehosts: ancina - excludehostgroups: ntpsuckers, deadslow + excludehostgroups: deadslow servicegroups: time # - @@ -1665,11 +1726,13 @@ services: remotecheck: "/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lotti hostgroups: computers + excludehostgroups: alioth - name: remote logging on lully remotecheck: 
"/usr/lib/nagios/plugins/dsa-check-log-age-loghost $HOSTNAME$" runfrom: lully hostgroups: computers + excludehostgroups: alioth - name: MQ connection on rainier remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" @@ -1677,6 +1740,7 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 + excludehostgroups: alioth - name: MQ connection on rapoport remotecheck: "/usr/lib/nagios/plugins/dsa-check-mq-connection $HOSTNAME$ ud dsa" @@ -1684,6 +1748,7 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 + excludehostgroups: alioth ### MAIL STUFF ### - @@ -1819,23 +1884,23 @@ services: - name: process - weightd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (master)'" - hostgroups: heavy-postfix + hostgroups: heavy-postfix, alioth - name: process - weightd - cache nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u polw -a 'policyd-weight (cache)'" - hostgroups: heavy-postfix + hostgroups: heavy-postfix, alioth depends: process - weightd - master - name: process - weightd - child nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:50 -c 1: -u polw -a 'policyd-weight (child)'" - hostgroups: heavy-postfix + hostgroups: heavy-postfix, alioth depends: process - weightd - master ### - name: unwanted process - policyd-weight nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C policyd-weight" hostgroups: computers - excludehostgroups: heavy-postfix, deadslow + excludehostgroups: heavy-postfix, deadslow, alioth ### @@ -1926,6 +1991,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-config" hostgroups: computers normal_check_interval: 60 + excludehostgroups: alioth - name: setup - local hostname etc-hosts nrpe: 'if getent ahosts `hostname` | grep -q 127.0; then echo "Warning: local hostname resolves to 127/8 address"; exit 1; else echo "OK: Hostname resolves to non-127/8 address."; exit 0; fi' @@ -1953,6 +2019,7 @@ services: name: 
process - unbound nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u unbound -C unbound -a '/usr/sbin/unbound'" hostgroups: unbound-hosts, squeeze, wheezy + excludehostgroups: alioth ### - name: process - uptimed @@ -2035,7 +2102,7 @@ services: name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C ud-replicated -a '/usr/bin/python /usr/bin/ud-replicated'" hostgroups: computers - excludehostgroups: squeeze,freebsd + excludehostgroups: squeeze, freebsd, alioth - name: process - ud-replicated nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C python2.7 -a '/usr/bin/python /usr/bin/ud-replicated'" @@ -2048,13 +2115,13 @@ services: name: process - monit nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" hostgroups: computers - excludehostgroups: squeeze + excludehostgroups: squeeze, alioth - name: HW - hpacucli status servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-hpacucli" normal_check_interval: 120 - hostgroups: dl385, dl380, dl360, bl460 + hostgroups: dl385, dl380, dl360, bl460, dl180 excludehosts: schein, rietz - name: HW - hpacucli status @@ -2081,6 +2148,13 @@ services: normal_check_interval: 120 hostgroups: dl585 ### + - + name: HW - edac status + nrpe: "/usr/lib/nagios/plugins/dsa-check-edac" + normal_check_interval: 120 + hosts: lw05, lw06 + #hostgroups: computers + #excludehosts: villa, lobos, senfl, schein - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" @@ -2411,7 +2485,7 @@ services: name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" hostgroups: squeeze, wheezy - excludehostgroups: freebsd + excludehostgroups: freebsd, alioth #### - name: process - UPS - nut usbhid-ups - ups1 @@ -2601,7 +2675,7 @@ services: hosts: giustini 
- name: event log - remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=7778 $HOSTADDRESS$ public" + remotecheck: "/usr/lib/nagios/plugins/dsa-check-msa-eventlog --start=8867 $HOSTADDRESS$ public" runfrom: dijkstra hosts: giustini ############ @@ -2611,5 +2685,54 @@ services: hostgroups: porterbox normal_check_interval: 60 retry_check_interval: 15 + ############ + - + name: process - openstack - keystone + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u root -C keystone-all -a '/usr/bin/python /usr/bin/keystone-all'" + hostgroups: openstack-controller + - + name: process - openstack - memcached + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nobody -C memcached -a '/usr/bin/memcached -m 64 -p 11211 -u nobody -l 127.0.0.1'" + hostgroups: openstack-controller + - + name: process - openstack - glance-registry + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u glance -C glance-registry -a '/usr/bin/python /usr/bin/glance-registry'" + hostgroups: openstack-controller + - + name: process - openstack - nova-api + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-api -a '/usr/bin/python /usr/bin/nova-api --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-api.log'" + hostgroups: openstack-controller + - + name: process - openstack - nova-compute + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-compute -a '/usr/bin/python /usr/bin/nova-compute --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-compute.log --config-file=/etc/nova/nova-compute.conf'" + hostgroups: openstack-compute + - + name: process - openstack - nova-cert + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-cert -a '/usr/bin/python /usr/bin/nova-cert --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-cert.log'" + hostgroups: openstack-controller + - + name: process - openstack - nova-conductor + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 
-c 1: -u nova -C nova-conductor -a '/usr/bin/python /usr/bin/nova-conductor --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-conductor.log'" + hostgroups: openstack-controller + - + name: process - openstack - nova-consoleauth + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-consoleauth -a '/usr/bin/python /usr/bin/nova-consoleauth --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-consoleauth.log'" + hostgroups: openstack-controller + - + name: process - openstack - nova-scheduler + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-scheduler -a '/usr/bin/python /usr/bin/nova-scheduler --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-scheduler.log'" + hostgroups: openstack-controller + - + name: process - openstack - nova-spicehtml5proxy + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-spicehtml5proxy -a '/usr/bin/python /usr/bin/nova-spicehtml5proxy --log-file /var/log/nova/nova-consoleproxy.log'" + hostgroups: openstack-controller + - + name: process - openstack - neutron-server + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u neutron -C neutron-server -a '/usr/bin/python2.7 /usr/bin/neutron-server --config-file=/etc/neutron/neutron.conf --log-file=/var/log/neutron/neutron-server.log --config-file=/etc/neutron/plugins/ml2/ml2_conf.ini'" + hostgroups: openstack-controller + - + name: process - openstack - nova-compute + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u nova -C nova-compute -a '/usr/bin/python /usr/bin/nova-compute --config-file=/etc/nova/nova.conf --log-file /var/log/nova/nova-compute.log --config-file=/etc/nova/nova-compute.conf'" + hostgroups: openstack-controller # vim: set ts=2 sw=2 et ai si fdm=marker: