X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=06ec26d8c46097e88d18e26684b5c18e4dd89d10;hb=8df4228d33863c53be45ce92fbc81619695420c5;hp=9301c30d415a9b4985d7806a0df5c147f2ecc624;hpb=c8ca82f3f157310da9912113d1fa5ab31e9a6a21;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 9301c30..06ec26d 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -176,6 +176,10 @@ servers: address: 130.239.18.121 parents: gw-accumu hostgroups: computers, buildd, hassrvfs, jessie + boman: + address: 130.239.18.124 + parents: gw-accumu + hostgroups: computers, service, jessie, apache2-hosts, hassrvfs # }}} # {{{ gw-aql eller: @@ -457,6 +461,10 @@ servers: address: 5.153.231.36 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, systemd-timesyncd + bilbao: + address: 5.153.231.37 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd, apache2-hosts, hassrvfs sor: address: 5.153.231.38 parents: ganeti-bytemark @@ -481,6 +489,11 @@ servers: address: 5.153.231.44 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, apache2-hosts, apache-https, systemd-timesyncd + + casulana: + address: 5.153.231.41 + parents: gw-bytemark + hostgroups: computers, service, stretch, hpnewraid, hassrvfs, dl380 # }}} # {{{ gw-c3sl santoro: @@ -516,6 +529,10 @@ servers: address: 217.196.149.228 parents: gw-conova hostgroups: computers, jessie, service, sw-raid + mirror-conova: + address: 217.196.149.229 + parents: gw-conova + hostgroups: computers, jessie, service arm-conova-01: address: 217.196.149.230 @@ -629,7 +646,7 @@ servers: porta: address: 194.177.211.207 parents: ganeti-grnet - hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-hosts, xinetd-hosts, systemd-timesyncd + hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-systemd-hosts, xinetd-hosts, systemd-timesyncd melartin: address: 194.177.211.208 parents: ganeti-grnet @@ -750,7 +767,7 @@ servers: kaufmann: address: 82.195.75.107 parents: ganeti3 - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, kvmdomains, xinetd-hosts, jessie, apache-https + hostgroups: computers, service, apache2-hosts, rsyncd-systemd-hosts, kvmdomains, jessie, apache-https stockhausen: address: 82.195.75.108 parents: ganeti3 @@ -843,6 +860,10 @@ servers: hostgroups: computers, jessie, hassrvfs, porterbox, sw-raid # }}} # {{{ gw-sanger + sallinen: + address: 193.62.202.26 + parents: gw-sanger + hostgroups: computers, service, jessie, dl380, hpnewraid sibelius: address: 193.62.202.28 parents: gw-sanger @@ -1231,6 +1252,8 @@ hostgroups: alias: Hosts running jessie jessie-freebsd: alias: kFreebsd hosts running jessie + stretch: + alias: Hosts running stretch kvmdomains: alias: Hosts that are KVM domains @@ -1267,6 +1290,9 @@ hostgroups: rsyncd-hosts: alias: hosts providing rsync services via xinetd private: 1 + rsyncd-systemd-hosts: + alias: hosts providing rsync services via systemd + private: 1 xinetd-hosts: alias: hosts providing services via xinetd private: 1 @@ -1402,21 +1428,21 @@ services: excludehostgroups: layer3-infrastructure, high-RTT check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 - name: PING check: "check_ping!600.0,20%!900.0,40%" hostgroups: high-RTT check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" hostgroups: layer3-infrastructure check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 # }}} # {{{ ### disk usage - @@ -1444,7 +1470,7 @@ services: - name: disk usage on /srv servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 95 98 /srv" + nrpe: "/usr/lib/nagios/plugins/check_disk 90 95 /srv" hostgroups: hassrvfs - name: disk usage on /var/lib/postgresql @@ -1574,11 +1600,11 @@ services: name: processes - total nrpe: "/usr/lib/nagios/plugins/check_procs 620 700" hostgroups: computers - excludehosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo + excludehosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo, casulana - name: processes - total nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700" - hosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo + hosts: prokofiev, pieta, ubc-enc2bl01, ubc-enc2bl02, ubc-enc2bl09, ubc-enc2bl10, fasolo, casulana - name: swap usage - percent nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%" @@ -1592,7 +1618,7 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" hostgroups: computers excludehosts: zelenka, zandonai - excludehostgroups: jessie + excludehostgroups: jessie, stretch - name: process - getty nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/getty" @@ -1600,7 +1626,7 @@ services: - name: process - getty nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty" - hostgroups: jessie + hostgroups: jessie, stretch excludehostgroups: freebsd - @@ -1617,7 +1643,7 @@ services: name: system - filesystem check nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems" check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 hostgroups: computers # }}} # {{{ backup @@ -1629,7 +1655,7 @@ services: excludehosts: backuphost, storace check_interval: 60 max_check_attempts: 2 - retry_check_interval: 5 + retry_interval: 5 - name: backup server config servicegroups: backup @@ -1637,7 +1663,7 @@ services: hosts: storace check_interval: 60 max_check_attempts: 2 - retry_check_interval: 5 + retry_interval: 5 - name: backup - bacula - last backup servicegroups: backup @@ -1646,7 +1672,7 @@ services: hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 - name: backup - bacula - last full backup servicegroups: backup @@ -1655,7 +1681,7 @@ services: hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 - name: process - bacula-dir servicegroups: backup @@ -1686,14 +1712,14 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel" hostgroups: computers check_interval: 60 - retry_check_interval: 5 + retry_interval: 5 - name: apt - security updates servicegroups: apt nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt" hostgroups: computers check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 - name: unexpected file - apt sources.list servicegroups: apt @@ -1707,7 +1733,7 @@ services: hostgroups: computers excludehostgroups: freebsd check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 notification_interval: 10080 - name: installed firewall @@ -1723,11 +1749,11 @@ services: name: process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'" hostgroups: computers - excludehostgroups: freebsd, sparc, jessie + excludehostgroups: freebsd, sparc, jessie, stretch - name: process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'" - hostgroups: jessie + hostgroups: jessie, stretch excludehostgroups: freebsd - name: unexpected process - ulogd @@ -1746,7 +1772,7 @@ services: hostgroups: computers depends: process - samhain check_interval: 60 - retry_check_interval: 5 + retry_interval: 5 excludehostgroups: brokensamhain - name: processes - samhain zombies @@ -1760,7 +1786,7 @@ services: name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" hostgroups: computers - excludehostgroups: freebsd, jessie + excludehostgroups: freebsd, jessie, stretch - name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" @@ -1768,7 +1794,7 @@ services: - name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -F'" - hostgroups: jessie + hostgroups: jessie, stretch excludehostgroups: freebsd - @@ -1813,6 +1839,13 @@ services: name: process - nrpe nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d'" hostgroups: computers + excludehostgroups: stretch + max_check_attempts: -1 + depends: network service - nrpe + - + name: process - nrpe + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:25 -c 1: -u nagios -C nrpe -a '/usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -f'" + hostgroups: stretch max_check_attempts: -1 depends: network service - nrpe ### @@ -1880,7 +1913,7 @@ services: name: process - monit nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" hostgroups: computers - excludehostgroups: alioth, jessie + excludehostgroups: alioth, jessie, stretch ### - name: MQ connection on rainier @@ -1889,7 +1922,7 @@ services: runfrom: rainier hostgroups: computers check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 excludehostgroups: alioth, broken_mq - name: MQ connection on rapoport @@ -1898,7 +1931,7 @@ services: runfrom: rapoport hostgroups: computers check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 excludehostgroups: alioth, broken_mq ### - @@ -1921,11 +1954,11 @@ services: name: process - udevd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'" hostgroups: computers - excludehostgroups: freebsd, jessie + excludehostgroups: freebsd, jessie, stretch - name: process - udevd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'" - hostgroups: jessie + hostgroups: jessie, stretch excludehostgroups: freebsd - name: unexpected process - udev @@ -1936,7 +1969,7 @@ services: name: process - acpid nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C acpid -a '/usr/sbin/acpid'" hostgroups: acpid-hosts - excludehostgroups: jessie + excludehostgroups: jessie, stretch - name: unexpected process - acpid nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C acpid" @@ -1956,7 +1989,7 @@ services: - name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" - hostgroups: wheezy, jessie + hostgroups: wheezy, jessie, stretch excludehostgroups: freebsd, alioth - name: process - stunnel4 - puppet-ekeyd is crazy @@ -2037,7 +2070,7 @@ services: servicegroups: raid nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'" hostgroups: sw-raid - excludehostgroups: jessie + excludehostgroups: jessie, stretch - name: process - mdadm monitor servicegroups: raid @@ -2100,7 +2133,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" check_interval: 120 hostgroups: dl380, dl360, bl460, bm-bl - excludehosts: villa, lobos, storace, mirror-anu + excludehosts: villa, lobos, storace, mirror-anu, sallinen - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" @@ -2115,7 +2148,7 @@ services: name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" check_interval: 120 - hosts: storace + hosts: storace, sallinen - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'" @@ -2182,7 +2215,7 @@ services: name: process - clamav - freshclam nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --quiet'" hostgroups: heavy-exim, heavy-postfix - excludehostgroups: jessie + excludehostgroups: jessie, stretch - name: process - clamav - freshclam nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u clamav -C freshclam -a '/usr/bin/freshclam -d --foreground=true'" @@ -2205,7 +2238,7 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd --create-prefs --max-children 5 --helper-home-dir -d --pidfile=/var/run/spamd.pid'" hostgroups: spamd excludehosts: picconi - excludehostgroups: jessie + excludehostgroups: jessie, stretch - name: process - spamd - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C spamd -a '/usr/sbin/spamd -d --pidfile=/var/run/spamassassin.pid --create-prefs --max-children 5 --helper-home-dir'" @@ -2244,7 +2277,7 @@ services: name: process - postgrey nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a '/usr/sbin/postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'" hostgroups: heavy-exim - excludehostgroups: jessie + excludehostgroups: jessie, stretch - name: process - postgrey nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u postgrey -a 'postgrey --pidfile=/var/run/postgrey.pid --daemonize --unix=/var/run/postgrey/socket --retry-window=4 --auto-whitelist-clients=10 --exim'" @@ -2529,7 +2562,7 @@ services: contact_groups: +buildd check_interval: 5 max_check_attempts: 24 - retry_check_interval: 5 + retry_interval: 5 - name: processes - lvcreate nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'" @@ -2581,7 +2614,7 @@ services: hosts: global check_interval: 15 max_check_attempts: 5 - retry_check_interval: 5 + retry_interval: 5 servicegroups: mirror - name: mirror sync - snapshot @@ -2589,7 +2622,7 @@ services: hosts: global check_interval: 15 max_check_attempts: 5 - retry_check_interval: 5 + retry_interval: 5 servicegroups: mirror - @@ -2702,6 +2735,11 @@ services: check: "dsa_check_staticsync_nossl!deb.debian.org" hosts: global servicegroups: mirror + - + name: mirror static sync - manpages + check: "dsa_check_staticsync!manpages.debian.org" + hosts: global + servicegroups: mirror - name: mirror static sync - 10years @@ -2749,8 +2787,8 @@ services: hosts: global servicegroups: mirror - - name: mirror static sync - debconf1 - check: "dsa_check_staticsync!debconf1.debconf.org" + name: mirror static sync - debconf16 + check: "dsa_check_staticsync!debconf16.debconf.org" hosts: global servicegroups: mirror - @@ -2855,7 +2893,7 @@ services: hosts: giustini check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 - name: Overall Unit Status remotecheck: "/usr/lib/nagios/plugins/check_snmp -H $HOSTADDRESS$ -C public -P 2c -o connUnitStatus -n -c 3 -w 3" @@ -2883,7 +2921,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" hostgroups: porterbox check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 # }}} # {{{ openstack # - @@ -2931,7 +2969,7 @@ services: - name: system - all services running nrpe: "/usr/bin/sudo /bin/systemctl is-system-running" - hostgroups: jessie + hostgroups: jessie, stretch excludehostgroups: freebsd ### - @@ -2951,6 +2989,10 @@ services: hosts: draghi depends: process - xinetd ### + - + name: network service - rsync + check: check_tcp!873 + hostgroups: rsyncd-systemd-hosts - name: network service - rsync check: check_tcp!873 @@ -3010,7 +3052,7 @@ services: hosts: handel check_interval: 60 max_check_attempts: 2 - retry_check_interval: 5 + retry_interval: 5 # }}} # }}}