X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=bda2a06e99990ab80ad7a6f65ad131dc31c40ece;hb=86a0a9e98acdf44a1a49f24248bd1710d29e8745;hp=d855e590bd25fd529e9a29e0392469487b4db230;hpb=29fcdc19d0841bbaa4c4fd935e22a8901ce06aa8;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index d855e59..bda2a06 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -77,7 +77,8 @@ servers: parents: gw-ubcece hostgroups: layer3-infrastructure gw-isc: - address: 149.20.65.129 + # really henet, because of something weird + address: 72.52.94.70 parents: gw-ubcece hostgroups: layer3-infrastructure gw-karlsruhe: @@ -176,7 +177,7 @@ servers: address: 130.239.18.121 parents: gw-accumu hostgroups: computers, buildd, hassrvfs, jessie - boman: + mirror-accumu: address: 130.239.18.124 parents: gw-accumu hostgroups: computers, service, jessie, apache2-hosts, hassrvfs @@ -265,7 +266,7 @@ servers: fasolo: address: 138.16.160.17 parents: gw-brown - hostgroups: computers, service, apache2-hosts, apache-https, dl380, hpnewraid, rsyncd-hosts, xinetd-hosts, jessie, hassrvfs, postgres94-hosts + hostgroups: computers, service, apache2-hosts, apache-https, dl380, hpnewraid, rsyncd-systemd-hosts, jessie, hassrvfs, postgres94-hosts # }}} # {{{ gw-bytemark bm-bl1: @@ -461,10 +462,18 @@ servers: address: 5.153.231.36 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, jessie, autofs, nfs-client, systemd-timesyncd - bilbao: + mirror-bytemark: address: 5.153.231.37 parents: ganeti-bytemark - hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd, apache2-hosts, hassrvfs + hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd, apache2-hosts, hassrvfs, xinetd-hosts + mirror-bytemark-debian: + address: 5.153.231.45 + hostgroups: secondary-IPs + parents: mirror-bytemark + mirror-bytemark-security: + address: 5.153.231.46 + hostgroups: secondary-IPs + parents: mirror-bytemark sor: address: 5.153.231.38 parents: ganeti-bytemark @@ -529,6 +538,18 @@ servers: address: 217.196.149.228 parents: gw-conova hostgroups: computers, jessie, service, sw-raid + mirror-conova: + address: 217.196.149.229 + parents: gw-conova + hostgroups: computers, jessie, service, apache2-hosts, xinetd-hosts + mirror-conova-debian: + address: 217.196.149.232 + hostgroups: secondary-IPs + parents: mirror-conova + mirror-conova-security: + address: 217.196.149.233 + hostgroups: secondary-IPs + parents: mirror-conova arm-conova-01: address: 217.196.149.230 @@ -642,11 +663,11 @@ servers: porta: address: 194.177.211.207 parents: ganeti-grnet - hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-hosts, xinetd-hosts, systemd-timesyncd + hostgroups: computers, service, hassrvfs, kvmdomains, jessie, rsyncd-systemd-hosts, xinetd-hosts, systemd-timesyncd melartin: address: 194.177.211.208 parents: ganeti-grnet - hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd + hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd, postgres96-hosts # }}} # {{{ gw-isc mirror-isc: @@ -759,11 +780,11 @@ servers: handel: address: 82.195.75.104 parents: ganeti3 - hostgroups: computers, service, kvmdomains, apache2-hosts, jessie + hostgroups: computers, service, kvmdomains, apache2-hosts, stretch, postgres96-hosts kaufmann: address: 82.195.75.107 parents: ganeti3 - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, kvmdomains, jessie, apache-https + hostgroups: computers, service, apache2-hosts, rsyncd-systemd-hosts, kvmdomains, jessie, apache-https stockhausen: address: 82.195.75.108 parents: ganeti3 @@ -811,7 +832,7 @@ servers: seger: address: 82.195.75.93 parents: ganeti3 - hostgroups: computers, service, apache2-hosts, hassrvfs, hasbootfs, rsyncd-hosts, uploadqueue, kvmdomains, xinetd-hosts, apache-https, postgres94-hosts, jessie + hostgroups: computers, service, apache2-hosts, hassrvfs, hasbootfs, rsyncd-systemd-hosts, uploadqueue, kvmdomains, xinetd-hosts, apache-https, postgres94-hosts, jessie # }}} # {{{ gw-marist zani: @@ -821,7 +842,7 @@ servers: # }}} # {{{ gw-osuosl busoni: - address: 140.211.15.34 + address: 140.211.166.202 parents: gw-osuosl hostgroups: computers, service, dl360, hassrvfs, jessie, hasvarlogfs, apache2-hosts, no-bacula, apache-https @@ -863,7 +884,7 @@ servers: sibelius: address: 193.62.202.28 parents: gw-sanger - hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-hosts, xinetd-hosts, hasvarlogfs, multipath-hosts + hostgroups: computers, postgres94-hosts, service, apache2-hosts, sw-raid, jessie, rsyncd-systemd-hosts, hasvarlogfs, multipath-hosts contacts: tjrc1, dave smetana: address: 193.62.202.29 @@ -942,10 +963,6 @@ servers: address: 206.12.19.218 parents: sw-ubcece-kais hostgroups: computers, bl460, acpid-hosts, service, jessie, drbd-hosts, multipath-hosts - ganeti2: - address: 206.12.19.23 - parents: sw-ubcece-kais - hostgroups: notacomputer # MSA 2000 (2012i) giustini: address: 192.168.2.6 @@ -958,10 +975,6 @@ servers: # address: 206.12.19.120 # parents: sw-ubcece-kais # hostgroups: computers, service, hasbootfs, kvmdomains, squeeze, hasvicepa - glinka: - address: 206.12.19.126 - parents: ganeti2 - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, nfs-client, autofs, xinetd-hosts # }}} # {{{ ubc-gateway ubc-enc2bl01: @@ -980,14 +993,15 @@ servers: address: 209.87.16.10 parents: ubc-gateway hostgroups: computers, bl460g8, service, jessie, multipath-hosts, hpnewraid + rachmaninoff: address: 209.87.16.20 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie + hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd x86-ubc-01: address: 209.87.16.21 parents: ubc-gateway - hostgroups: computers, buildd, hassrvfs, kvmdomains, jessie + hostgroups: computers, buildd, hassrvfs, kvmdomains, jessie, systemd-timesyncd finzi: address: 209.87.16.22 parents: ubc-gateway @@ -1001,67 +1015,72 @@ servers: elgar: address: 209.87.16.24 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie + hostgroups: computers, service, kvmdomains, jessie, systemd-timesyncd gombert: address: 209.87.16.25 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, systemd-timesyncd nono: address: 209.87.16.26 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, heavy-exim, xinetd-hosts, apache2-hosts, apache-https, broken_https_default_vhost, hassrvfs + hostgroups: computers, service, kvmdomains, jessie, heavy-exim, xinetd-hosts, apache2-hosts, apache-https, broken_https_default_vhost, hassrvfs, systemd-timesyncd reger: address: 209.87.16.27 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, heavy-exim + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, heavy-exim, systemd-timesyncd diabelli: address: 209.87.16.28 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, broken_https_default_vhost + hostgroups: computers, service, kvmdomains, jessie, apache2-hosts, apache-https, broken_https_default_vhost, systemd-timesyncd menotti: address: 209.87.16.29 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, apache-https + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, apache-https, systemd-timesyncd danzi: address: 209.87.16.30 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, postgres94-hosts, xinetd-hosts + hostgroups: computers, service, kvmdomains, jessie, postgres94-hosts, xinetd-hosts, systemd-timesyncd geo2: address: 209.87.16.31 parents: ubc-gateway - hostgroups: computers, service, bind9-hosts, kvmdomains, jessie + hostgroups: computers, service, bind9-hosts, kvmdomains, jessie, systemd-timesyncd lotti: address: 209.87.16.32 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, hassrvfs + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, systemd-timesyncd muffat: address: 209.87.16.33 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, spamd, heavy-exim, mail-relay + hostgroups: computers, service, kvmdomains, jessie, spamd, heavy-exim, mail-relay, systemd-timesyncd sonntag: address: 209.87.16.34 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs + hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs, systemd-timesyncd tchaikovsky: address: 209.87.16.35 parents: ubc-gateway - hostgroups: computers, general, apache2-hosts, kvmdomains, apache-https, jessie + hostgroups: computers, general, apache2-hosts, kvmdomains, apache-https, jessie, systemd-timesyncd gretchaninov: address: 209.87.16.36 parents: ubc-gateway - hostgroups: computers, general, kvmdomains, jessie, hassrvfs, nfs-server, apache2-hosts, xinetd-hosts + hostgroups: computers, general, kvmdomains, jessie, hassrvfs, nfs-server, apache2-hosts, xinetd-hosts, apache-https, systemd-timesyncd tye: address: 209.87.16.37 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, heavy-exim, apache2-hosts, apache-https, nfs-client, autofs, hassrvfs + hostgroups: computers, service, kvmdomains, jessie, heavy-exim, apache2-hosts, apache-https, nfs-client, autofs, hassrvfs, systemd-timesyncd ullmann: address: 209.87.16.38 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, postgres94-hosts, nfs-client, apache2-hosts, autofs, apache-https + hostgroups: computers, service, kvmdomains, jessie, postgres94-hosts, nfs-client, apache2-hosts, autofs, apache-https, systemd-timesyncd buxtehude: address: 209.87.16.39 parents: ubc-gateway - hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, heavy-exim, postgres94-hosts, hasvarlogfs, apache-https, spamd, nfs-server + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, heavy-exim, postgres94-hosts, hasvarlogfs, apache-https, spamd, nfs-server, systemd-timesyncd + piu-slave-ubc-01: + address: 209.87.16.42 + parents: ubc-gateway + hostgroups: computers, service, kvmdomains, jessie, nfs-client, autofs, systemd-timesyncd + contacts: holger # }}} # {{{ gw-umn #saens: @@ -1138,10 +1157,6 @@ servers: address: 144.32.168.78 parents: gw-ynic hostgroups: computers, buildd, sw-raid, hassrvfs, jessie - poulenc: - address: 144.32.168.77 - parents: gw-ynic - hostgroups: computers, buildd, sw-raid, hassrvfs, jessie #antheil: # address: 217.140.96.60 # parents: gw-arm @@ -1286,6 +1301,9 @@ hostgroups: rsyncd-hosts: alias: hosts providing rsync services via xinetd private: 1 + rsyncd-systemd-hosts: + alias: hosts providing rsync services via systemd + private: 1 xinetd-hosts: alias: hosts providing services via xinetd private: 1 @@ -1295,6 +1313,9 @@ hostgroups: postgres94-hosts: alias: hosts running postgres94 private: 1 + postgres96-hosts: + alias: hosts running postgres96 + private: 1 no-ulogd: alias: hosts not running ulogd private: 1 @@ -1421,21 +1442,21 @@ services: excludehostgroups: layer3-infrastructure, high-RTT check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 - name: PING check: "check_ping!600.0,20%!900.0,40%" hostgroups: high-RTT check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 - name: PING check: "check_ping!2000.0,60%!3000.0,80%" hostgroups: layer3-infrastructure check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 # }}} # {{{ ### disk usage - @@ -1606,6 +1627,14 @@ services: name: swap usage - mb nrpe: "/usr/lib/nagios/plugins/check_swap -w 20000 -c 5000" hostgroups: computers + - + name: free memory - mb + nrpe: "/usr/lib/nagios/plugins/dsa-check-memory -m mb" + hostgroups: computers + - + name: free memory - percent + nrpe: "/usr/lib/nagios/plugins/dsa-check-memory -m pct" + hostgroups: computers - name: process - getty nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" @@ -1636,7 +1665,7 @@ services: name: system - filesystem check nrpe: "/usr/bin/sudo /usr/lib/nagios/plugins/dsa-check-filesystems" check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 hostgroups: computers # }}} # {{{ backup @@ -1648,7 +1677,7 @@ services: excludehosts: backuphost, storace check_interval: 60 max_check_attempts: 2 - retry_check_interval: 5 + retry_interval: 5 - name: backup server config servicegroups: backup @@ -1656,7 +1685,7 @@ services: hosts: storace check_interval: 60 max_check_attempts: 2 - retry_check_interval: 5 + retry_interval: 5 - name: backup - bacula - last backup servicegroups: backup @@ -1665,7 +1694,7 @@ services: hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 - name: backup - bacula - last full backup servicegroups: backup @@ -1674,7 +1703,7 @@ services: hostgroups: computers excludehostgroups: buildd, porterbox, no-bacula check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 - name: process - bacula-dir servicegroups: backup @@ -1692,6 +1721,12 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C bacula-fd -a '/usr/sbin/bacula-fd -c /etc/bacula/bacula-fd.conf'" hostgroups: freebsd + - + name: network backup status - draghi + servicegroups: backup + nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile -a 2h /home/debbackup/nagios-status" + hosts: draghi + #### - name: process - acc.umu.se backup @@ -1705,14 +1740,14 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-running-kernel" hostgroups: computers check_interval: 60 - retry_check_interval: 5 + retry_interval: 5 - name: apt - security updates servicegroups: apt nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/apt" hostgroups: computers check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 - name: unexpected file - apt sources.list servicegroups: apt @@ -1721,12 +1756,11 @@ services: - name: upgraded libraries servicegroups: security - nrpe: "sudo /usr/local/sbin/dsa-check-libs" - #nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs" + nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs" hostgroups: computers excludehostgroups: freebsd check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 notification_interval: 10080 - name: installed firewall @@ -1765,7 +1799,7 @@ services: hostgroups: computers depends: process - samhain check_interval: 60 - retry_check_interval: 5 + retry_interval: 5 excludehostgroups: brokensamhain - name: processes - samhain zombies @@ -1887,6 +1921,15 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u daemon -C atd -a /usr/sbin/atd" hostgroups: computers ### + - + name: process - irqbalance + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C irqbalance -a '/usr/sbin/irqbalance'" + #hosts: casulana + hostgroups: computers + #excludehostgroups: single-cpu, freebsd + excludehostgroups: freebsd + excludehosts: harris, smetana + ### - name: process - cron nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C cron -a /usr/sbin/cron" @@ -1915,7 +1958,7 @@ services: runfrom: rainier hostgroups: computers check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 excludehostgroups: alioth, broken_mq - name: MQ connection on rapoport @@ -1924,7 +1967,7 @@ services: runfrom: rapoport hostgroups: computers check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 excludehostgroups: alioth, broken_mq ### - @@ -2001,10 +2044,6 @@ services: name: ganeti - job watcher paused nrpe: "/usr/lib/nagios/plugins/negate /usr/lib/nagios/plugins/dsa-check-file -f /var/lib/ganeti/watcher.pause" hostgroups: computers - - - name: unwanted process - irqbalance - nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C irqbalance" - hostgroups: computers - name: unwanted process - openvpn nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C openvpn" @@ -2126,7 +2165,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm" check_interval: 120 hostgroups: dl380, dl360, bl460, bm-bl - excludehosts: villa, lobos, storace, mirror-anu + excludehosts: villa, lobos, storace, mirror-anu, sallinen - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --ps-no-redundant" @@ -2141,7 +2180,7 @@ services: name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present" check_interval: 120 - hosts: storace + hosts: storace, sallinen - name: HW - hpasmcli status nrpe: "/usr/lib/nagios/plugins/dsa-check-hpasm --fan-ignore-not-present --ps-no-redundant --ignore-failed='PS1'" @@ -2480,19 +2519,29 @@ services: name: network service - ftp check: check_ftp hostgroups: uploadqueue, security_mirror - excludehosts: klecker + excludehosts: klecker, mirror-isc, mirror-umn - name: network service - ftp check: check_ftp hosts: klecker-ftp depends: klecker:process - xinetd + - + name: network service - ftp + check: check_ftp + hosts: mirror-umn2 + depends: mirror-umn:process - xinetd + - + name: network service - ftp + check: check_ftp + hosts: mirror-isc2 + depends: mirror-isc:process - xinetd # }}} # {{{ postgres - name: unwanted process - postgresql nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres" hostgroups: computers - excludehostgroups: postgres91-hosts, postgres94-hosts + excludehostgroups: postgres91-hosts, postgres94-hosts, postgres96-hosts - name: unwanted process - postgresql 9.0 nrpe: "/usr/lib/nagios/plugins/check_procs -w 0 -C postgres -a '9.0/bin/postgres'" @@ -2505,6 +2554,10 @@ services: name: process - postgresql94 - master nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.4/bin/postgres'" hostgroups: postgres94-hosts + - + name: process - postgresql96 - master + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:10 -c 1: -u postgres -C postgres -a '/usr/lib/postgresql/9.6/bin/postgres'" + hostgroups: postgres96-hosts - name: postgresql backups nrpe: "/usr/bin/sudo -u debbackup /usr/lib/nagios/plugins/dsa-check-backuppg" @@ -2555,7 +2608,7 @@ services: contact_groups: +buildd check_interval: 5 max_check_attempts: 24 - retry_check_interval: 5 + retry_interval: 5 - name: processes - lvcreate nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'" @@ -2607,7 +2660,7 @@ services: hosts: global check_interval: 15 max_check_attempts: 5 - retry_check_interval: 5 + retry_interval: 5 servicegroups: mirror - name: mirror sync - snapshot @@ -2615,7 +2668,7 @@ services: hosts: global check_interval: 15 max_check_attempts: 5 - retry_check_interval: 5 + retry_interval: 5 servicegroups: mirror - @@ -2723,6 +2776,11 @@ services: check: "dsa_check_staticsync_nossl!metadata.ftp-master.debian.org" hosts: global servicegroups: mirror + - + name: mirror static sync - mirror-master + check: "dsa_check_staticsync_nossl!mirror-master.debian.org" + hosts: global + servicegroups: mirror - name: mirror static sync - deb check: "dsa_check_staticsync_nossl!deb.debian.org" @@ -2886,7 +2944,7 @@ services: hosts: giustini check_interval: 5 max_check_attempts: 4 - retry_check_interval: 1 + retry_interval: 1 - name: Overall Unit Status remotecheck: "/usr/lib/nagios/plugins/check_snmp -H $HOSTADDRESS$ -C public -P 2c -o connUnitStatus -n -c 3 -w 3" @@ -2914,7 +2972,7 @@ services: nrpe: "/usr/lib/nagios/plugins/dsa-check-dchroots-current" hostgroups: porterbox check_interval: 60 - retry_check_interval: 15 + retry_interval: 15 # }}} # {{{ openstack # - @@ -2982,6 +3040,10 @@ services: hosts: draghi depends: process - xinetd ### + - + name: network service - rsync + check: check_tcp!873 + hostgroups: rsyncd-systemd-hosts - name: network service - rsync check: check_tcp!873 @@ -3041,7 +3103,13 @@ services: hosts: handel check_interval: 60 max_check_attempts: 2 - retry_check_interval: 5 + retry_interval: 5 + - + name: puppet - agent check + nrpe: "/usr/lib/nagios/plugins/dsa-check-statusfile /var/cache/dsa/nagios/puppet-agent" + hostgroups: computers + check_interval: 60 + retry_interval: 15 # }}} # }}}