X-Git-Url: https://git.adam-barratt.org.uk/?a=blobdiff_plain;f=config%2Fnagios-master.cfg;h=4ca3344dbb1f26d5a6fad25dad6745f95fd515fc;hb=4dd94be5ec29b8e4eab807fd6de7552a764cfbf7;hp=0f690e3ea4bc4448c9ef94acc3e7dc5c7c8bd56f;hpb=b645ba0e657ad48c28995da6166dae8980d1a5c7;p=mirror%2Fdsa-nagios.git diff --git a/config/nagios-master.cfg b/config/nagios-master.cfg index 0f690e3..4ca3344 100644 --- a/config/nagios-master.cfg +++ b/config/nagios-master.cfg @@ -33,6 +33,10 @@ servers: address: 130.239.18.97 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-aql: + address: 141.170.2.17 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-bytemark: address: 89.16.160.116 parents: gw-ubcece @@ -114,6 +118,10 @@ servers: address: 193.201.200.129 parents: gw-ubcece hostgroups: layer3-infrastructure + gw-sakura: + address: 133.242.99.65 + parents: gw-ubcece + hostgroups: layer3-infrastructure gw-sanger: address: 193.62.202.20 parents: gw-ubcece @@ -191,10 +199,6 @@ servers: address: 195.20.242.124 parents: schumann hostgroups: computers, service, apache2-hosts, hassrvfs, hasbootfs, rsyncd-hosts, uploadqueue, kvmdomains, xinetd-hosts, apache-https, postgres91-hosts, wheezy - geo3: - address: 195.20.242.125 - parents: schumann - hostgroups: computers, service, hasbootfs, bind9-hosts, kvmdomains, wheezy soler: address: 195.20.242.126 parents: schumann @@ -208,12 +212,34 @@ servers: pettersson: address: 130.239.18.123 parents: gw-accumu - hostgroups: computers, hasbootfs, aacraid, nfs-client, acpid-hosts, service, apache2-hosts, wheezy, autofs + hostgroups: computers, hasbootfs, aacraid, nfs-client, acpid-hosts, service, apache2-hosts, wheezy, autofs, sw-raid praetorius: address: 130.239.18.121 parents: gw-accumu hostgroups: computers, buildd, hassrvfs, wheezy # }}} + # {{{ gw-aql + mips-aql-01: + address: 141.170.6.149 + parents: gw-aql + hostgroups: computers, buildd, wheezy, nfs-client + mips-aql-02: + address: 141.170.6.150 + parents: gw-aql + hostgroups: computers, buildd, wheezy, nfs-client + minkus: + address: 141.170.6.151 + parents: gw-aql + hostgroups: computers, porterbox, jessie, nfs-client + mipsel-aql-01: + address: 141.170.6.152 + parents: gw-aql + hostgroups: computers, buildd, jessie, hassrvfs, hasbootfs, sw-raid + mipsel-aql-02: + address: 141.170.6.153 + parents: gw-aql + hostgroups: computers, buildd, jessie, hassrvfs, hasbootfs, sw-raid + # }}} # {{{ gw-arm abel: address: 217.140.96.56 @@ -243,6 +269,10 @@ servers: address: 217.140.96.71 parents: gw-arm hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd, broken_mq + ia64-arm-01: + address: 217.140.96.61 + parents: gw-arm + hostgroups: computers, hasbootfs, hassrvfs, wheezy, buildd, broken_mq, sw-raid, acpid-hosts # }}} # {{{ gw-brown franck: @@ -345,11 +375,7 @@ servers: coccia: address: 5.153.231.11 parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client, uploadqueue, xinetd-hosts - backuphost: - address: 5.153.231.12 - parents: ganeti-bytemark - hostgroups: computers, hassrvfs, kvmdomains, wheezy + hostgroups: computers, hassrvfs, kvmdomains, wheezy, autofs, nfs-client, uploadqueue, xinetd-hosts, apache-https, apache2-hosts philp: address: 5.153.231.13 parents: ganeti-bytemark @@ -403,7 +429,7 @@ servers: oyens: address: 5.153.231.26 parents: ganeti-bytemark - hostgroups: computers, kvmdomains, wheezy, apache2-hosts, openstack-controller, apache-https, broken_mq + hostgroups: computers, kvmdomains, jessie, apache2-hosts, openstack-controller, apache-https, broken_mq barriere: address: 5.153.231.27 parents: ganeti-bytemark @@ -432,6 +458,17 @@ servers: address: 5.153.231.35 parents: ganeti-bytemark hostgroups: computers, service, kvmdomains, wheezy, apache2-hosts + lindsay: + address: 5.153.231.36 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie + fede: + address: 5.153.231.37 + hostgroups: computers, service, kvmdomains, wheezy, hassrvfs + sor: + address: 5.153.231.38 + parents: ganeti-bytemark + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts, autofs, nfs-client # }}} # {{{ gw-c3sl santoro: @@ -534,11 +571,19 @@ servers: rautavaara: address: 194.177.211.199 parents: gw-grnet - hostgroups: computers, acpid-hosts, megaraid, service, squeeze, nfs-server + hostgroups: computers, acpid-hosts, megaraid, service, wheezy loghost-grnet-01: address: 194.177.211.200 parents: gw-grnet - hostgroups: computers, service, kvmdomains, wheezy, hassrvfs + hostgroups: computers, service, kvmdomains, jessie, hassrvfs + geo3: + address: 194.177.211.201 + parents: gw-grnet + hostgroups: computers, service, bind9-hosts, kvmdomains, jessie + cgi-grnet-01: + address: 194.177.211.202 + parents: gw-grnet + hostgroups: computers, service, kvmdomains, jessie, hassrvfs, apache2-hosts # }}} # {{{ gw-isc schein: @@ -598,7 +643,7 @@ servers: ball: address: 82.195.75.70 parents: gw-man-da - hostgroups: computers, buildd, hasbootfs, wheezy + hostgroups: computers, buildd, hasbootfs, wheezy, sw-raid # bartok TODO czerny: address: 82.195.75.109 @@ -642,10 +687,6 @@ servers: address: 82.195.75.99 parents: ganeti3 hostgroups: computers, service, hasbootfs, kvmdomains, wheezy, hasvarlogfs - rem: - address: 82.195.75.68 - parents: gw-man-da - hostgroups: computers, hasbootfs, hassrvfs, wheezy, no-bacula draghi: address: 82.195.75.106 parents: ganeti3 @@ -665,7 +706,7 @@ servers: stockhausen: address: 82.195.75.108 parents: ganeti3 - hostgroups: computers, service, kvmdomains, wheezy, acpid-hosts, jetty-hosts + hostgroups: computers, service, kvmdomains, jessie, acpid-hosts, jetty-hosts ganeti3: address: 82.195.75.111 parents: gw-man-da @@ -686,6 +727,10 @@ servers: address: 82.195.75.92 parents: ganeti3 hostgroups: computers, service, kvmdomains, wheezy + wolkenstein: + address: 82.195.75.65 + parents: ganeti3 + hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, rsyncd-hosts, apache2-hosts, wheezy mipsel-manda-01: address: 82.195.75.72 parents: gw-man-da @@ -730,11 +775,11 @@ servers: parry: address: 140.211.15.153 parents: gw-osuosl - hostgroups: computers, wheezy, buildd, hassrvfs + hostgroups: computers, wheezy, buildd, hassrvfs, sw-raid partch: address: 140.211.15.152 parents: gw-osuosl - hostgroups: computers, wheezy, hassrvfs, porterbox + hostgroups: computers, wheezy, hassrvfs, porterbox, sw-raid rietz: address: 140.211.166.43 parents: gw-osuosl @@ -756,7 +801,7 @@ servers: sibelius: address: 193.62.202.28 parents: gw-sanger - hostgroups: computers, acpid-hosts, postgres91-hosts, service, apache2-hosts, sw-raid, squeeze, rsyncd-hosts, xinetd-hosts, hasvarlogfs + hostgroups: computers, acpid-hosts, postgres91-hosts, service, apache2-hosts, sw-raid, wheezy, rsyncd-hosts, xinetd-hosts, hasvarlogfs contacts: tjrc1, dave smetana: address: 193.62.202.29 @@ -772,7 +817,7 @@ servers: villa: address: 212.211.132.32 parents: gw-scanplus-villa - hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl380, hasvarfs, hasusrfs, hasorgfs, xinetd-hosts, wheezy, security_mirror, no-bacula + hostgroups: computers, service, apache2-hosts, rsyncd-hosts, dl360, hassrvfs, xinetd-hosts, jessie, security_mirror, acpid-hosts # }}} # {{{ gw-sil eder: @@ -786,7 +831,7 @@ servers: eberlin: address: 86.59.118.155 parents: gw-sil - hostgroups: computers, buildd, wheezy + hostgroups: computers, buildd, wheezy, sw-raid # }}} # {{{ gw-ubcece sw-ubcece: @@ -847,10 +892,6 @@ servers: address: 206.12.19.118 parents: ganeti2 hostgroups: computers, general, apache2-hosts, hasbootfs, kvmdomains, apache-https, wheezy - wolkenstein: - address: 206.12.19.116 - parents: ganeti2 - hostgroups: computers, hasbootfs, hassrvfs, kvmdomains, service, xinetd-hosts, rsyncd-hosts, apache2-hosts, wheezy brahms: address: 206.12.19.115 parents: ganeti2 @@ -995,10 +1036,18 @@ servers: address: 143.106.167.145 parents: gw-unicamp hostgroups: layer3-infrastructure + prokofiev: + address: 143.106.167.147 + parents: gw-unicamp + hostgroups: computers, jessie, service powerpc-unicamp-01: address: 143.106.167.149 - parents: asgard + parents: prokofiev hostgroups: computers, hasbootfs, hassrvfs, buildd, wheezy + ppc64el-unicamp-01: + address: 143.106.167.135 + parents: prokofiev + hostgroups: computers, hasbootfs, hassrvfs, buildd, jessie # }}} # {{{ gw-utwente klecker: @@ -1019,11 +1068,15 @@ servers: hostgroups: secondary-IPs # }}} # {{{ gw-ynic - howells: + henze: + address: 144.32.168.74 + parents: gw-ynic + hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd + hasse: address: 144.32.168.75 parents: gw-ynic hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd - hummel: + antheil: address: 144.32.168.76 parents: gw-ynic hostgroups: computers, hasbootfs, hassrvfs, armhf, wheezy, buildd @@ -1058,6 +1111,11 @@ servers: parents: gw-zivit hostgroups: computers, porterbox, hassrvfs, wheezy # }}} + # {{{ gw-sakura + setoguchi: + address: 133.242.99.74 + parents: gw-sakura + hostgroups: computers, service, wheezy, no-bacula, hassrvfs, apache2-hosts, rsyncd-hosts, xinetd-hosts, security_mirror, acpid-hosts # }}} # {{{ ############################# host groups ############################# @@ -1065,6 +1123,9 @@ hostgroups: computers: alias: computers private: 1 + extinfo-icon_image: base/debian.png + extinfo-icon_image_alt: Debian GNU/Linux + extinfo-notes_url: https://db.debian.org/machines.cgi?host=%s layer2-infrastructure: alias: Layer 2 Devices extinfo-icon_image: base/switch40.png @@ -1079,6 +1140,7 @@ hostgroups: freebsd: alias: freebsd private: 1 + extinfo-icon_image_alt: Debian GNU/kFreeBSD armhf: alias: armhf private: 1 @@ -1088,24 +1150,12 @@ hostgroups: porterbox: alias: developer accessible porter machines - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s service: alias: machines running services - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s buildd: alias: buildd systems - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s general: alias: general purpose developer accessible machines - extinfo-icon_image: base/debian.png - extinfo-icon_image_alt: Debian GNU/Linux - extinfo-notes_url: http://db.debian.org/machines.cgi?host=%s dl380: alias: HP DL380 hosts @@ -1145,6 +1195,8 @@ hostgroups: alias: Hosts running squeeze wheezy: alias: Hosts running wheezy + jessie: + alias: Hosts running jessie kvmdomains: alias: Hosts that are KVM domains @@ -1352,7 +1404,7 @@ services: - name: disk usage - all servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs 95 98" + nrpe: "/usr/lib/nagios/plugins/check_disk -X devpts -X proc -X linprocfs -X devfs -X fdescfs -X sysfs -X nfs -x /srv/farm-snapshot/farm-misc 95 98" hosts: sibelius - @@ -1416,9 +1468,24 @@ services: nrpe: "/usr/lib/nagios/plugins/check_disk 90 94 /srv/snapshot.debian.org" hosts: sibelius - - name: disk usage on /srv/farm-snapshot/farm-misc + name: disk usage on /srv/farm-snapshot/farm-1 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-1" + hosts: sibelius + - + name: disk usage on /srv/farm-snapshot/farm-2 servicegroups: diskspace - nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-misc" + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-2" + hosts: sibelius + - + name: disk usage on /srv/farm-snapshot/farm-3 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-3" + hosts: sibelius + - + name: disk usage on /srv/farm-snapshot/farm-4 + servicegroups: diskspace + nrpe: "/usr/lib/nagios/plugins/check_disk 97 95 /srv/farm-snapshot/farm-4" hosts: sibelius - name: disk usage on /var/lib/postgresql/9.1 @@ -1493,6 +1560,11 @@ services: name: processes - total nrpe: "/usr/lib/nagios/plugins/check_procs 620 700" hostgroups: computers + excludehosts: prokofiev + - + name: processes - total + nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700" + hosts: prokofiev - name: swap usage - percent nrpe: "/usr/lib/nagios/plugins/check_swap -w 20% -c 10%" @@ -1506,6 +1578,12 @@ services: nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C getty -a /sbin/getty" hostgroups: computers excludehosts: zelenka, zandonai + excludehostgroups: jessie + - + name: process - getty + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:8 -c 1: -u root -C agetty -a /sbin/agetty" + hostgroups: computers + hostgroups: jessie - name: processes - zombies nrpe: "/usr/lib/nagios/plugins/check_procs 5 10 -s Z" @@ -1529,7 +1607,7 @@ services: servicegroups: backup nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-dabackup" hostgroups: computers - excludehosts: backuphost + excludehosts: storace normal_check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 @@ -1537,7 +1615,7 @@ services: name: backup server config servicegroups: backup nrpe: "/usr/lib/nagios/plugins/dsa-check-dabackup-server" - hosts: backuphost + hosts: storace normal_check_interval: 60 max_check_attempts: 2 retry_check_interval: 5 @@ -1592,6 +1670,15 @@ services: hostgroups: computers normal_check_interval: 60 retry_check_interval: 15 + - + name: upgraded libraries + servicegroups: security + nrpe: "sudo /usr/lib/nagios/plugins/dsa-check-libs" + hostgroups: computers + excludehostgroups: freebsd + normal_check_interval: 60 + retry_check_interval: 15 + notification_interval: 10080 - name: installed firewall nrpe: "/usr/lib/nagios/plugins/dsa-check-file -w -f /etc/ferm/ferm.conf" @@ -1606,7 +1693,12 @@ services: name: process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C ulogd -a '/usr/sbin/ulogd -d'" hostgroups: computers - excludehostgroups: freebsd, sparc + excludehostgroups: freebsd, sparc, jessie + - + name: process - ulogd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u ulog -C ulogd -a '/usr/sbin/ulogd --daemon --uid ulog'" + hostgroups: jessie + excludehostgroups: freebsd - name: unexpected process - ulogd nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C ulogd" @@ -1638,12 +1730,16 @@ services: name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" hostgroups: computers - excludehostgroups: freebsd + excludehostgroups: freebsd, jessie - name: process - syslog-ng nrpe: "/usr/lib/nagios/plugins/check_procs -w 2:2 -c 2: -u root -C syslog-ng -a '/sbin/syslog-ng -p /var/run/syslog-ng.pid'" hostgroups: freebsd + - + name: process - syslog-ng + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C syslog-ng -a '/sbin/syslog-ng -F'" + hostgroups: jessie - name: remote logging on lotti @@ -1702,7 +1798,7 @@ services: - name: process - munin-node nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:3 -c 1: -u root -C munin-node -a 'munin-node'" - hostgroups: wheezy + hostgroups: wheezy, jessie excludehostgroups: freebsd - name: network service - munin-node @@ -1750,7 +1846,7 @@ services: name: process - monit nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C monit -a '/usr/bin/monit -d 300 -I -c /etc/monit/monitrc -s /var/lib/monit/monit.state'" hostgroups: computers - excludehostgroups: squeeze, alioth + excludehostgroups: squeeze, alioth, jessie ### - name: MQ connection on rainier @@ -1791,7 +1887,11 @@ services: name: process - udevd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C udevd -a 'udevd'" hostgroups: computers - excludehostgroups: freebsd + excludehostgroups: freebsd, jessie + - + name: process - udevd + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -p 1 -C systemd-udevd -a '/lib/systemd/systemd-udevd'" + hostgroups: jessie - name: unexpected process - udev nrpe: "/usr/lib/nagios/plugins/check_procs -w 0:0 -C udevd" @@ -1820,7 +1920,7 @@ services: - name: process - stunnel4 - puppet-ekeyd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:6 -c 1: -u stunnel4 -C stunnel4 -a '/usr/bin/stunnel4 /etc/stunnel/puppet-ekeyd.conf'" - hostgroups: squeeze, wheezy + hostgroups: squeeze, wheezy, jessie excludehostgroups: freebsd, alioth # }}} # {{{ anti-services @@ -1866,21 +1966,26 @@ services: - name: process - mdadm monitor servicegroups: raid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /var/run/mdadm/monitor.pid --daemonise --scan'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'" hostgroups: sw-raid - excludehostgroups: wheezy + excludehostgroups: jessie - - # wheezy: name: process - mdadm monitor servicegroups: raid - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -u root -C mdadm -a '/sbin/mdadm --monitor --scan'" hostgroups: sw-raid - excludehostgroups: squeeze + excludehostgroups: wheezy - name: RAID - sw raid servicegroups: raid nrpe: "/usr/lib/nagios/plugins/dsa-check-raid-sw" hostgroups: sw-raid + - + name: RAID - unexpected sw raid + servicegroups: raid + nrpe: "if [ -e /proc/mdstat ]; then echo 'Found /proc/mdstat'; exit 1; else echo 'No /proc/mdstat on this host.'; fi" + hostgroups: computers + excludehostgroups: sw-raid ### - name: HW - hpacucli status @@ -2295,7 +2400,7 @@ services: - name: postgresql backups nrpe: "/usr/bin/sudo -u debbackup /usr/lib/nagios/plugins/dsa-check-backuppg" - hosts: backuphost + hosts: storace # }}} # {{{ power # - @@ -2404,7 +2509,7 @@ services: # {{{ DNS - name: process - named - nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:15 -c 1: -u bind -C named -a '/usr/sbin/named -u bind'" + nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:15 -c 1: -u bind -C named -a '/usr/sbin/named '" hostgroups: bind9-hosts - name: network service - dns @@ -2529,6 +2634,14 @@ services: hostgroups: openstack-controller # }}} # {{{ misc +# - +# Disable this check until logind and binfmt_misc issues are fixed +# something unknown is triggering mount of binfmt_misc +# https://bugs.debian.org/772700 +# name: system - all services running +# nrpe: "/usr/bin/sudo /bin/systemctl is-system-running" +# hostgroups: jessie + ### - name: process - rngd nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1: -u root -C rngd -a '/usr/sbin/rngd -r /dev/hwrng'" @@ -2594,6 +2707,17 @@ services: hosts: gombert contact_groups: gobby #### + - + name: network service - sip-tls cert - 443 + check: dsa_check_cert!443 + normal_check_interval: 60 + hosts: vogler + - + name: network service - sip-tls cert - 5061 + check: dsa_check_cert!5061 + normal_check_interval: 60 + hosts: vogler + #### - name: puppetmaster cert nrpe: "/usr/lib/nagios/plugins/dsa-check-cert-expire /var/lib/puppet/ssl/certs/ca.pem"