zemlinsky.d.o is not a buildd anymore
[mirror/dsa-nagios.git] / config / nagios-master.cfg
index 999aa94..0e62c37 100644 (file)
@@ -447,7 +447,7 @@ servers:
   x86-bm-01:
     address: 5.153.231.32
     parents: ganeti-bytemark
-    hostgroups: computers, kvmdomains, stretch, no-bacula, systemd-timesyncd
+    hostgroups: computers, pybuildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd
   tate:
     address: 5.153.231.33
     parents: ganeti-bytemark
@@ -643,7 +643,7 @@ servers:
   x86-grnet-01:
     address: 194.177.211.203
     parents: ganeti-grnet
-    hostgroups: computers, buildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd
+    hostgroups: computers, pybuildd, hassrvfs, kvmdomains, stretch, systemd-timesyncd
   vittoria:
     address: 194.177.211.205
     parents: ganeti-grnet
@@ -721,7 +721,7 @@ servers:
   zemlinsky:
     address: 129.143.160.6
     parents: gw-karlsruhe
-    hostgroups: computers, buildd, stretch
+    hostgroups: computers, no-bacula, stretch
     contacts: pkern
   # }}}
   # {{{ gw-manda
@@ -818,7 +818,7 @@ servers:
   zani:
     address: 148.100.88.22
     parents: gw-marist
-    hostgroups: computers, buildd, hassrvfs, stretch, incomingmailrelayed
+    hostgroups: computers, pybuildd, hassrvfs, stretch, incomingmailrelayed
   # }}}
   # {{{ gw-osuosl
   byrd:
@@ -995,7 +995,7 @@ servers:
   godard:
     address: 209.87.16.44
     parents: ubc-gateway
-    hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, systemd-timesyncd, postfix-hosts, postgres96-hosts, manyprocesses
+    hostgroups: computers, service, kvmdomains, stretch, hassrvfs, apache2-hosts, apache-https, systemd-timesyncd, postfix-hosts, postgres96-hosts, crazymanyprocesses
   debussy:
     address: 209.87.16.46
     parents: ubc-gateway
@@ -1095,13 +1095,8 @@ hostgroups:
   computers:
     alias: computers
     private: 1
-    extinfo-icon_image: base/debian.png
-    extinfo-icon_image_alt: Debian GNU/Linux
-    extinfo-notes_url: https://db.debian.org/machines.cgi?host=%s
   layer3-infrastructure:
     alias: Layer 3 Devices
-    extinfo-icon_image: base/switch40.png
-    extinfo-icon_image_alt: router
   notacomputer:
     alias: Systems that are not really systems.  Yeah :)
     private: 1
@@ -1118,6 +1113,8 @@ hostgroups:
     alias: machines running services
   buildd:
     alias: buildd systems
+  pybuildd:
+    alias: buildd systems running pybuildd
   general:
     alias: general purpose developer accessible machines
 
@@ -1232,6 +1229,8 @@ hostgroups:
     private: 1
   manyprocesses:
     alias: hosts with lots and lots of (kernel) processes
+  crazymanyprocesses:
+    alias: hosts with stupidly lots of processes
 
   no-bacula:
     alias: hosts which are not being backed up with bacula
@@ -1540,11 +1539,15 @@ services:
     name: processes - total
     nrpe: "/usr/lib/nagios/plugins/check_procs 620 700"
     hostgroups: computers
-    excludehostgroups: manyprocesses
+    excludehostgroups: manyprocesses, crazymanyprocesses
   -
     name: processes - total
     hostgroups: manyprocesses
     nrpe: "/usr/lib/nagios/plugins/check_procs 1500 1700"
+  -
+    name: processes - total
+    hostgroups: crazymanyprocesses
+    nrpe: "/usr/lib/nagios/plugins/check_procs 15000 25000"
   -
     name: free memory - mb
     nrpe: "/usr/lib/nagios/plugins/dsa-check-memory -m mb"
@@ -1604,7 +1607,7 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-bacula $HOSTNAME$.debian.org"
     runfrom: dinis
     hostgroups: computers
-    excludehostgroups: buildd, porterbox, no-bacula
+    excludehostgroups: buildd, pybuildd, porterbox, no-bacula
     check_interval:  60
     retry_interval: 15
   -
@@ -1613,7 +1616,7 @@ services:
     remotecheck: "/usr/lib/nagios/plugins/dsa-check-bacula -w 1080 -c 1560 $HOSTNAME$.debian.org F"
     runfrom: dinis
     hostgroups: computers
-    excludehostgroups: buildd, porterbox, no-bacula
+    excludehostgroups: buildd, pybuildd, porterbox, no-bacula
     check_interval:  60
     retry_interval: 15
   -
@@ -2427,19 +2430,20 @@ services:
     nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:5 -u buildd -C buildd -a '/usr/bin/buildd'"
     hostgroups: buildd
     contact_groups: buildd
+  -
+    name: process - buildd
+    servicegroups: buildd
+    nrpe: "/usr/lib/nagios/plugins/check_procs -w 1:5 -u buildd -C python3 -a 'buildd.py'"
+    hostgroups: pybuildd
+    contact_groups: buildd
   -
     name: processes - zombie schroot
     nrpe: "(/usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0 > /dev/null || /usr/lib/nagios/plugins/check_procs -a schroot -s Zs -c 0) && /usr/lib/nagios/plugins/check_procs -a schroot -s ZNs -c 0"
-    hostgroups: buildd
+    hostgroups: buildd, pybuildd
     contact_groups: +buildd
     check_interval: 5
     max_check_attempts: 24
     retry_interval: 5
-  -
-    name: processes - lvcreate
-    nrpe: "/usr/lib/nagios/plugins/check_procs -m 'ELAPSED' -c 500 -C lvcreate -u root -a 'lvcreate'"
-    hostgroups: buildd
-    contact_groups: +buildd
   # }}}
   # {{{ NFS Stuff
   -