add event_handlers to restart services
author Stephen Gran <steve@lobefin.net>
Sat, 14 May 2011 14:13:52 +0000 (15:13 +0100)
committer Stephen Gran <steve@lobefin.net>
Sat, 14 May 2011 14:13:52 +0000 (15:13 +0100)
Signed-off-by: Stephen Gran <steve@lobefin.net>
dsa-nagios-checks/debian/changelog
dsa-nagios-checks/debian/rules
dsa-nagios-checks/event_handlers/dsa-eventhandler-restart-service [new file with mode: 0755]

index 6952eb8..02ef069 100644 (file)
@@ -1,5 +1,6 @@
 dsa-nagios-checks (9X) Xnstable; urgency=low
 
+  [ Peter Palfrader ]
   * dsa-check-entropy: 
     - document watermark default
     - fix off-by-one in output
@@ -7,7 +8,10 @@ dsa-nagios-checks (9X) Xnstable; urgency=low
     - ignore regular files in pg backup's root directory if they are still
       fresh.
 
- -- Peter Palfrader <weasel@debian.org>  Fri, 22 Apr 2011 14:24:16 +0200
+  [ Stephen Gran ]
+  * Add event_handler to restart services
+
+ -- Stephen Gran <sgran@debian.org>  Sat, 14 May 2011 15:12:41 +0100
 
 dsa-nagios-checks (89) unstable; urgency=low
 
index bfd6a65..bb2ec64 100755 (executable)
@@ -17,6 +17,10 @@ install:
                install -m 755 $$f $(CURDIR)/debian/dsa-nagios-checks/usr/lib/nagios/plugins; \
        done
 
+       for f in `ls -1 event_handlers/* | grep -v 'sample$$'`; do \
+               install -m 755 $$f $(CURDIR)/debian/dsa-nagios-checks/usr/lib/nagios/plugins; \
+       done
+
        for f in `ls -1 etc/*`; do \
                install -m 644 $$f $(CURDIR)/debian/dsa-nagios-checks/etc/nagios; \
        done
diff --git a/dsa-nagios-checks/event_handlers/dsa-eventhandler-restart-service b/dsa-nagios-checks/event_handlers/dsa-eventhandler-restart-service
new file mode 100755 (executable)
index 0000000..278ddee
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/sh
+#
+# Event handler script for restarting a service on the local machine
+#
+# Note: This script will only restart the service if the service is
+#       retried 3 times (in a "soft" state) or if the service somehow
+#       manages to fall into a "hard" error state.
+
+# Args:
+# $1 $SERVICESTATE$
+# $2 $SERVICESTATETYPE$
+# $3 $SERVICEATTEMPT$
+# $4 init script name
+
+state="$1"
+type="$2"
+attempt="$3"
+service="$4"
+
+# Restart ${service} through its init script.  The name becomes part of a path
+# that is run under sudo, so reject empty names and names containing '/'.
+restart_service() {
+       case "${service}" in
+               ''|*/*) echo "$0: invalid init script name '${service}'" >&2; return 1 ;;
+       esac
+       sudo "/etc/init.d/${service}" restart
+}
+
+# What state is the service in?
+case "${state}" in
+       OK)
+       # The service just came back up, so don't do anything...
+       ;;
+       WARNING)
+       # We don't really care about warning states, since the service is probably still running...
+       ;;
+       UNKNOWN)
+       # We don't know what might be causing an unknown error, so don't do anything...
+       ;;
+       CRITICAL)
+       # Aha!  The service appears to have a problem - perhaps we should restart it
+       # Is this a "soft" or a "hard" state?
+               case "${type}" in
+               # "soft" state: Nagios is still retrying the check before it turns
+               # into a "hard" state and contacts get notified.
+                       SOFT)
+                       # Don't restart on the first failures, they may just be a fluke.
+                       # Restart on the 3rd attempt; hopefully the next check is then a
+                       # "soft" recovery and no one gets notified.
+                               case "${attempt}" in
+                                       3)
+                                               restart_service
+                                       ;;
+                               esac
+                       ;;
+                       # "hard" state: the restart above did not happen or did not help.  Contacts
+                       # have already been notified (unless notifications are disabled); try once more.
+                       HARD)
+                               restart_service
+                       ;;
+               esac
+       ;;
+esac
+
+exit 0  # never report failure to the caller, even if the restart failed