dsa-nagios-checks/event_handlers/dsa-eventhandler-restart-service

   1 #!/bin/sh
   2 #
   3 # Event handler script for restarting a service on the local machine
   4 #
   5 # Note: This script will only restart the service if the service is
   6 #       retried 3 times (in a "soft" state) or if the service somehow
   7 #       manages to fall into a "hard" error state.
   8
   9 # Args:
  10 # $1 $SERVICESTATE$
  11 # $2 $SERVICESTATETYPE$
  12 # $3 $SERVICEATTEMPT$
  13 # $4 $HOSTADDRESS$
  14 # $4 init script name
  15
  16 state="$1"
  17 type="$2"
  18 attempt="$3"
  19 host="$4"
  20 service="$5"
  21
  22 # What state is the service in?
  23 case "${state}" in
  24         OK)
  25         # The service just came back up, so don't do anything...
  26         ;;
  27         WARNING)
  28         # We don't really care about warning states, since the service is probably still running...
  29         ;;
  30         UNKNOWN)
  31         # We don't know what might be causing an unknown error, so don't do anything...
  32         ;;
  33         CRITICAL)
  34         # Aha!  The service appears to have a problem - perhaps we should restart it
  35         # Is this a "soft" or a "hard" state?
  36                 case "${type}" in
  37                 # We're in a "soft" state, meaning that Nagios is in the middle of retrying the
  38                 # check before it turns into a "hard" state and contacts get notified...
  39                         SOFT)
  40                         # What check attempt are we on?  We don't want to restart the service on the first
  41                         # check, because it may just be a fluke!
  42                                 case "${attempt}" in
  43                                         # Wait until the check has been tried 3 times before restarting the service
  44                                         # If the check fails on the 4th time (after we restart the service), the state
  45                                         # type will turn to "hard" and contacts will be notified of the problem.
  46                                         # Hopefully this will restart the service successfully, so the 4th check will
  47                                         # result in a "soft" recovery.  If that happens no one gets notified because we
  48                                         # fixed the problem!
  49                                         3)
  50                                                 # Call the init script to restart the HTTPD server
  51                                                 /usr/lib/nagios/plugins/check_nrpe -n -H "${host}" -c "${service}"
  52                                         ;;
  53                                 esac
  54                         ;;
  55                         # The service somehow managed to turn into a hard error without getting fixed.
  56                         # It should have been restarted by the code above, but for some reason it didn't.
  57                         # Let's give it one last try, shall we?
  58                         # Note: Contacts have already been notified of a problem with the service at this
  59                         # point (unless you disabled notifications for this service)
  60                         HARD)
  61                                 /usr/lib/nagios/plugins/check_nrpe -n -H "${host}" -c "${service}"
  62                         ;;
  63         esac
  64         ;;
  65 esac
  66
  67 exit 0