3 # build nagios and nrpe config from a single master config file
5 # Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Peter Palfrader <peter@palfrader.org>
7 # Permission is hereby granted, free of charge, to any person obtaining
8 # a copy of this software and associated documentation files (the
9 # "Software"), to deal in the Software without restriction, including
10 # without limitation the rights to use, copy, modify, merge, publish,
11 # distribute, sublicense, and/or sell copies of the Software, and to
12 # permit persons to whom the Software is furnished to do so, subject to
13 # the following conditions:
15 # The above copyright notice and this permission notice shall be
16 # included in all copies or substantial portions of the Software.
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Prefix (a directory) put in front of the name of every generated file.
31 GENERATED_PREFIX="./generated/"
# One "auto-<kind>.cfg" output file per kind of nagios object we generate.
# NOTE(review): the initialisation of nagios_filename and the closing brace of
# the block below are not visible in this excerpt.
34 %w(hosts hostgroups services dependencies servicegroups).each{
35 |x| nagios_filename[x] = GENERATED_PREFIX+"auto-#{x}.cfg"
# The NRPE command definitions go into their own file, named after ORG.
37 nagios_filename['nrpe'] = GENERATED_PREFIX+"nrpe_#{ ORG }.cfg"
# max_check_attempts used for services that do not set it themselves; addService
# also uses it as the base to which negative values are added.
40 MAX_CHECK_ATTEMPTS_DEFAULT=6
# Names of the nagios check commands used to run a check through NRPE.
42 NRPE_CHECKNAME="#{ ORG }_check_nrpe" # check that takes one argument: service name to be checked
43 NRPE_CHECKNAME_HOST="#{ ORG }_check_nrpe_host" # check that takes two arguments: relay host on which to run check, service name to be checked
# Defaults filled into host and service definitions that do not set them.
45 HOST_TEMPLATE_NAME='generic-host' # host template that all our host definitions use
46 SERVICE_TEMPLATE_NAME='generic-service' # service template that all our service definitions use
47 HOST_ALIVE_CHECK='check-host-alive' # host alive check if server is pingable
48 NRPE_PROCESS_SERVICE='process - nrpe' # nrpe checks will depend on this one
# Stores value under key in hash unless the key is already present.
#
# hash  - Hash to modify in place
# key   - key to look up
# value - value stored when key is missing
#
# An existing entry is left untouched even if its value is nil or false,
# because only the presence of the key is checked.
def set_if_unset(hash, key, value)
  hash[key] = value unless hash.key?(key)
end
# Stores value under key in hash, refusing to overwrite an existing entry.
#
# hash  - Hash to modify in place
# key   - key that must not be present yet
# value - value to store
# type  - kind of definition (e.g. 'Host'); only used in the error message
# name  - name of the definition; only used in the error message
#
# Raises RuntimeError if hash already contains key.
def set_complain_if_set(hash, key, value, type, name)
  # raise rather than throw: this reports an error, it is not a jump to a
  # matching catch block.
  raise "#{type} definition '#{name}' has '#{key}' already defined" if hash.key?(key)

  hash[key] = value
end
# Make an array out of something.
#
# nil becomes an empty array, a String becomes a one-element array holding
# that string, and an Array is returned as it is (the same object, no copy).
#
# Raises RuntimeError for anything else.
def ensure_array(something)
  case something
  when nil    then []
  when String then [ something ]
  when Array  then something
  else
    raise "Do not know how to make an array out of #{something}: " + something.to_yaml
  end
end
79 # This class keeps track of the checks done via NRPE and makes sure
80 # each gets a unique name.
82 # Unfortunately NRPE limits check names to some 30 characters, so
83 # we need to mangle service names near the end.
# Derives an NRPE command name for the service called `name` whose command
# line is `check`.
#
# The service name is reduced to letters and underscores, "process" is
# shortened to "ps", and the first 19 characters of the result are prefixed
# with SHORTORG and an underscore. While that candidate (plus the current
# suffix) is already a key of @checks, a new 5 character suffix is taken from
# the crypt() hash of the command line.
#
# NOTE(review): the initialisation of `hash` and `skew`, the end of the while
# loop and the statement that adds `hash` to `result` are not visible in this
# excerpt - confirm against the complete file.
89 def make_name( name, check )
# tr_s turns every run of characters other than a-z/A-Z into a single "_"
90 name = name.tr_s("^a-zA-Z", "_").gsub("process", "ps")
# keep only the first 19 characters of the mangled name
92 result = "#{ SHORTORG }_" + name[0,19]
# loop until result + hash is not a registered check name
96 while (@checks.has_key?(result + hash))
97 # hash it, so that we don't lose uniqueness by cutting it off
# NOTE(review): presumably relies on the platform's crypt() accepting "$1$" as salt - confirm
98 hash = (check+skew).crypt("$1$")
99 hash = hash[-5,5] # 5 chars are enough
101 skew += ' ' # change it a bit so the hash changes
104 return result # max of 32 or so chars
# Registers the NRPE command line `check` and returns the command name under
# which it is stored in @checks.
#
# name  - service name a new command name is derived from (via make_name)
# check - command line to be run through NRPE
#
# A command line that is already registered is not stored a second time; the
# name it was first registered under is returned instead.
def add( name, check )
  # A single pass over the registry finds an existing entry, if any.
  existing = @checks.find { |_key, value| value == check }
  return existing.first if existing

  key = make_name(name, check)
  @checks[ key ] = check
  key
end
# Prints the keys and values of hash to a file as one nagios object
# definition ("define <kind> { ... }").
# This is the function that prints the bodies of most of our
# host/service/etc definitions.
#
# fd           - IO-like object the definition is written to
# kind         - nagios object type, e.g. 'host' or 'service'
# hash         - directive name => value pairs, written sorted by name
# exclude_keys - directive names that are not to be written
#
# Private keys (those starting with an underscore) are skipped as well.
def print_block(fd, kind, hash, exclude_keys)
  fd.puts "define #{kind} {"
  hash.keys.sort.each do |key|
    next if key.start_with?('_') || exclude_keys.include?(key)

    fd.puts " #{key} #{hash[key]}"
  end
  # Close the definition and separate it from the next one with a blank line.
  fd.puts "}"
  fd.puts
end
# Folds the contact settings of a host into a service definition.
#
# For each of 'contacts' and 'contact_groups' the values present on host and
# on service are collected, identical values are dropped, and the rest is
# sorted and joined with commas; the result replaces the service's entry.
# A key that neither of the two defines stays unset on the service.
#
# host    - Hash with the host definition (not modified)
# service - Hash with the service definition (modified in place)
def merge_contacts(host, service)
  %w{contacts contact_groups}.each do |k|
    sources  = [host, service].select { |source| source.key?(k) }
    contacts = sources.map { |source| source[k] }
    # uniq: the same value on host and service must not be listed twice
    service[k] = contacts.uniq.sort.join(",") unless contacts.empty?
  end
end
152 # Add the service definition service to hosts
153 # files holds the open output files (service definitions go to files['services'], dependencies to files['dependencies']), servers the host definitions by host name
# Writes the service definition `service` for every host in `hosts` and then a
# servicedependency definition for every entry of the service's 'depends' list.
#
# hosts   - names of the hosts the service is defined for
# service - Hash describing the service; modified in place
# files   - Hash of open output files; 'services' and 'dependencies' are used
# servers - Hash of host definitions, keyed by host name
#
# NOTE(review): a number of lines of this method (conditions, block and
# method terminators, some assignments) are not visible in this excerpt; the
# comments below only describe the visible statements.
154 def addService(hosts, service, files, servers)
# Fill in defaults the service definition did not set itself.
156 set_if_unset service, 'use' , SERVICE_TEMPLATE_NAME
157 set_if_unset service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT
# A negative max_check_attempts is added to the default, i.e. it lowers it.
159 service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts'] if service['max_check_attempts'] < 0
# NRPE handling. NOTE(review): presumably guarded by a check for service['nrpe'] - the condition is not visible here.
162 throw "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+
163 " This should have been caught much earlier" if service.has_key?('check_command');
# Register the command line with the NRPE registry and call it through the
# one-argument NRPE check command.
165 check = $nrpe.add(service['service_description'], service['nrpe'])
166 service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }"
168 service['depends'] = ensure_array( service['depends'] )
169 service['depends'] << NRPE_PROCESS_SERVICE unless service['service_description'] == NRPE_PROCESS_SERVICE # Depend on NRPE unless we are it
# Write one service definition per host, in sorted host order.
172 hosts.sort.each{ |host|
# NOTE(review): `s` is assigned on a line not visible here - presumably a per-host copy of `service`; confirm.
174 set_complain_if_set s, 'host_name', host, 'Service', s['service_description']
175 merge_contacts(servers[host], s)
# Keys that are not written to the service definition.
# NOTE(review): one line of this %w list is not visible in this excerpt.
177 exclude_keys = %w(nrpe runfrom remotecheck
179 hosts hostgroups excludehosts excludehostgroups)
# Hosts flagged 'no-servicegroups' get their services written without servicegroups.
180 if servers[host]['no-servicegroups']
181 exclude_keys << 'servicegroups'
183 print_block files['services'], 'service', s, exclude_keys
# Write one servicedependency per prerequisite and host.
186 if service['depends']
187 service['depends'].sort.each{ |prerequisite|
188 hosts.sort.each{ |host|
# Unless the prerequisite names a host, it refers to a service on the same host.
189 prerequisite_host = host
191 # split off a hostname if there's one
192 bananasplit = prerequisite.split(':')
193 if bananasplit.size == 2
194 prerequisite_host = bananasplit[0]
196 elsif bananasplit.size > 2
# NOTE(review): "prase" in the message below is a typo for "parse".
197 throw "Cannot prase prerequisite #{prerequisite} for service #{service['service_description']} into host:service"
# Entries of the dependency definition: host_name/service_description name the
# prerequisite, the dependent_* entries name this service on `host`.
# NOTE(review): `pre` and the opening of the hash are set on lines not visible here.
200 'host_name' => prerequisite_host,
201 'service_description' => pre,
202 'dependent_host_name' => host,
203 'dependent_service_description' => service['service_description'],
204 'execution_failure_criteria' => 'n',
205 'notification_failure_criteria' => 'w,u,c'
207 print_block files['dependencies'], 'servicedependency', dependency, %w()
# Host lists in services can be given both as single hosts and as hostgroups.
# This function merges the hostgroups and the plain list of hosts.
#
# It also takes a prefix so that it can be used for exclude lists as well:
# the keys read from service are prefix+'hosts' and prefix+'hostgroups'.
#
# service    - Hash with the service definition; both keys hold
#              comma-separated names, surrounding whitespace is ignored
# servers    - Hash of known hosts, keyed by host name
# hostgroups - Hash of known hostgroups; '_memberlist' of each entry holds
#              the names of its member hosts
# prefix     - String put in front of 'hosts' and 'hostgroups'
#
# Returns a new Array of host names; a host that is listed more than once
# appears more than once.
# Raises RuntimeError if a host or hostgroup is referenced that does not exist.
def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix)
  hosts = (service[prefix+'hosts'] || '').split(/,/).map { |x| x.strip }
  hosts.each do |host|
    raise "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host]
  end

  if service[prefix+'hostgroups']
    service[prefix+'hostgroups'].split(/,/).map { |x| x.strip }.each do |hg|
      raise "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg]
      # hosts is our own array, so appending does not touch the member list
      hosts.concat hostgroups[hg]['_memberlist']
    end
  end

  hosts
end
# Figure out the hosts a given service applies to.
#
# For a given service find the list of hosts, minus the excluded hosts, that
# this service runs on. Both lists are built by merge_hosts_and_hostgroups
# (with the prefixes '' and 'exclude').
#
# service    - Hash with the service definition
# servers    - Hash of known hosts, keyed by host name
# hostgroups - Hash of known hostgroups
#
# Returns an Array of host names without duplicates. An excluded host that
# was not included in the first place is reported on stdout.
def find_hosts(service, servers, hostgroups)
  # uniq: a host listed directly and through a hostgroup (or through two
  # hostgroups) must not get the service defined twice
  hosts    = merge_hosts_and_hostgroups(service, servers, hostgroups, '').uniq
  excluded = merge_hosts_and_hostgroups(service, servers, hostgroups, 'exclude').uniq

  excluded.each do |host|
    next unless hosts.delete(host).nil?

    puts "info: [service #{service['service_description']}] not excluding host #{host} - it's not included anyway."
  end

  hosts
end
250 #############################################################################################
251 #############################################################################################
252 #############################################################################################
# Load the master configuration. The block form of File.open closes the file
# handle as soon as parsing is done instead of leaving it open.
# NOTE(review): YAML::load may instantiate arbitrary objects on older
# Ruby/Psych versions - only feed it a trusted nagios-master.cfg.
config = File.open( 'nagios-master.cfg' ) { |fh| YAML::load( fh ) }
# Open every output file named in nagios_filename for writing and remember
# the handle in files under the same name.
# NOTE(review): the initialisation of `files` and the closing brace of the
# block are not visible in this excerpt.
258 # Remove old created files (opening with mode "w" truncates existing ones)
259 nagios_filename.each_pair{ |name, filename|
260 files[name] = File.new(filename, "w")
263 #################################
264 # create a few hostgroups
265 #################################
266 # create the "all" and "pingable" hostgroups
# Add the two built-in hostgroups. They are flagged 'private'; the hostgroup
# output loop further down skips hostgroups with that flag.
267 config['hostgroups']['all'] = {}
268 config['hostgroups']['all']['alias'] = "all servers"
269 config['hostgroups']['all']['private'] = true
270 config['hostgroups']['pingable'] = {}
271 config['hostgroups']['pingable']['alias'] = "pingable servers"
272 config['hostgroups']['pingable']['private'] = true
# Give every hostgroup an empty member list ('_memberlist' is a private key,
# print_block does not write keys starting with an underscore).
274 config['hostgroups'].each_pair{ |name, hg|
275 throw "Empty hostgroup or hostgroup #{name} not a hash" unless hg.kind_of?(Hash)
277 hg['_memberlist'] = []
# Enter every server into the member list of each hostgroup it belongs to.
280 config['servers'].each_pair{ |name, server|
281 throw "Empty server or server #{name} not a hash" unless server.kind_of?(Hash)
283 throw "No hostgroups defined for #{name}" unless server['hostgroups']
# 'hostgroups' is a comma-separated string; '_hostgroups' is the parsed list
284 server['_hostgroups'] = server['hostgroups'].split(/,/).map{ |x| x.strip };
# every server is in 'all', and in 'pingable' unless pingable is explicitly false
285 server['_hostgroups'] << 'all'
286 server['_hostgroups'] << 'pingable' unless server['pingable'] == false
288 server['_hostgroups'].each{ |hg|
289 throw "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg)
290 config['hostgroups'][hg]['_memberlist'] << name
# servicegroups are optional in the master config
294 config['servicegroups'] = {} unless config.has_key? 'servicegroups'
# Write one host definition per server, in sorted order of the server names.
# NOTE(review): block terminators are not visible in this excerpt.
299 config['servers'].keys.sort.each{ |name|
300 server = config['servers'][name]
301 # Formerly we used 'ip' instead of 'address' in our source file
302 # Handle this change but warn XXX
303 if server.has_key?('ip')
304 STDERR.puts("Host definition for #{name} has an 'ip' field. Please use 'address' instead");
305 server['address'] = server['ip'];
# host_name always comes from the key in the servers hash; a definition that
# sets it itself is rejected by set_complain_if_set.
309 set_complain_if_set server, 'host_name' , name, 'Host', name
# defaults for values the definition did not set
310 set_if_unset server, 'alias' , name
311 set_if_unset server, 'use' , HOST_TEMPLATE_NAME
# servers with pingable set to false get no default alive check
312 set_if_unset server, 'check_command', HOST_ALIVE_CHECK unless server['pingable'] == false
# hostgroups, pingable and no-servicegroups are not written to the host definition
314 print_block files['hosts'] , 'host' , server , %w(hostgroups pingable no-servicegroups)
# Write one hostgroup definition per hostgroup, in sorted order of the names.
# NOTE(review): the block terminator is not visible in this excerpt.
322 config['hostgroups'].keys.sort.each{ |name|
323 hg = config['hostgroups'][name]
# private hostgroups (such as 'all' and 'pingable') are not written
325 next if hg['private']
326 throw "Empty hostgroup #{name}" if hg['_memberlist'].length == 0
# name and member list are generated here and must not be set in the master config
328 set_complain_if_set hg, 'hostgroup_name', name , 'Hostgroup', name
329 set_complain_if_set hg, 'members' , hg['_memberlist'].sort.join(","), 'Hostgroup', name
331 print_block files['hostgroups'], 'hostgroup', hg, %w()
336 # SERVICES and DEPENDENCIES
# Normalise every service definition of the master config, work out the hosts
# it applies to and hand it to addService.
# NOTE(review): several block terminators and at least one branch keyword are
# not visible in this excerpt; the comments describe the visible statements.
338 config['services'].each{ |service|
339 throw "Empty service or service not a hash" unless service.kind_of?(Hash)
341 # make sure the depends list is an array
342 service['depends'] = ensure_array( service['depends'] )
344 # Both 'name' and 'service_description' are valid for a service's name
345 # Internally we only use service_description as that's nagios' official term
346 if service.has_key?('name')
347 throw "Service definition has both a name (#{service['name']})" +
348 "and a service_description (#{service['service_description']})" if service.has_key?('service_description')
349 #STDERR.puts("Service definition #{service['name']} has a 'name' field. Please use 'service_description' instead");
350 service['service_description'] = service['name'];
351 service.delete('name');
353 # Both 'check' and 'check_command' are valid for a service's check command
354 # Internally we only use check_command as that's nagios' official term
355 if service.has_key?('check')
356 throw "Service definition has both a check (#{service['check']})" +
357 "and a check_command (#{service['check_command']})" if service.has_key?('check_command')
358 #STDERR.puts("Service definition #{service['service_description']} has a 'check' field. Please use 'check_command' instead");
359 service['check_command'] = service['check'];
360 service.delete('check');
# hosts the service applies to (listed hosts and hostgroups minus the excluded ones)
364 hosts = find_hosts service, config['servers'], config['hostgroups']
365 throw "no hosts for service #{service['service_description']}" if hosts.empty?
# count how many of nrpe / check_command / remotecheck are set; at most one is allowed
367 throw "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}" if
368 (service['nrpe'] ? 1 : 0) +
369 (service['check_command'] ? 1 : 0) +
370 (service['remotecheck'] ? 1 : 0) >= 2
372 if service['runfrom'] && service['remotecheck']
373 # If the service check is to be run from a remote monitor server ("relay")
374 # add that as an NRPE check to be run on the relay and make this
375 # service also depend on NRPE on the relay
376 relay = service['runfrom']
378 hosts.sort.each{ |host|
379 # how to recursively copy this thing?
# a round trip through YAML yields a deep copy, so the per-host changes below
# do not end up in the shared service definition
380 hostservice = YAML::load( service.to_yaml )
381 host_ip = config['servers'][host]['address']
383 remotecheck = hostservice['remotecheck']
384 if remotecheck.include?('$HOSTADDRESS$') and not host_ip
385 throw "For some reason I do not have an address for #{host} yet I need it for the remote check. This shouldn't be."
# replace the $HOSTADDRESS$ and $HOSTNAME$ placeholders with this host's address and name
387 remotecheck.gsub!(/\$HOSTADDRESS\$/, host_ip) if host_ip
388 remotecheck.gsub!(/\$HOSTNAME\$/, host)
# register the per-host command line and call it through the two-argument
# NRPE check (relay address, check name)
# NOTE(review): a relay that is not a known server would fail here with a nil error - confirm it is validated elsewhere
389 check = $nrpe.add("#{host}_#{hostservice['service_description']}", remotecheck)
390 hostservice['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ config['servers'][ relay ]['address'] }!#{ check }"
392 # Make sure dependencies are an array. If there are none, create an empty array
393 # if depends is just a string, make a list with just that element
394 hostservice['depends'] = ensure_array( hostservice['depends'] )
395 # And append this new dependency
396 hostservice['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }";
398 addService( [ host ], hostservice, files, config['servers'])
# only one of runfrom / remotecheck is given: reject the definition
400 elsif service['runfrom'] || service['remotecheck']
401 throw "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}"
# NOTE(review): as shown, this line directly follows an unconditional throw and looks unreachable
402 throw "must not remotecheck without runfrom" if service['remotecheck']
# plain service (presumably the else branch): one definition shared by all hosts
404 addService(hosts, service, files, config['servers'])
# Write one servicegroup definition per servicegroup, in sorted order of the names.
# NOTE(review): the block terminator is not visible in this excerpt.
413 config['servicegroups'].keys.sort.each{ |name|
414 sg = config['servicegroups'][name]
# the name comes from the key in the master config and must not be set in the definition itself
416 set_complain_if_set sg, 'servicegroup_name', name , 'Servicegroup', name
418 print_block files['servicegroups'], 'servicegroup', sg, %w()
# Write every check registered with $nrpe to the nrpe output file as a
# "command[<name>]=<command line>" line, sorted by name.
# NOTE(review): the block terminator is not visible in this excerpt.
425 $nrpe.checks.keys.sort.each{ |name|
426 check = $nrpe.checks[name]
427 files['nrpe'].puts "command[#{ name }]=#{ check }"