Merge branch 'master' of git+ssh://db.debian.org/git/dsa-nagios
[mirror/dsa-nagios.git] / config / build-nagios
1 #!/usr/bin/ruby
2
3 # build nagios and nrpe config from a single master config file
4
5 # Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Peter Palfrader <peter@palfrader.org>
6 #
7 # Permission is hereby granted, free of charge, to any person obtaining
8 # a copy of this software and associated documentation files (the
9 # "Software"), to deal in the Software without restriction, including
10 # without limitation the rights to use, copy, modify, merge, publish,
11 # distribute, sublicense, and/or sell copies of the Software, and to
12 # permit persons to whom the Software is furnished to do so, subject to
13 # the following conditions:
14 #
15 # The above copyright notice and this permission notice shall be
16 # included in all copies or substantial portions of the Software.
17 #
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26
27 require "yaml"
28
# Organisation identifiers: ORG goes into the name of the generated nrpe file
# and of the nrpe check commands, SHORTORG prefixes the nrpe check names.
ORG = "dsa"
SHORTORG = "dsa"
# Directory, relative to the working directory, that receives all output files
GENERATED_PREFIX = "./generated/"

# Maps each kind of output to the path of the file it is written to
nagios_filename = {}
%w(hosts hostgroups services dependencies hostextinfo serviceextinfo servicegroups).each do |kind|
        nagios_filename[kind] = "#{ GENERATED_PREFIX }auto-#{ kind }.cfg"
end
nagios_filename['nrpe'] = "#{ GENERATED_PREFIX }nrpe_#{ ORG }.cfg"


# Used for services that do not set max_check_attempts themselves; negative
# values given by a service are taken relative to this
MAX_CHECK_ATTEMPTS_DEFAULT = 6

NRPE_CHECKNAME      = "#{ ORG }_check_nrpe"       # check that takes one argument:  service name to be checked
NRPE_CHECKNAME_HOST = "#{ ORG }_check_nrpe_host"  # check that takes two arguments: relay host on which to run check, service name to be checked

HOST_TEMPLATE_NAME    = 'generic-host'      # host template that all our host definitions use
SERVICE_TEMPLATE_NAME = 'generic-service'   # service template that all our service definitions use
HOST_ALIVE_CHECK      = 'check-host-alive'  # host alive check if server is pingable
NRPE_PROCESS_SERVICE  = 'process - nrpe'    # nrpe checks will depend on this one
NRPE_LOAD_CHECK       = 'load'              # nrpe checks (other than the nrpe process one) also depend on this one
50
51
# Print the diagnostic message msg to standard error
def warn(msg)
        STDERR.puts(msg)
end
# Store value under key in hash, but only if hash has no such key yet.
# An existing entry is left alone, even if its value is nil.
def set_if_unset(hash, key, value)
        return if hash.key?(key)
        hash[key] = value
end
# Store value under key in hash, refusing to overwrite an existing entry.
#
# type and name are only used to identify the offending definition in the
# error message.  Raises a RuntimeError if hash already has key.
def set_complain_if_set(hash, key, value, type, name)
        # raise rather than throw: throw belongs to catch/throw control flow and
        # would only surface as an "uncaught throw" error hiding the message
        raise "#{type} definition '#{name}' has '#{key}' already defined" if hash.has_key?(key)
        hash[key] = value
end
62
# Make an array out of something.  If there is nothing (nil), create an empty
# array; if it is just a string, make a list with just that element; if it
# already is an array, return it as it is (the very same object).
#
# Raises a RuntimeError for anything else.
def ensure_array(something)
        case something
        when nil    then []
        when String then [ something ]
        when Array  then something
        else
                # raise rather than throw, so the message reaches the user as a regular error
                raise "Do not know how to make an array out of #{something}: " + something.to_yaml
        end
end
78
79
# Registry of the checks that are run via NRPE.  It hands out one unique
# command name per distinct check command line.
#
# Unfortunately NRPE limits check names to some 30 characters, so
# service names get cut short and, where that causes a clash, are made
# unique again by appending a few characters of a hash.
class Nrpe
        # Hash of command name => check command line for everything added so far
        attr_reader :checks

        def initialize
                @checks = {}
        end

        # Build a command name for check, derived from the service name, that is
        # not yet in use.  Does not register it.
        def make_name( name, check )
                mangled = name.tr_s("^a-zA-Z", "_").gsub("process", "ps")
                base = "#{ SHORTORG }_" + mangled[0,19]

                suffix = ''
                skew = ''
                while @checks.has_key?(base + suffix)
                        # hash the check, so that we don't lose uniqueness by cutting the
                        # name off; 5 chars of the hash are enough
                        suffix = (check+skew).crypt("$1$")[-5,5].tr("/", "_")
                        skew += ' ' # change it a bit so the hash changes on the next round
                end
                base + suffix      # max of 32 or so chars
        end

        # Register check and return its command name.  A check that was added
        # before keeps the name it got back then, whatever name is passed now.
        def add( name, check )
                known = @checks.key(check)
                return known if known

                fresh = make_name(name, check)
                @checks[ fresh ] = check
                fresh
        end
end
$nrpe = Nrpe.new
124
125
# Write one nagios object definition ("define <kind> { ... }") to fd, with
# one line per key/value pair of hash, followed by an empty line.
# This is the function that prints the bodies of most of our
# host/service/etc definitions.
#
# Left out are the keys listed in exclude_keys and our internal bookkeeping
# keys _extinfo, _hostgroups and _memberlist.
def print_block(fd, kind, hash, exclude_keys)
        internal_keys = %w(_extinfo _hostgroups _memberlist)

        fd.puts "define #{kind} {"
        hash.each_pair do |key, value|
                next if internal_keys.include?(key) || exclude_keys.include?(key)
                fd.puts "       #{key}          #{value}"
        end
        fd.puts "}"
        fd.puts
end
144
# Combine the contacts and contact_groups of host with those of service.
# For each of the two keys the values present in host and in service are
# joined with a comma (host first) and stored back into service; a key that
# neither of them has stays unset.
def merge_contacts(host, service)
        %w{contacts contact_groups}.each do |k|
                merged = [host, service].select{ |source| source.has_key?(k) }.map{ |source| source[k] }
                service[k] = merged.join(",") unless merged.empty?
        end
end
154
# Write the definition of service for each of hosts, together with its
# service dependencies and its serviceextinfo.
#
# hosts   - list of the names of the hosts the service is defined for
# service - hash describing the service; it gets modified in place
# files   - hash of the open output files, of which 'services',
#           'dependencies' and 'serviceextinfo' are written to
# servers - hash of host name => host definition, consulted for contacts
#
# Raises a RuntimeError if the service is inconsistent (an nrpe service that
# already has a check_command, or a prerequisite that cannot be parsed).
def addService(hosts, service, files, servers)

        set_if_unset        service, 'use'               , SERVICE_TEMPLATE_NAME
        set_if_unset        service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT

        # A negative max_check_attempts is taken relative to the default
        service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts'] if service['max_check_attempts'] < 0

        if service['nrpe']
                raise "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+
                        "  This should have been caught much earlier" if service.has_key?('check_command');

                check = $nrpe.add(service['service_description'], service['nrpe'])
                service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }"

                service['depends'] = ensure_array( service['depends'] )
                unless service['service_description'] == NRPE_PROCESS_SERVICE
                        # Depend on the load check to reduce noise
                        service['depends'] << NRPE_LOAD_CHECK unless service['service_description'] == NRPE_LOAD_CHECK
                        # Depend on NRPE unless we are it
                        service['depends'] << NRPE_PROCESS_SERVICE
                end
        end

        hosts.each{ |host|
                s = service.clone
                set_complain_if_set s, 'host_name', host, 'Service', s['service_description']
                merge_contacts(servers[host], s)

                print_block files['services'], 'service', s, %w(nrpe runfrom remotecheck
                                                                depends
                                                                hosts hostgroups excludehosts excludehostgroups)
        }

        if service['depends']
                # For services that did not pass through the NRPE block above, depends
                # may still be a plain string, so normalise it before iterating
                ensure_array( service['depends'] ).each{ |prerequisite|
                        hosts.each{ |host|
                                prerequisite_host = host
                                pre = prerequisite
                                # split off a hostname if there's one
                                bananasplit = prerequisite.split(':')
                                if bananasplit.size == 2
                                        prerequisite_host = bananasplit[0]
                                        pre = bananasplit[1]
                                elsif bananasplit.size > 2
                                        raise "Cannot parse prerequisite #{prerequisite} for service #{service['service_description']} into host:service"
                                end
                                dependency = {
                                        'host_name'                     => prerequisite_host,
                                        'service_description'           => pre,
                                        'dependent_host_name'           => host,
                                        'dependent_service_description' => service['service_description'],
                                        'execution_failure_criteria'    => 'n',
                                        'notification_failure_criteria' => 'w,u,c'
                                };
                                print_block files['dependencies'], 'servicedependency', dependency, %w()
                        }
                }
        end


        set_complain_if_set service['_extinfo'], 'service_description' , service['service_description'], 'serviceextinfo', service['service_description']
        set_complain_if_set service['_extinfo'], 'host_name'           , hosts.join(',')               , 'serviceextinfo', service['service_description']

        print_block files['serviceextinfo'], 'serviceextinfo', service['_extinfo'], %w()
end
222
# Hostlists in services can be given both as single hosts and as hostgroups.
# This function merges the members of the listed hostgroups and the simple
# list of hosts into one list of host names.
#
# It also takes a prefix that is put in front of the 'hosts' and 'hostgroups'
# keys, so that it can be used for the exclude lists as well.
#
# Raises a RuntimeError if a listed host or hostgroup does not exist.
def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix)
        hosts = []
        hosts = service[prefix+'hosts'].split(/,/).map{ |x| x.strip } if service[prefix+'hosts']
        hosts.each{ |host|
                raise "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host]
        };
        if service[prefix+'hostgroups']
                service[prefix+'hostgroups'].split(/,/).map{ |x| x.strip }.each{ |hg|
                        raise "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg]
                        # hosts is our own array here, the member list itself is not changed
                        hosts = hosts.concat hostgroups[hg]['_memberlist']
                }
        end

        return hosts
end
242
# Figure out the hosts a given service applies to
#
# For a given service find the list of hosts minus excluded hosts that this
# service runs on.  Every host is listed only once, even if it was named
# directly as well as through a hostgroup (or through several hostgroups);
# otherwise its service would be defined more than once.
#
# Excluding a host that is not included in the first place only results in
# an informational message on standard output.
def find_hosts(service, servers, hostgroups)
        hosts        = merge_hosts_and_hostgroups(service, servers, hostgroups, '').uniq
        excludehosts = merge_hosts_and_hostgroups(service, servers, hostgroups, 'exclude').uniq

        excludehosts.each{ |host|
                if hosts.delete(host).nil?
                        puts "info: [service #{service['service_description']}] not excluding host #{host} - it's not included anyway."
                end
        }

        return hosts
end
258
# Collect all entries of hash whose key starts with "extinfo-" in a new
# subhash stored as hash['_extinfo'].  The entries are removed from hash
# itself and lose the "extinfo-" prefix of their key on the way.
# hash['_extinfo'] is always created, even if it ends up empty.
def split_away_extinfo(hash)
        prefix = 'extinfo-'
        extinfo = hash['_extinfo'] = {}
        # walk over a snapshot of the keys, as entries are deleted on the way
        hash.keys.each do |key|
                next unless key[0, prefix.length] == prefix
                extinfo[ key[prefix.length..-1] ] = hash.delete(key)
        end
end
270
271
272 #############################################################################################
273 #############################################################################################
274 #############################################################################################
275
# Load the config
# (block form of File.open, so that the file is closed again after parsing)
config = File.open( 'nagios-master.cfg' ) { |f| YAML::load( f ) }

files = {}
# Open all output files for writing; this empties whatever an earlier run
# left behind in them
nagios_filename.each_pair{ |name, filename|
        files[name] = File.new(filename, "w")
}
284
#################################
# create a few hostgroups
#################################
# create the "all" and "pingable" hostgroups; both are marked private
config['hostgroups']['all']      = { 'alias' => "all servers"     , 'private' => true }
config['hostgroups']['pingable'] = { 'alias' => "pingable servers", 'private' => true }

config['hostgroups'].each_pair{ |name, hg|
        raise "Empty hostgroup or hostgroup #{name} not a hash" unless hg.kind_of?(Hash)
        split_away_extinfo hg

        # filled with the names of the member servers below
        hg['_memberlist'] = []
}

# Work out the hostgroups of every server and enter the server into the
# member list of each of them
config['servers'].each_pair{ |name, server|
        raise "Empty server or server #{name} not a hash" unless server.kind_of?(Hash)

        split_away_extinfo server

        raise "No hostgroups defined for #{name}" unless server['hostgroups']
        server['_hostgroups'] = server['hostgroups'].split(/,/).map{ |x| x.strip };
        # every server is in "all"; it is in "pingable" too unless it says pingable: false
        server['_hostgroups'] << 'all'
        server['_hostgroups'] << 'pingable' unless server['pingable'] == false

        server['_hostgroups'].each{ |hg|
                raise "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg)
                config['hostgroups'][hg]['_memberlist'] << name
        };
}

# servicegroups are optional in the config
config['servicegroups'] = {} unless config.has_key? 'servicegroups'
320
##############
# HOSTS
##############
# Write a host definition and a hostextinfo definition for every server
config['servers'].each_pair do |name, server|
        # Formerly we used 'ip' instead of 'address' in our source file
        # Handle this change but warn                                   XXX
        if server.has_key?('ip')
                STDERR.puts("Host definition for #{name} has an 'ip' field.  Please use 'address' instead");
                server['address'] = server.delete('ip')
        end

        set_complain_if_set server, 'host_name'    , name, 'Host', name
        set_if_unset        server, 'alias'        , name
        set_if_unset        server, 'use'          , HOST_TEMPLATE_NAME
        set_if_unset        server, 'check_command', HOST_ALIVE_CHECK    unless server['pingable'] == false

        print_block files['hosts'], 'host', server, %w(hostgroups pingable)


        # Handle hostextinfo
        host_extinfo = server['_extinfo']

        # The host inherits the extinfo of the first of its hostgroups that has
        # any; settings made for the host itself take precedence.
        inherited = server['_hostgroups'].map{ |hgname| config['hostgroups'][hgname]['_extinfo'] }.find{ |ei| ei.size != 0 }
        if inherited
                inherited.each_pair do |k, v|
                        # substitute hostname into the notes_url
                        v = sprintf(v,name) if k == 'notes_url'

                        set_if_unset host_extinfo, k, v
                end
        end

        set_complain_if_set host_extinfo, 'host_name', name, 'hostextinfo', name
        # the other images default to the icon image
        if host_extinfo.has_key?('icon_image')
                set_if_unset host_extinfo, 'vrml_image'     , host_extinfo['icon_image']
                set_if_unset host_extinfo, 'statusmap_image', host_extinfo['icon_image']
        end

        print_block files['hostextinfo'], 'hostextinfo', host_extinfo, %w()
end
361
362
363
##############
# HOSTGROUPS
##############
# Write a hostgroup definition for every hostgroup that is not private.
# A non-private hostgroup without any member is an error.
config['hostgroups'].each_pair{ |name, hg|
        next if hg['private']
        raise "Empty hostgroup #{name}" if hg['_memberlist'].empty?

        set_complain_if_set hg, 'hostgroup_name', name                       , 'Hostgroup', name
        set_complain_if_set hg, 'members'       , hg['_memberlist'].join(","), 'Hostgroup', name

        print_block files['hostgroups'], 'hostgroup', hg, %w()
}
376
377
##############
# SERVICES and DEPENDENCIES
##############
# Normalise every service of the config, check it for consistency, work out
# the hosts it applies to and hand it to addService.  Inconsistent services
# abort the run with a RuntimeError.
config['services'].each{ |service|
        raise "Empty service or service not a hash" unless service.kind_of?(Hash)

        split_away_extinfo service


        # Both 'name' and 'service_description' are valid for a service's name
        # Internally we only use service_description as that's nagios' official term
        if service.has_key?('name')
                raise "Service definition has both a name (#{service['name']})" +
                      "and a service_description (#{service['service_description']})" if service.has_key?('service_description')
                #STDERR.puts("Service definition #{service['name']} has a 'name' field.  Please use 'service_description' instead");
                service['service_description'] = service['name'];
                service.delete('name');
        end
        # Both 'check' and 'check_command' are valid for a service's check command
        # Internally we only use check_command as that's nagios' official term
        if service.has_key?('check')
                raise "Service definition has both a check (#{service['check']})" +
                      "and a check_command (#{service['check_command']})" if service.has_key?('check_command')
                #STDERR.puts("Service definition #{service['service_description']} has a 'check' field.  Please use 'check_command' instead");
                service['check_command'] = service['check'];
                service.delete('check');
        end


        hosts = find_hosts service, config['servers'], config['hostgroups']
        raise "no hosts for service #{service['service_description']}" if hosts.empty?

        raise "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}" if
                (service['nrpe'] ? 1 : 0) +
                (service['check_command'] ? 1 : 0) +
                (service['remotecheck'] ? 1 : 0)  >= 2

        if service['runfrom'] && service['remotecheck']
                # If the service check is to be run from a remote monitor server ("relay")
                # add that as an NRPE check to be run on the relay and make this
                # service also depend on NRPE on the relay
                relay = service['runfrom']
                relay_server = config['servers'][relay]
                # fail with a clear message instead of a NoMethodError on nil further down
                raise "relay host #{relay} does not exist - used in runfrom of service #{service['service_description']}" unless relay_server

                hosts.each{ |host|
                        # every host gets its own deep copy of the service (made by a
                        # round trip through YAML), as the copy is changed below
                        hostservice = YAML::load( service.to_yaml )
                        host_ip = config['servers'][host]['address']

                        remotecheck = hostservice['remotecheck']
                        if remotecheck.include?('$HOSTADDRESS$') and not host_ip
                                raise "For some reason I do not have an address for #{host} yet I need it for the remote check.  This shouldn't be."
                        end
                        remotecheck.gsub!(/\$HOSTADDRESS\$/, host_ip) if host_ip
                        remotecheck.gsub!(/\$HOSTNAME\$/, host)
                        check = $nrpe.add("#{host}_#{hostservice['service_description']}", remotecheck)
                        hostservice['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ relay_server['address'] }!#{ check }"

                        # Make sure dependencies are an array.  If there are none, create an empty array
                        # if depends is just a string, make a list with just that element
                        hostservice['depends'] = ensure_array( hostservice['depends'] )
                        # And append this new dependency
                        hostservice['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }";

                        addService( [ host ], hostservice, files, config['servers'])
                }
        elsif service['runfrom'] || service['remotecheck']
                raise "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}"
        else
                addService(hosts, service, files, config['servers'])
        end
}
450
451
452
##############
# SERVICEGROUPS
##############
# Write a servicegroup definition for every servicegroup of the config
config['servicegroups'].each_pair do |name, sg|
        set_complain_if_set sg, 'servicegroup_name', name, 'Servicegroup', name

        print_block files['servicegroups'], 'servicegroup', sg, %w()
end


##############
# NRPE config file
##############
# One command[...] line for every check that was registered with $nrpe
$nrpe.checks.each_pair do |name, check|
        files['nrpe'].puts "command[#{ name }]=#{ check }"
end
467         files['nrpe'].puts "command[#{ name }]=#{ check }"
468 }
469
470