[project @ peter@palfrader.org-20080421155517-09ricm83llf2x8ds]
[mirror/dsa-nagios.git] / build-nagios
1 #!/usr/bin/ruby
2
3 # Copyright (c) 2004, 2005, 2006, 2007, 2008 Peter Palfrader <peter@palfrader.org>
4
5 require "yaml"
6
ORG              = "dsa"
SHORTORG         = "dsa"
GENERATED_PREFIX = "./generated/"

# Maps each kind of generated configuration to the file it is written to
nagios_filename = {}
%w(hosts hostgroups services dependencies hostextinfo serviceextinfo servicegroups).each do |kind|
        nagios_filename[kind] = "#{ GENERATED_PREFIX }auto-#{ kind }.cfg"
end
nagios_filename['nrpe'] = "#{ GENERATED_PREFIX }nrpe_#{ ORG }.cfg"


MAX_CHECK_ATTEMPTS_DEFAULT = 6

NRPE_CHECKNAME      = "#{ ORG }_check_nrpe"       # check that takes one argument:  service name to be checked
NRPE_CHECKNAME_HOST = "#{ ORG }_check_nrpe_host"  # check that takes two arguments: relay host on which to run check, service name to be checked

HOST_TEMPLATE_NAME    = 'generic-host'      # host template that all our host definitions use
SERVICE_TEMPLATE_NAME = 'generic-service'   # service template that all our service definitions use
HOST_ALIVE_CHECK      = 'check-host-alive'  # host alive check if server is pingable
NRPE_PROCESS_SERVICE  = 'process - nrpe'    # nrpe checks will depend on this one
27
28
# Print a diagnostic message on standard error
def warn(msg)
        STDERR.puts(msg)
end
# Store value under key, but only if the hash has no entry for key yet.
# An existing entry is kept even if its value is nil.
def set_if_unset(hash, key, value)
        return if hash.has_key?(key)
        hash[key] = value
end
# Store value under key and insist that key was not present before.
#
# type and name only describe the definition being built (for instance
# 'Host' and the host name) and are used in the error message.
#
# Raises a RuntimeError if the hash already has an entry for key.
def set_complain_if_set(hash, key, value, type, name)
        # raise, not throw: throw is for non-local jumps to a matching catch
        # and would only surface here as an "uncaught throw" error
        raise "#{type} definition '#{name}' has '#{key}' already defined" if hash.has_key?(key)
        hash[key] = value
end
39
# Make an array out of something.
#
# nil becomes an empty array, a string becomes a one-element array holding
# that string, and an array is returned as is (the very same object, so
# appending to the result also changes the caller's array).
#
# Raises a RuntimeError for anything else.
def ensure_array(something)
        case something
        when nil    then []
        when String then [ something ]
        when Array  then something
        else
                raise "Do not know how to make an array out of #{something}: " + something.to_yaml
        end
end
55
56
# Registry of the checks that are run via NRPE.  Every distinct check
# command is stored once, under a unique name.
#
# Unfortunately NRPE limits check names to some 30 characters, so
# service names are mangled and cut off to build the check name.
class Nrpe
        # Hash of check name => check command, for all checks added so far
        attr_reader :checks

        def initialize
                @checks = {}
        end

        # Build a check name for the service called name that is not in use yet.
        def make_name( name, check )
                mangled = name.tr_s("^a-zA-Z", "_").gsub("process", "ps")
                base = "#{ SHORTORG }_" + mangled[0,19]

                suffix = ''
                padding = ''
                while @checks.has_key?(base + suffix)
                        # the name is taken: append part of a hash of the check so
                        # that cutting off the name does not cost us uniqueness
                        suffix = (check + padding).crypt("$1$")[-5,5]  # 5 chars are enough
                        suffix.tr!("/", "_")
                        padding += ' ' # so that the next attempt hashes differently
                end
                base + suffix      # max of 32 or so chars
        end

        # Register check for the service called name and return its check name.
        # A check command that is already registered keeps its existing name.
        def add( name, check )
                known = @checks.find{ |_, command| command == check }
                return known.first if known

                key = make_name(name, check)
                @checks[ key ] = check
                key
        end
end
$nrpe = Nrpe.new
101
102
# Write one "define <kind> { ... }" block to fd, with one line per
# key/value pair of hash.  This is what prints the bodies of most of
# our host/service/etc definitions.
#
# Keys listed in exclude_keys are left out, and so are private keys
# (those starting with an underscore).
def print_block(fd, kind, hash, exclude_keys)
        fd.puts "define #{kind} {"
        hash.each_pair do |key, value|
                hidden = key[0,1] == '_' || exclude_keys.include?(key)
                fd.puts "       #{key}          #{value}" unless hidden
        end
        fd.puts "}"
        fd.puts
end
119
120
# Write the nagios definitions for one service to the output files.
#
# hosts   - list of the names of the hosts the service is defined for
# service - the service definition hash.  It is modified in place: 'use' and
#           'max_check_attempts' get defaults, NRPE checks get their
#           'check_command' and 'depends', and '_extinfo' gets the service
#           and host names.
# files   - hash of open output files; the 'services', 'dependencies' and
#           'serviceextinfo' entries are written to
# servers - hash of host name => server definition, from which a service
#           inherits 'contacts' and 'contact_groups' it does not set itself
#
# Aborts with an error if a dependency cannot be parsed as [host:]service
# or if one of the keys set here is already present in the definition.
def addService(hosts, service, files, servers)

        set_if_unset        service, 'use'               , SERVICE_TEMPLATE_NAME
        set_if_unset        service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT

        # a negative max_check_attempts is taken relative to the default
        if service['max_check_attempts'] < 0
                service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts']
        end

        if service['nrpe']
                if service.has_key?('check_command')
                        raise "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+
                                "  This should have been caught much earlier"
                end

                check = $nrpe.add(service['service_description'], service['nrpe'])
                service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }"

                service['depends'] = ensure_array( service['depends'] )
                service['depends'] << NRPE_PROCESS_SERVICE unless service['service_description'] == NRPE_PROCESS_SERVICE  # Depend on NRPE unless we are it
        end

        # one service definition per host
        hosts.each do |host|
                s = service.clone
                set_complain_if_set s, 'host_name', host, 'Service', s['service_description']
                %w{contacts contact_groups}.each do |k|
                        set_if_unset(s, k, servers[host][k]) if servers[host].has_key? k
                end

                print_block files['services'], 'service', s, %w(nrpe runfrom remotecheck
                                                                depends
                                                                hosts hostgroups excludehosts excludehostgroups)
        end

        # one dependency definition per prerequisite and host.
        # 'depends' may be missing, a single string, or a list - ensure_array
        # copes with all three (a bare string has no #each in current ruby).
        ensure_array( service['depends'] ).each do |prerequisite|
                # a prerequisite is either "service" (on the same host as the
                # dependent service) or "host:service"
                parts = prerequisite.split(':')
                if parts.size > 2
                        raise "Cannot parse prerequisite #{prerequisite} for service #{service['service_description']} into host:service"
                end
                explicit_host, pre = (parts.size == 2) ? parts : [ nil, prerequisite ]

                hosts.each do |host|
                        dependency = {
                                'host_name'                     => explicit_host || host,
                                'service_description'           => pre,
                                'dependent_host_name'           => host,
                                'dependent_service_description' => service['service_description'],
                                'execution_failure_criteria'    => 'n',
                                'notification_failure_criteria' => 'w,u,c'
                        }
                        print_block files['dependencies'], 'servicedependency', dependency, %w()
                end
        end


        # a single serviceextinfo definition covering all hosts
        set_complain_if_set service['_extinfo'], 'service_description' , service['service_description'], 'serviceextinfo', service['service_description']
        set_complain_if_set service['_extinfo'], 'host_name'           , hosts.join(',')               , 'serviceextinfo', service['service_description']

        print_block files['serviceextinfo'], 'serviceextinfo', service['_extinfo'], %w()
end
185
# Host lists in services can be given both as single hosts and as hostgroups.
# This function merges the members of the listed hostgroups and the plain
# list of hosts into one list of host names.
#
# service    - service definition hash; its '<prefix>hosts' and
#              '<prefix>hostgroups' entries are comma separated lists
# servers    - hash of host name => server definition
# hostgroups - hash of hostgroup name => hostgroup definition, each with a
#              '_memberlist' array of host names
# prefix     - '' for the hosts of a service, 'exclude' for its exclude lists
#
# Duplicates are not removed.  Raises a RuntimeError if a listed host or
# hostgroup is not defined.
def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix)
        split_list = lambda { |list| list ? list.split(/,/).map{ |x| x.strip } : [] }

        hosts = split_list.call( service[prefix+'hosts'] )
        hosts.each do |host|
                raise "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host]
        end

        split_list.call( service[prefix+'hostgroups'] ).each do |hg|
                raise "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg]
                hosts.concat hostgroups[hg]['_memberlist']
        end

        hosts
end
205
# Figure out the hosts a given service applies to
#
# Returns the hosts and hostgroup members listed for the service minus the
# hosts listed in its exclude lists.
#
# Raises a RuntimeError if an excluded host is not in the list of hosts
# (any more), i.e. if it was never included or is excluded twice.
def find_hosts(service, servers, hostgroups)
        hosts        = merge_hosts_and_hostgroups service, servers, hostgroups, ''
        excludehosts = merge_hosts_and_hostgroups service, servers, hostgroups, 'exclude'

        excludehosts.each do |host|
                # Array#delete returns nil if there was nothing to delete
                next unless hosts.delete(host).nil?
                raise "Cannot remove host #{host} from service #{service['service_description']}: it's not included anyway or excluded twice."
        end

        hosts
end
221
# Collect all entries whose key starts with "extinfo-" in the '_extinfo'
# subhash (which is created, or replaced, here), keyed by the rest of their
# key, and remove them from the hash itself.
def split_away_extinfo(hash)
        prefix = 'extinfo-'
        extinfo = hash['_extinfo'] = {}
        # iterate over a copy of the keys as entries are deleted on the way
        hash.keys.each do |key|
                next unless key[0, prefix.length] == prefix
                extinfo[ key[prefix.length..-1] ] = hash.delete(key)
        end
end
233
234
235 #############################################################################################
236 #############################################################################################
237 #############################################################################################
238
# Load the config
# (the block form of File.open closes the file again once it has been parsed)
config = File.open( 'nagios-master.cfg' ) { |fd| YAML::load( fd ) }

files = {}
# Open all output files for writing.  This truncates any files left over
# from a previous run.
nagios_filename.each_pair{ |name, filename|
        files[name] = File.new(filename, "w")
}
247
#################################
# create a few hostgroups
#################################
# create the "all" and "pingable" hostgroups.  They are marked private,
# which keeps them from being written out as hostgroup definitions.
[ ['all', "all servers"], ['pingable', "pingable servers"] ].each do |hgname, hgalias|
        config['hostgroups'][hgname] = { 'alias' => hgalias, 'private' => true }
end

# give every hostgroup its '_extinfo' subhash and an empty member list
config['hostgroups'].each_pair do |name, hg|
        raise "Empty hostgroup or hostgroup #{name} not a hash" unless hg.kind_of?(Hash)
        split_away_extinfo hg

        hg['_memberlist'] = []
end

# work out the hostgroups of every server and register the server as a
# member of each of them
config['servers'].each_pair do |name, server|
        raise "Empty server or server #{name} not a hash" unless server.kind_of?(Hash)

        split_away_extinfo server

        raise "No hostgroups defined for #{name}" unless server['hostgroups']
        server['_hostgroups'] = server['hostgroups'].split(/,/).map{ |x| x.strip }
        server['_hostgroups'] << 'all'
        server['_hostgroups'] << 'pingable' unless server['pingable'] == false

        server['_hostgroups'].each do |hg|
                raise "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg)
                config['hostgroups'][hg]['_memberlist'] << name
        end
end

config['servicegroups'] = {} unless config.has_key? 'servicegroups'
283
##############
# HOSTS
##############
config['servers'].each_pair do |name, server|
        # Our source file used to call this field 'ip' rather than 'address'.
        # Still accept the old name, but complain about it.             XXX
        if server.has_key?('ip')
                STDERR.puts("Host definition for #{name} has an 'ip' field.  Please use 'address' instead")
                server['address'] = server.delete('ip')
        end

        set_complain_if_set server, 'host_name', name, 'Host', name
        set_if_unset        server, 'alias'    , name
        set_if_unset        server, 'use'      , HOST_TEMPLATE_NAME
        unless server['pingable'] == false
                set_if_unset server, 'check_command', HOST_ALIVE_CHECK
        end

        print_block files['hosts'], 'host', server, %w(hostgroups pingable)


        # Handle hostextinfo
        host_extinfo = server['_extinfo']

        # defaults come from the first of the host's hostgroups that has extinfo
        group_extinfo = server['_hostgroups'].
                map{ |hgname| config['hostgroups'][hgname]['_extinfo'] }.
                find{ |ei| ei.size != 0 }
        if group_extinfo
                group_extinfo.each_pair do |k, v|
                        # substitute hostname into the notes_url
                        v = sprintf(v, name) if k == 'notes_url'

                        set_if_unset host_extinfo, k, v
                end
        end

        set_complain_if_set host_extinfo, 'host_name', name, 'hostextinfo', name
        # the icon image doubles as vrml and statusmap image unless those are given
        if host_extinfo.has_key?('icon_image')
                set_if_unset host_extinfo, 'vrml_image'     , host_extinfo['icon_image']
                set_if_unset host_extinfo, 'statusmap_image', host_extinfo['icon_image']
        end

        print_block files['hostextinfo'], 'hostextinfo', host_extinfo, %w()
end
324
325
326
##############
# HOSTGROUPS
##############
config['hostgroups'].each_pair do |name, hg|
        # private hostgroups are for internal use only and are not written out
        unless hg['private']
                set_complain_if_set hg, 'hostgroup_name', name                       , 'Hostgroup', name
                set_complain_if_set hg, 'members'       , hg['_memberlist'].join(","), 'Hostgroup', name

                print_block files['hostgroups'], 'hostgroup', hg, %w()
        end
end
338
339
##############
# SERVICES and DEPENDENCIES
##############
config['services'].each do |service|
        raise "Empty service or service not a hash" unless service.kind_of?(Hash)

        split_away_extinfo service


        # Both 'name' and 'service_description' are valid for a service's name
        # Internally we only use service_description as that's nagios' official term
        if service.has_key?('name')
                if service.has_key?('service_description')
                        raise "Service definition has both a name (#{service['name']}) " +
                              "and a service_description (#{service['service_description']})"
                end
                service['service_description'] = service.delete('name')
        end
        # Both 'check' and 'check_command' are valid for a service's check command
        # Internally we only use check_command as that's nagios' official term
        if service.has_key?('check')
                if service.has_key?('check_command')
                        raise "Service definition has both a check (#{service['check']}) " +
                              "and a check_command (#{service['check_command']})"
                end
                service['check_command'] = service.delete('check')
        end


        hosts = find_hosts service, config['servers'], config['hostgroups']
        raise "no hosts for service #{service['service_description']}" if hosts.empty?

        # at most one way of checking may be given
        check_kinds = %w(nrpe check_command remotecheck).select{ |k| service[k] }
        if check_kinds.size >= 2
                raise "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}"
        end

        if service['runfrom'] && service['remotecheck']
                # If the service check is to be run from a remote monitor server ("relay")
                # add that as an NRPE check to be run on the relay and make this
                # service also depend on NRPE on the relay
                relay = service['runfrom']

                # fail with a clear message instead of a NoMethodError on nil or
                # a check_command with an empty relay address
                relay_server = config['servers'][relay]
                raise "Relay host #{relay} does not exist - used in service #{service['service_description']}" unless relay_server
                relay_ip = relay_server['address']
                raise "For some reason I do not have an address for relay host #{relay}.  This shouldn't be." unless relay_ip

                hosts.each do |host|
                        # every host gets its own recursive copy of the definition
                        hostservice = Marshal.load( Marshal.dump( service ) )
                        host_ip = config['servers'][host]['address']
                        raise "For some reason I do not have an address for #{host}.  This shouldn't be." unless host_ip

                        check = $nrpe.add("#{host}_#{hostservice['service_description']}", hostservice['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip))
                        hostservice['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ relay_ip }!#{ check }"

                        # Make sure dependencies are an array.  If there are none, create an empty array
                        # if depends is just a string, make a list with just that element
                        hostservice['depends'] = ensure_array( hostservice['depends'] )
                        # And append this new dependency
                        hostservice['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }"

                        addService( [ host ], hostservice, files, config['servers'])
                end
        elsif service['runfrom'] || service['remotecheck']
                raise "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}"
        else
                addService(hosts, service, files, config['servers'])
        end
end
407
408
409
##############
# SERVICEGROUPS
##############
config['servicegroups'].each_pair do |name, sg|
        # the name of a servicegroup is its key in the config
        set_complain_if_set sg, 'servicegroup_name', name, 'Servicegroup', name
        print_block files['servicegroups'], 'servicegroup', sg, %w()
end
418
419
##############
# NRPE config file
##############
# one command definition per check that was registered with $nrpe
nrpe_cfg = files['nrpe']
$nrpe.checks.each_pair do |name, check|
        nrpe_cfg.puts "command[#{ name }]=#{ check }"
end
425 }
426
427