[project @ peter@palfrader.org-20080203230950-4fr2onl7p8dgn7hm]
[mirror/dsa-nagios.git] / build-nagios
1 #!/usr/bin/ruby
2
3 # Copyright (c) 2004, 2005, 2006, 2007 Peter Palfrader <peter@palfrader.org>
4
5 require "yaml"
6
ORG="relativity"
SHORTORG="rela"                 # prefix of the generated NRPE check names
CONTACTGROUP="weaselgroup"      # contact group used for hostgroups that do not set one
GENERATED_PREFIX="/etc/NOREPLY/generated/nagios/"

# Maps each kind of generated output to the path of the file it is written to.
# File.join is used so that the result is the same whether or not
# GENERATED_PREFIX ends in a slash.
nagios_filename = {}
%w(hosts hostgroups services dependencies hostextinfo serviceextinfo).each{ |kind|
        nagios_filename[kind] = File.join(GENERATED_PREFIX, "auto-#{kind}.cfg")
}
nagios_filename['nrpe'] = File.join(GENERATED_PREFIX, "nrpe_#{ ORG }.cfg")


# max_check_attempts of services that do not set it themselves; a negative
# value given for a service is added to this default
MAX_CHECK_ATTEMPTS_DEFAULT=6

NRPE_CHECKNAME="#{ ORG }_check_nrpe"           # check that takes one argument:  service name to be checked
NRPE_CHECKNAME_HOST="#{ ORG }_check_nrpe_host" # check that takes two arguments: relay host on which to run check, service name to be checked

HOST_TEMPLATE_NAME='generic-host'          # host template that all our host definitions use
SERVICE_TEMPLATE_NAME='generic-service'    # service template that all our service definitions use
HOST_ALIVE_CHECK='check-host-alive'        # host alive check if server is pingable
NRPE_PROCESS_SERVICE='process - nrpe'      # nrpe checks will depend on this one
28
29
# Print the given messages to STDERR, one per line.
#
# A method defined at top level replaces Kernel#warn for every object, so this
# has to accept the calls Kernel#warn accepts: any number of messages and a
# trailing option hash (e.g. :uplevel), which is ignored here.  As a
# consequence a Hash passed as the last message is not printed.
def warn(*msgs)
        msgs.pop if msgs.last.kind_of?(Hash)
        STDERR.puts(*msgs) unless msgs.empty?
end
# Store value under key in hash, but only if hash has no entry for key yet.
# An existing entry is left alone, even if its value is nil or false.
def set_if_unset(hash, key, value)
        return nil if hash.key?(key)
        hash[key] = value
end
# Store value under key in hash and fail if hash already has an entry for key.
#
# type and name are only used for the error message: they say which kind of
# definition (e.g. 'Host') with which name carries the offending key.
#
# Raises RuntimeError if key is already present; hash is not changed then.
def set_complain_if_set(hash, key, value, type, name)
        raise "#{type} definition '#{name}' has '#{key}' already defined" if hash.has_key?(key)
        hash[key] = value
end
40
# Make an array out of something.
#
# nil becomes an empty array, a string becomes a list with just that
# element, and an array is returned as it is (the same object, not a copy).
#
# Raises RuntimeError for anything else.
def ensure_array(something)
        case something
        when nil    then []
        when String then [ something ]
        when Array  then something
        else
                raise "Do not know how to make an array out of #{something}: " + something.to_yaml
        end
end
56
57
# Registry of the checks that are done via NRPE.
#
# Every distinct check is stored once, under a unique name derived from
# the name of the service that uses it.
#
# Unfortunately NRPE limits check names to some 30 characters, so the
# service names are cut off and, where that causes a clash, get a short
# hash appended.
class Nrpe
        # Hash mapping check name => check
        attr_reader :checks

        def initialize
                @checks = {}
        end

        # Derive a check name that is not in use yet.
        #
        # In name every run of characters other than a-z and A-Z becomes a
        # single underscore and "process" is shortened to "ps".  The name is
        # made of SHORTORG, an underscore and the first 19 characters of
        # that; 5 characters hashed from check are appended if it is taken.
        def make_name( name, check )
                mangled = name.tr_s("^a-zA-Z", "_").gsub("process", "ps")
                base = "#{ SHORTORG }_" + mangled[0,19]

                suffix  = ''
                padding = ''
                while @checks.has_key?(base + suffix)
                        # hash the check, so that cutting off the name does not cost us uniqueness;
                        # the last 5 chars of the hash are enough
                        suffix = (check + padding).crypt("$1$")[-5,5]
                        suffix.tr!("/", "_")
                        padding += ' ' # vary the input so that the next attempt hashes differently
                end
                base + suffix      # max of 32 or so chars
        end

        # Register check for the service called name and return the name
        # the check is known under.  A check that is registered already
        # keeps the name it was given first.
        def add( name, check )
                known = @checks.find{ |_, registered| registered == check }
                return known.first if known

                key = make_name(name, check)
                @checks[ key ] = check
                key
        end
end
$nrpe = Nrpe.new
102
103
# Write one "define <kind> { ... }" block to fd, followed by an empty line.
# This is what prints the bodies of most of our host/service/etc
# definitions.
#
# Each entry of hash becomes one "key value" line, except for
#  - private keys (those starting with an underscore), and
#  - keys that are listed in exclude_keys.
def print_block(fd, kind, hash, exclude_keys)
        fd.puts "define #{kind} {"
        hash.each_pair do |key, value|
                hidden = key[0,1] == '_' || exclude_keys.include?(key)
                fd.puts "       #{key}          #{value}" unless hidden
        end
        fd.puts "}"
        fd.puts
end
120
121
# Write the definition of service for the given hosts, together with its
# service dependencies and its serviceextinfo block.
#
# hosts   - list of the names of the hosts the service runs on
# service - hash describing the service.  It is modified: defaults and
#           'host_name' are filled in, for NRPE services 'check_command'
#           and 'depends' as well.  service['_extinfo'] has to be a hash.
# files   - hash of the open output files; the 'services', 'dependencies'
#           and 'serviceextinfo' entries are written to
#
# Raises RuntimeError if 'host_name' is set already, if an NRPE service
# already has a check_command, or if a dependency cannot be parsed.
def addService(hosts, service, files)
        description = service['service_description']

        set_if_unset        service, 'use'               , SERVICE_TEMPLATE_NAME
        set_complain_if_set service, 'host_name'         , hosts.join(',')      , 'Service', description
        set_if_unset        service, 'max_check_attempts', MAX_CHECK_ATTEMPTS_DEFAULT

        # a negative value is taken relative to the default
        service['max_check_attempts'] = MAX_CHECK_ATTEMPTS_DEFAULT + service['max_check_attempts'] if service['max_check_attempts'] < 0

        if service['nrpe']
                raise "We already have a check_command (#{service['check_command']}) but we are in the NRPE block (nrpe: #{service['nrpe']})."+
                        "  This should have been caught much earlier" if service.has_key?('check_command')

                check = $nrpe.add(description, service['nrpe'])
                service['check_command'] = "#{ NRPE_CHECKNAME }!#{ check }"

                service['depends'] = ensure_array( service['depends'] )
                service['depends'] << NRPE_PROCESS_SERVICE unless description == NRPE_PROCESS_SERVICE  # Depend on NRPE unless we are it
        end

        print_block files['services'], 'service', service, %w(nrpe runfrom remotecheck
                                                              depends
                                                              hosts hostgroups excludehosts excludehostgroups)

        # 'depends' may be missing, a single string or a list.  Normalize it here
        # as well, because only NRPE services were converted above and a plain
        # String cannot be iterated with each on Ruby 1.9 and later.
        ensure_array( service['depends'] ).each{ |prerequisite|
                hosts.each{ |host|
                        prerequisite_host = host
                        pre = prerequisite
                        # split off a hostname if there's one ("host:service")
                        bananasplit = prerequisite.split(':')
                        if bananasplit.size == 2
                                prerequisite_host = bananasplit[0]
                                pre = bananasplit[1]
                        elsif bananasplit.size > 2
                                raise "Cannot parse prerequisite #{prerequisite} for service #{description} into host:service"
                        end
                        dependency = {
                                'host_name'                     => prerequisite_host,
                                'service_description'           => pre,
                                'dependent_host_name'           => host,
                                'dependent_service_description' => description,
                                'execution_failure_criteria'    => 'n',
                                'notification_failure_criteria' => 'w,u,c'
                        }
                        print_block files['dependencies'], 'servicedependency', dependency, %w()
                }
        }

        set_complain_if_set service['_extinfo'], 'service_description' , description    , 'serviceextinfo', description
        set_complain_if_set service['_extinfo'], 'host_name'           , hosts.join(','), 'serviceextinfo', description

        print_block files['serviceextinfo'], 'serviceextinfo', service['_extinfo'], %w()
end
179
# Hostlists in services can be given as both single hosts and hostgroups.
# This function merges the members of the hostgroups and the simple list
# of hosts into one list.
#
# service    - service hash; its "<prefix>hosts" and "<prefix>hostgroups"
#              entries (comma separated strings) are read
# servers    - hash of all known hosts, keyed by host name
# hostgroups - hash of all known hostgroups, keyed by name, each with a
#              '_memberlist' array
# prefix     - '' for the normal lists, or e.g. 'exclude' so that the
#              function can be used for excludelists as well
#
# Returns a new array of host names: the listed hosts first, then the
# members of each listed hostgroup.  Duplicates are not removed.
# Raises RuntimeError if a listed host or hostgroup does not exist.
def merge_hosts_and_hostgroups(service, servers, hostgroups, prefix)
        hosts = []
        hosts = service[prefix+'hosts'].split(/,/).map{ |x| x.strip } if service[prefix+'hosts']
        hosts.each{ |host|
                raise "host #{host} does not exist - used in service #{service['service_description']}" unless servers[host]
        }
        if service[prefix+'hostgroups']
                service[prefix+'hostgroups'].split(/,/).map{ |x| x.strip }.each{ |hg|
                        raise "hostgroup #{hg} does not exist - used in service #{service['service_description']}" unless hostgroups[hg]
                        # hosts is our own array here, so appending does not touch the memberlists
                        hosts.concat hostgroups[hg]['_memberlist']
                }
        end

        hosts
end
199
# Figure out the hosts a given service applies to
#
# For a given service find the list of hosts ('hosts' and 'hostgroups')
# minus the excluded hosts ('excludehosts' and 'excludehostgroups') that
# this service runs on.
#
# Raises RuntimeError if a host that is to be excluded is not in the list
# (any more), i.e. if it was never included or is excluded twice.
def find_hosts(service, servers, hostgroups)
        hosts        = merge_hosts_and_hostgroups service, servers, hostgroups, ''
        excludehosts = merge_hosts_and_hostgroups service, servers, hostgroups, 'exclude'

        excludehosts.each{ |host|
                # Array#delete returns nil if there was nothing to delete
                if hosts.delete(host).nil?
                        raise "Cannot remove host #{host} from service #{service['service_description']}: it's not included anyway or excluded twice."
                end
        }

        hosts
end
215
# Collect the extinfo settings of hash in a subhash of their own:
# hash['_extinfo'] is set to a new hash, and every entry whose key starts
# with "extinfo-" is moved from hash into it, with that prefix removed
# from the key.
def split_away_extinfo(hash)
        extinfo = hash['_extinfo'] = {}
        # iterate over a snapshot of the keys, as entries get deleted on the way
        hash.keys.each do |key|
                next unless key[0, 8] == 'extinfo-'
                extinfo[ key[8..-1] ] = hash.delete(key)
        end
end
227
228
229 #############################################################################################
230 #############################################################################################
231 #############################################################################################
232
# Load the config.  The block form of File.open closes the file again as
# soon as it has been parsed.
# NOTE(review): YAML::load is only appropriate for trusted input; this
# assumes nagios-master.cfg is written by the administrator - confirm.
config = File.open( 'nagios-master.cfg' ) { |f| YAML::load( f ) }

# Open all output files for writing.  Opening with "w" truncates them,
# which gets rid of whatever an earlier run has left in them.
files = {}
nagios_filename.each_pair{ |name, filename|
        files[name] = File.new(filename, "w")
}
241
#################################
# create a few hostgroups
#################################
# create the "all" and "pingable" hostgroups; definitions of these names
# in the config are replaced
config['hostgroups']['all']      = { 'alias' => "all servers" }
config['hostgroups']['pingable'] = { 'alias' => "pingable servers" }

# Every hostgroup gets its extinfo settings split off and starts out
# with an empty list of members
config['hostgroups'].each_pair{ |name, hg|
        raise "Empty hostgroup or hostgroup #{name} not a hash" unless hg.kind_of?(Hash)
        split_away_extinfo hg

        hg['_memberlist'] = []
}

# Work out the hostgroups of every server and enter the server in the
# member list of each of them
config['servers'].each_pair{ |name, server|
        raise "Empty server or server #{name} not a hash" unless server.kind_of?(Hash)

        split_away_extinfo server

        raise "No hostgroups defined for #{name}" unless server['hostgroups']
        server['_hostgroups'] = server['hostgroups'].split(/,/).map{ |x| x.strip }
        server['_hostgroups'] << 'all'
        # only an explicit "pingable: false" keeps a server out of 'pingable'
        server['_hostgroups'] << 'pingable' unless server['pingable'] == false

        server['_hostgroups'].each{ |hg|
                raise "Hostgroup #{hg} is not defined" unless config['hostgroups'].has_key?(hg)
                config['hostgroups'][hg]['_memberlist'] << name
        }
}
273
##############
# HOSTS
##############
# Write a host and a hostextinfo definition for every server
config['servers'].each_pair do |name, server|
        # 'ip' is what the 'address' field used to be called in our source
        # file.  Still accept it, but warn                              XXX
        if server.has_key?('ip')
                STDERR.puts("Host definition for #{name} has an 'ip' field.  Please use 'address' instead")
                server['address'] = server.delete('ip')
        end

        set_complain_if_set(server, 'host_name', name, 'Host', name)
        set_if_unset(server, 'alias', name)
        set_if_unset(server, 'use', HOST_TEMPLATE_NAME)
        # servers that are explicitly not pingable get no default alive check
        set_if_unset(server, 'check_command', HOST_ALIVE_CHECK) unless server['pingable'] == false

        print_block(files['hosts'], 'host', server, %w(hostgroups pingable))

        # Handle hostextinfo: the extinfo settings of the first hostgroup of
        # a server are the defaults for the server's own extinfo settings
        extinfo = server['_extinfo']
        group_defaults = config['hostgroups'][ server['_hostgroups'].first ]['_extinfo']
        group_defaults.each_pair do |key, value|
                # substitute hostname into the notes_url
                value = sprintf(value, name) if key == 'notes_url'

                set_if_unset(extinfo, key, value)
        end

        set_complain_if_set(extinfo, 'host_name', name, 'hostextinfo', name)
        # the icon image also serves as the default for the other images
        if extinfo.has_key?('icon_image')
                set_if_unset(extinfo, 'vrml_image', extinfo['icon_image'])
                set_if_unset(extinfo, 'statusmap_image', extinfo['icon_image'])
        end

        print_block(files['hostextinfo'], 'hostextinfo', extinfo, %w())
end
309
310
311
##############
# HOSTGROUPS
##############
# Write a hostgroup definition for every hostgroup that is not private
config['hostgroups'].each_pair{ |name, hg|
        next if hg['private']

        set_complain_if_set hg, 'hostgroup_name', name                       , 'Hostgroup', name
        set_complain_if_set hg, 'members'       , hg['_memberlist'].join(","), 'Hostgroup', name
        set_if_unset        hg, 'contact_groups', CONTACTGROUP

        # 'private' is a setting for this script only, just like 'pingable' is
        # for hosts: keep it out of the output if it is present but not true
        print_block files['hostgroups'], 'hostgroup', hg, %w(private)
}
324
325
##############
# SERVICES and DEPENDENCIES
##############
# Write the service, servicedependency and serviceextinfo definitions for
# every service in the config
config['services'].each{ |service|
        raise "Empty service or service not a hash" unless service.kind_of?(Hash)

        split_away_extinfo service


        # Both 'name' and 'service_description' are valid for a service's name
        # Internally we only use service_description as that's nagios' official term
        if service.has_key?('name')
                raise "Service definition has both a name (#{service['name']}) " +
                      "and a service_description (#{service['service_description']})" if service.has_key?('service_description')
                service['service_description'] = service.delete('name')
        end
        # Both 'check' and 'check_command' are valid for a service's check command
        # Internally we only use check_command as that's nagios' official term
        if service.has_key?('check')
                raise "Service definition has both a check (#{service['check']}) " +
                      "and a check_command (#{service['check_command']})" if service.has_key?('check_command')
                service['check_command'] = service.delete('check')
        end


        hosts = find_hosts service, config['servers'], config['hostgroups']
        raise "no hosts for service #{service['service_description']}" if hosts.empty?

        # A service gets its check in exactly one of these ways
        check_sources = %w(nrpe check_command remotecheck).select{ |key| service[key] }
        raise "nrpe, check, and remotecheck are mutually exclusive in service #{service['service_description']}" if check_sources.size >= 2

        if service['runfrom'] && service['remotecheck']
                # If the service check is to be run from a remote monitor server ("relay")
                # add that as an NRPE check to be run on the relay and make this
                # service also depend on NRPE on the relay
                relay = service['runfrom']
                relay_server = config['servers'][ relay ]
                raise "relay host #{relay} does not exist - used in runfrom of service #{service['service_description']}" unless relay_server
                relay_ip = relay_server['address']
                raise "For some reason I do not have an address for relay host #{relay}.  This shouldn't be." unless relay_ip

                hosts.each{ |host|
                        # Every host gets its own deep copy of the service, as the
                        # check differs from host to host
                        hostservice = Marshal.load( Marshal.dump( service ) )
                        host_ip = config['servers'][host]['address']
                        raise "For some reason I do not have an address for #{host}.  This shouldn't be." unless host_ip

                        check = $nrpe.add("#{host}_#{hostservice['service_description']}", hostservice['remotecheck'].gsub(/\$HOSTADDRESS\$/, host_ip))
                        hostservice['check_command'] = "#{NRPE_CHECKNAME_HOST}!#{ relay_ip }!#{ check }"

                        # Make sure dependencies are an array.  If there are none, create an empty array
                        # if depends is just a string, make a list with just that element
                        hostservice['depends'] = ensure_array( hostservice['depends'] )
                        # And append this new dependency
                        hostservice['depends'] << "#{ relay }:#{ NRPE_PROCESS_SERVICE }"

                        addService( [ host ], hostservice, files)
                }
        elsif service['runfrom'] || service['remotecheck']
                raise "runfrom and remotecheck must either appear both or not at all in service #{service['service_description']}"
        else
                addService(hosts, service, files)
        end
}
393
394
##############
# NRPE config file
##############
# One "command[name]=check" line for every check that was registered
nrpe_file = files['nrpe']
$nrpe.checks.each do |name, check|
        nrpe_file.puts "command[#{ name }]=#{ check }"
end
401
402