2 ## THIS FILE IS UNDER PUPPET CONTROL. DON'T EDIT IT HERE.
3 ## USE: git clone git+ssh://$USER@puppet.debian.org/srv/puppet.debian.org/git/dsa-puppet.git
6 # Copyright (c) 2006-2012 Erik Mugele. All rights reserved.
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions
11 # 1. Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # 2. Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 # Designed and written by Erik Mugele, 2004-2010, http://www.teuton.org/~ejm
34 # Please see the following website for details on usage of
35 # this script: http://www.teuton.org/~ejm/exim_surbl
37 # The following variable is the full path to the file containing the
38 # two-level top-level domains (TLDs).
39 # ---------------------------------------------------------------------
40 # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE
41 # CONTAINING THE TWO LEVEL TLD!
42 # ---------------------------------------------------------------------
43 my $twotld_file = "/etc/exim4/two-level-tlds";
45 # The following variable is the full path to the file containing the
46 # three-level top-level domains (TLDs).
47 # ---------------------------------------------------------------------
48 # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE
49 # CONTAINING THE THREE LEVEL TLD!
50 # ---------------------------------------------------------------------
51 my $threetld_file = "/etc/exim4/three-level-tlds";
53 # The following variable is the full path to the file containing the whitelist domains.
55 # ---------------------------------------------------------------------
56 # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE
57 # CONTAINING THE WHITELIST DOMAINS!
58 # ---------------------------------------------------------------------
59 my $whitelist_file = "/etc/exim4/surbl_whitelist.txt";
61 # This variable defines the maximum MIME file size that will be checked
62 # if this script is called by the MIME ACL. Larger files are skipped to
63 # keep the load down on the server. Size is in bytes.
64 my $max_file_size = 50000;
66 # The following variables enable or disable the SURBL, URIBL and DBL
67 # lookups. Set to 1 to enable and 0 to disable.
72 # Check to see if a decoded MIME attachment is being checked or
73 # just a plain old text message with no attachments.
75 my $mime_filename = Exim::expand_string('$mime_decoded_filename');
78 #warn ("MIME FILENAME: $mime_filename\n");
79 # If the MIME file is too large, skip it.
80 if (-s $mime_filename <= $max_file_size) {
81 open(fh,"<$mime_filename");
83 while (read(fh,$buff,1024)) {
91 $exim_body = Exim::expand_string('$message_body');
95 # This subroutine does the actual DNS lookup and builds and returns
96 # the return message for the SURBL lookup.
98 my $surbldomain = ".multi.surbl.org";
99 @dnsbladdr=gethostbyname($params[0].$surbldomain);
100 # If gethostbyname() returned anything, build a return message.
102 if (scalar(@dnsbladdr) != 0) {
103 $return_string = "Blacklisted URL in message. (".$params[0].") in";
104 @surblipaddr = unpack('C4',($dnsbladdr[4])[0]);
105 if ($surblipaddr[3] & 64) {
106 $return_string .= " [jp]";
108 if ($surblipaddr[3] & 32) {
109 $return_string .= " [ab]";
111 if ($surblipaddr[3] & 16) {
112 $return_string .= " [ob]";
114 if ($surblipaddr[3] & 8) {
115 $return_string .= " [ph]";
117 if ($surblipaddr[3] & 4) {
118 $return_string .= " [ws]";
120 if ($surblipaddr[3] & 2) {
121 $return_string .= " [sc]";
123 $return_string .= ". See http://www.surbl.org/lists.html.";
125 return $return_string;
129 # This subroutine does the actual DNS lookup and builds and returns
130 # the return message for the URIBL check.
132 my $uribldomain = ".black.uribl.com";
133 @dnsbladdr=gethostbyname($params[0].$uribldomain);
134 # If gethostbyname() returned anything, build a return message.
136 if (scalar(@dnsbladdr) != 0) {
137 $return_string = "Blacklisted URL in message. (".$params[0].") in";
138 @ipaddr = unpack('C4',($dnsbladdr[4])[0]);
139 if ($ipaddr[3] & 8) {
140 $return_string .= " [red]";
142 if ($ipaddr[3] & 4) {
143 $return_string .= " [grey]";
145 if ($ipaddr[3] & 2) {
146 $return_string .= " [black]";
148 $return_string .= ". See http://lookup.uribl.com.";
150 return $return_string;
154 # This subroutine does the actual DNS lookup and builds and returns
155 # the return message for the Spamhaus DBL check.
157 my $dbldomain = ".dbl.spamhaus.org";
158 @dnsbladdr=gethostbyname($params[0].$dbldomain);
159 # If gethostbyname() returned anything, build a return message.
161 if (scalar(@dnsbladdr) != 0) {
162 $return_string = "Blacklisted URL in message: ".$params[0];
163 $return_string .= ". See http://www.spamhaus.org/lookup.lasso?dnsbl=domain.";
165 return $return_string;
169 # This subroutine takes a list of domain parts
170 # (e.g. ["www","example","com"]) and a number (e.g. 2) and returns
171 # the address built from that many trailing parts (e.g. example.com).
172 my $numparts = @_[-1];
175 my $address = $domain[-1];
176 for (my $i=2; $i<=$numparts; $i++) {
177 $address = $domain[-$i].".".$address;
183 # This subroutine converts two hex characters to an ASCII character.
184 # It is called when ASCII obfuscation or Quoted-Printable characters
185 # are found (i.e. %AE or =AE).
186 # It should return a converted/plain address after splitting off
187 # everything that isn't part of the address portion of the URL.
189 my $address = $ob_parts[0];
190 for (my $j=1; $j < scalar(@ob_parts); $j++) {
191 $address .= chr(hex(substr($ob_parts[$j],0,2)));
192 $address .= substr($ob_parts[$j],2,);
194 $address = (split(/[^A-Za-z0-9._\-]/,$address))[0];
203 # Find all the URLs in the message by finding the HTTP string
204 @parts = split(/[hH][tT][tT][pP](:|=3[aA])(\/|=2[Ff])(\/|=2[Ff])/,$exim_body);
205 if (scalar(@parts) > 1) {
206 # Read the entries from the two-level TLD file.
207 open (twotld_handle,$twotld_file) or die "Can't open $twotld_file.\n";
208 while (<twotld_handle>) {
209 next if (/^#/ || /^$/ || /^\s$/);
212 close (twotld_handle) or die "Close: $!\n";
213 # Read the entries from the three-level TLD file.
214 open (threetld_handle,$threetld_file) or die "Can't open $threetld_file.\n";
215 while (<threetld_handle>) {
216 next if (/^#/ || /^$/ || /^\s$/);
219 close (threetld_handle) or die "Close: $!\n";
220 # Read the entries from the whitelist file.
221 open (whitelist_handle,$whitelist_file) or die "Can't open $whitelist_file.\n";
222 while (<whitelist_handle>) {
223 next if (/^#/ || /^$/ || /^\s$/);
226 close (whitelist_handle) or die "Close: $!\n";
232 # Go through each of the HTTP parts that were found in the message
233 for ($i=1; $i < scalar(@parts); $i++) {
234 # Special case of Quoted Printable EOL marker
235 $parts[$i] =~ s/=\n//g;
237 # Split the parts and find the address portion of the URL.
238 # Address SHOULD be either a FQDN, IP address, or encoded address.
239 $address = (split(/[^A-Za-z0-9\._\-%=]/,$parts[$i]))[0];
241 # Check for an =. If it exists, we assume the URL is doing
242 # Quoted-Printable. Decode it and redefine $address
243 if ($address =~ /=/) {
244 @ob_parts = split(/=/,$address);
245 $address = converthex(@ob_parts);
248 # Check for a %. If it exists the URL is using % ASCII
249 # obfuscation. Decode it and redefine $address.
250 if ($address =~ /%/) {
251 @ob_parts = split(/%/,$address);
252 $address = converthex(@ob_parts);
255 # Convert the address to lower case.
256 $address = lc($address);
258 # Split the address into the elements separated by periods.
259 @domain = split(/\./,$address);
261 # Check the length of the domain name. If fewer than two elements
262 # at this point it is probably bogus or there is a bug in one of
263 # the decoding/converting routines above.
264 if (scalar(@domain) >=2) {
265 $spamcheckdomain = "";
268 #warn ("FOUND DOMAIN: ".mkaddress(@domain,scalar(@domain))."\n");
270 # Domain has exactly two elements, or five or more.
271 if ((scalar(@domain) == 2) || (scalar(@domain) >=5)) {
272 # Add two elements of the domain to the list(s).
273 $spamcheckdomain=mkaddress(@domain,2);
274 # Check if $spamcheckdomain is not in the whitelist.
275 if (! grep(/^$spamcheckdomain$/i,@whitelist)) {
276 # If SURBL checks are enabled and the domain is
277 # not in the SURBL list, add it.
278 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
279 ($surbl_enable == 1)) {
280 push(@surbl_list,$spamcheckdomain);
282 # If URIBL checks are enabled and the domain is
283 # not in the URIBL list, add it.
284 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
285 ($uribl_enable == 1)) {
286 push(@uribl_list,$spamcheckdomain);
288 # If DBL checks are enabled and the domain is
289 # not in the DBL list, add it.
290 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
291 ($dbl_enable == 1)) {
292 push(@dbl_list,$spamcheckdomain);
297 # Domain has three elements.
298 if (scalar(@domain) == 3) {
299 # Set $spamcheckdomain to two elements.
300 $spamcheckdomain = mkaddress(@domain,2);
301 $two_checkdomain = $spamcheckdomain;
302 if (grep(/^$spamcheckdomain$/i,@twotlds)) {
303 # $spamcheckdomain is in the two-level TLD list.
304 # Reset $spamcheckdomain to three elements.
305 $spamcheckdomain = mkaddress(@domain,3);
306 # Skip the name if it, or its two-element parent domain, is whitelisted.
307 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
308 (! grep(/^$two_checkdomain$/i,@whitelist))) {
309 # If SURBL checks are enabled and the domain is
310 # not in the SURBL list, add it.
311 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
312 ($surbl_enable == 1)) {
313 push(@surbl_list,$spamcheckdomain);
315 # If URIBL checks are enabled and the domain is
316 # not in the URIBL list, add it.
317 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
318 ($uribl_enable == 1)) {
319 push(@uribl_list,$spamcheckdomain);
321 # If DBL checks are enabled and the domain is
322 # not in the DBL list, add it.
323 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
324 ($dbl_enable == 1)) {
325 push(@dbl_list,$spamcheckdomain);
329 # $spamcheckdomain is not in the two-level TLD list.
330 # $spamcheckdomain is still two elements.
331 # Check if $spamcheckdomain is not in the whitelist.
332 if (! grep(/^$spamcheckdomain$/i,@whitelist)) {
333 # If SURBL checks are enabled and the domain is
334 # not in the SURBL list, add it.
335 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
336 ($surbl_enable == 1)) {
337 push(@surbl_list,$spamcheckdomain);
339 # If URIBL checks are enabled and the domain is
340 # not in the URIBL list, add it.
341 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
342 ($uribl_enable == 1)) {
343 push(@uribl_list,$spamcheckdomain);
345 # If DBL checks are enabled and the domain is
346 # not in the DBL list, add it.
347 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
348 ($dbl_enable == 1)) {
349 push(@dbl_list,$spamcheckdomain);
352 # Reset $spamcheckdomain to three elements.
353 $spamcheckdomain = mkaddress(@domain,3);
354 # Check if $spamcheckdomain is not in the whitelist.
355 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
356 (! grep(/^$two_checkdomain$/i,@whitelist))) {
357 # If URIBL checks are enabled and the domain is
358 # not in the URIBL list, add it.
359 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
360 ($uribl_enable == 1)) {
361 push(@uribl_list,$spamcheckdomain);
363 # If DBL checks are enabled and the domain is
364 # not in the DBL list, add it.
365 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
366 ($dbl_enable == 1)) {
367 push(@dbl_list,$spamcheckdomain);
373 # Domain has four elements.
374 if (scalar(@domain) == 4) {
375 if ($domain[-1] =~ /^(\d){1,3}$/) {
376 # Domain is an IP address
377 # Set $spamcheckdomain to the IP address in reverse.
378 $spamcheckdomain = $domain[3].".".$domain[2].
379 ".".$domain[1].".".$domain[0];
381 # Do NOT check IP addresses against the Spamhaus DBL list.
383 # If SURBL checks are enabled and the IP is
384 # not in the SURBL list, add it.
385 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
386 ($surbl_enable == 1)) {
387 push(@surbl_list,$spamcheckdomain);
389 # If URIBL checks are enabled and the IP is
390 # not in the URIBL list, add it.
391 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
392 ($uribl_enable == 1)) {
393 push(@uribl_list,$spamcheckdomain);
396 # Domain is not an IP address.
397 # Check if the last three elements of the domain are
398 # in the three-level TLD list.
399 $three_checkdomain = mkaddress(@domain,3);
400 $two_checkdomain = mkaddress(@domain,2);
401 if (grep(/^$three_checkdomain$/i,@threetlds)) {
402 # Set $spamcheckdomain to four elements.
403 $spamcheckdomain = mkaddress(@domain,4);
404 # Skip the name if it, or its three- or two-element parent, is whitelisted.
405 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
406 (! grep(/^$three_checkdomain$/i,@whitelist)) &&
407 (! grep(/^$two_checkdomain$/i,@whitelist))) {
408 # If SURBL checks are enabled and the domain is
409 # not in the SURBL list, add it.
410 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
411 ($surbl_enable == 1)) {
412 push(@surbl_list,$spamcheckdomain);
414 # If URIBL checks are enabled and the domain is
415 # not in the URIBL list, add it.
416 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
417 ($uribl_enable == 1)) {
418 push(@uribl_list,$spamcheckdomain);
420 # If DBL checks are enabled and the domain is
421 # not in the DBL list, add it.
422 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
423 ($dbl_enable == 1)) {
424 push(@dbl_list,$spamcheckdomain);
429 # Check if the last two elements of the domain are
430 # in the two-level TLD list.
431 elsif (grep(/^$two_checkdomain$/i,@twotlds)) {
432 # Reset $spamcheckdomain to three elements.
433 $spamcheckdomain = mkaddress(@domain,3);
434 # Skip the name if it, or its two-element parent domain, is whitelisted.
435 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
436 (! grep(/^$two_checkdomain$/i,@whitelist))) {
437 # If SURBL checks are enabled and the domain is
438 # not in the SURBL list, add it.
439 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
440 ($surbl_enable == 1)) {
441 push(@surbl_list,$spamcheckdomain);
443 # If URIBL checks are enabled and the domain is
444 # not in the URIBL list, add it.
445 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
446 ($uribl_enable == 1)) {
447 push(@uribl_list,$spamcheckdomain);
449 # If DBL checks are enabled and the domain is
450 # not in the DBL list, add it.
451 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
452 ($dbl_enable == 1)) {
453 push(@dbl_list,$spamcheckdomain);
457 # Reset $spamcheckdomain to four elements.
458 $spamcheckdomain = mkaddress(@domain,4);
459 # Skip the name if it, or its three- or two-element parent, is whitelisted.
460 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
461 (! grep(/^$three_checkdomain$/i,@whitelist)) &&
462 (! grep(/^$two_checkdomain$/i,@whitelist))) {
463 # If SURBL checks are enabled and the domain is
464 # not in the SURBL list, add it.
465 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
466 ($surbl_enable == 1)) {
467 push(@surbl_list,$spamcheckdomain);
469 # If URIBL checks are enabled and the domain is
470 # not in the URIBL list, add it.
471 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
472 ($uribl_enable == 1)) {
473 push(@uribl_list,$spamcheckdomain);
475 # If DBL checks are enabled and the domain is
476 # not in the DBL list, add it.
477 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
478 ($dbl_enable == 1)) {
479 push(@dbl_list,$spamcheckdomain);
484 # Set $spamcheckdomain to two elements
485 $spamcheckdomain = mkaddress(@domain,2);
486 # Check if $spamcheckdomain is not in the whitelist.
487 if (! grep(/^$spamcheckdomain$/i,@whitelist)) {
488 # If SURBL checks are enabled and the domain is
489 # not in the SURBL list, add it.
490 if (! grep(/^$spamcheckdomain$/i,@surbl_list) &&
491 ($surbl_enable == 1)) {
492 push(@surbl_list,$spamcheckdomain);
494 # If URIBL checks are enabled and the domain is
495 # not in the URIBL list, add it.
496 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
497 ($uribl_enable == 1)) {
498 push(@uribl_list,$spamcheckdomain);
500 # If DBL checks are enabled and the domain is
501 # not in the DBL list, add it.
502 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
503 ($dbl_enable == 1)) {
504 push(@dbl_list,$spamcheckdomain);
507 # Reset $spamcheckdomain to three elements
508 $spamcheckdomain = mkaddress(@domain,3);
509 # Check if $spamcheckdomain is not in the whitelist.
510 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
511 (! grep(/^$two_checkdomain$/i,@whitelist))) {
512 # If URIBL checks are enabled and the domain is
513 # not in the URIBL list, add it.
514 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
515 ($uribl_enable == 1)) {
516 push(@uribl_list,$spamcheckdomain);
518 # If DBL checks are enabled and the domain is
519 # not in the DBL list, add it.
520 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
521 ($dbl_enable == 1)) {
522 push(@dbl_list,$spamcheckdomain);
525 # Set $spamcheckdomain to four elements
526 $spamcheckdomain = mkaddress(@domain,4);
527 # Check if $spamcheckdomain is not in the whitelist.
528 if ((! grep(/^$spamcheckdomain$/i,@whitelist)) &&
529 (! grep(/^$three_checkdomain$/i,@whitelist)) &&
530 (! grep(/^$two_checkdomain$/i,@whitelist))) {
531 # If URIBL checks are enabled and the domain is
532 # not in the URIBL list, add it.
533 if (! grep(/^$spamcheckdomain$/i,@uribl_list) &&
534 ($uribl_enable == 1)) {
535 push(@uribl_list,$spamcheckdomain);
537 # If DBL checks are enabled and the domain is
538 # not in the DBL list, add it.
539 if (! grep(/^$spamcheckdomain$/i,@dbl_list) &&
540 ($dbl_enable == 1)) {
541 push(@dbl_list,$spamcheckdomain);
545 } # End: if ($domain[-1] =~ /^(\d){1,3}$/)
546 } # End: if (scalar(@domain) == 4)
547 } # End: if (scalar(@domain) >=2)
548 } # End: for ($i=1; $i < scalar(@parts); $i++)
550 # If there are items in the SURBL list and the SURBL check
551 # is enabled then perform lookups on them.
552 if ((scalar(@surbl_list) > 0) &&
553 ($surbl_enable == 1)) {
554 foreach $i (@surbl_list) {
556 #warn ("CHECKING DOMAIN ($mime_filename): $i in SURBL list.\n");
557 $return_result = surbllookup($i);
558 if ($return_result ne "") {
559 return $return_result;
564 # If there are items in the URIBL list and the URIBL check
565 # is enabled and the previous lookup did not return a result
566 # then perform lookups on them.
567 if ((scalar(@uribl_list) > 0) &&
568 ($uribl_enable == 1) &&
569 ($return_result eq "")) {
570 foreach $i (@uribl_list) {
572 #warn ("CHECKING DOMAIN ($mime_filename): $i in URIBL list.\n");
573 $return_result = uribllookup($i);
574 if ($return_result ne "") {
575 return $return_result;
580 # If there are items in the DBL list and the DBL check
581 # is enabled and the previous lookups did not return a result
582 # then perform lookups on them.
583 if ((scalar(@dbl_list) > 0) &&
584 ($dbl_enable == 1) &&
585 ($return_result eq "")) {
586 foreach $i (@dbl_list) {
588 #warn ("CHECKING DOMAIN ($mime_filename): $i in DBL list.\n");
589 $return_result = dbllookup($i);
590 if ($return_result ne "") {
591 return $return_result;
595 } # End: if (scalar(@parts) > 1)
596 } # End: if ($exim_body)
598 # No URLs were found or the URLs that were found were not
599 # listed in any list so return false.
602 } # End Main: - sub surblspamcheck