2 ## THIS FILE IS UNDER PUPPET CONTROL. DON'T EDIT IT HERE.
3 ## USE: git clone git+ssh://$USER@puppet.debian.org/srv/puppet.debian.org/git/dsa-puppet.git
6 # Copyright (c) 2006-2007 Erik Mugele. All rights reserved.
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions
11 # 1. Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # 2. Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 # 1. This script makes use of the Country Code Top Level
32 # Domains (ccTLD) provided by the SURBL group at
33 # http://spamcheck.freeapp.net/two-level-tlds
34 # THE VARIABLE $cctld_file MUST BE SET TO THE FULL PATH AND
35 # NAME OF THE FILE CONTAINING THE CCTLD LIST! (see below)
37 # 2. This script makes use of whitelisting of popular domains. The
38 # source of the list can be found here:
39 # http://spamassassin.apache.org/full/3.1.x/dist/rules/25_uribl.cf
40 # These are domains that are whitelisted by the SURBL group so it
41 # doesn't make sense to waste resources doing lookups on them.
42 # THE VARIABLE $whitelist_file MUST BE SET TO THE FULL PATH AND
43 # NAME OF THE FILE CONTAINING THE WHITE LIST! (see below)
45 # 3. Per the guidelines at http://www.surbl.org, if your site processes
46 # more than 100,000 messages per day, you should NOT be using the
47 # public SURBL name servers but should be rsync-ing from them and
48 # running your own. See http://www3.surbl.org/rsync-signup.html
52 # Designed and written by Erik Mugele, 2004-2006
53 # http://www.teuton.org/~ejm
56 # The following variable is the full path to the file containing the
57 # list of Country Code Top Level Domains (ccTLD).
58 # ---------------------------------------------------------------------
59 # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE
60 # CONTAINING THE CCTLD LIST!
61 # ---------------------------------------------------------------------
62 my $cctld_file = "/etc/exim4/ccTLD.txt";
64 # The following variable is the full path to the file containing the whitelisted domains.
66 # ---------------------------------------------------------------------
67 # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE
68 # CONTAINING THE WHITELIST DOMAINS!
69 # ---------------------------------------------------------------------
70 my $whitelist_file = "/etc/exim4/surbl_whitelist.txt";
72 # This variable defines the maximum MIME file size that will be checked
73 # if this script is called by the MIME ACL. This is primarily to
74 # keep the load down on the server. Size is in bytes.
75 my $max_file_size = 50000;
77 # The following two variables enable or disable the SURBL and URIBL
78 # lookups. Set to 1 to enable and 0 to disable.
82 # Check to see if a decoded MIME attachment is being checked or
83 # just a plain old text message with no attachments
85 my $mime_filename = Exim::expand_string('$mime_decoded_filename');
88 #warn ("MIME FILENAME: $mime_filename\n");
89 # If the MIME file is too large, skip it.
90 if (-s $mime_filename <= $max_file_size) {
91 open(fh,"<$mime_filename");
93 while (read(fh,$buff,1024)) {
101 $exim_body = Exim::expand_string('$message_body');
105 # This subroutine does the actual DNS lookup and builds and returns
106 # the return message for the SURBL lookup.
108 my $surbldomain = ".multi.surbl.org";
109 @dnsbladdr=gethostbyname($params[0].$surbldomain);
110 # If gethostbyname() returned anything, build a return message.
112 if (scalar(@dnsbladdr) != 0) {
113 $return_string = "Blacklisted URL in message. (".$params[0].") in";
114 @surblipaddr = unpack('C4',($dnsbladdr[4])[0]);
115 if ($surblipaddr[3] & 64) {
116 $return_string .= " [jp]";
118 if ($surblipaddr[3] & 32) {
119 $return_string .= " [ab]";
121 if ($surblipaddr[3] & 16) {
122 $return_string .= " [ob]";
124 if ($surblipaddr[3] & 8) {
125 $return_string .= " [ph]";
127 if ($surblipaddr[3] & 4) {
128 $return_string .= " [ws]";
130 if ($surblipaddr[3] & 2) {
131 $return_string .= " [sc]";
133 $return_string .= ". See http://www.surbl.org/lists.html.";
135 return $return_string;
139 # This subroutine does the actual DNS lookup and builds and returns
140 # the return message for the URIBL check.
142 my $surbldomain = ".black.uribl.com";
143 @dnsbladdr=gethostbyname($params[0].$surbldomain);
144 # If gethostbyname() returned anything, build a return message.
146 if (scalar(@dnsbladdr) != 0) {
147 $return_string = "Blacklisted URL in message. (".$params[0].") in";
148 @surblipaddr = unpack('C4',($dnsbladdr[4])[0]);
149 if ($surblipaddr[3] & 8) {
150 $return_string .= " [red]";
152 if ($surblipaddr[3] & 4) {
153 $return_string .= " [grey]";
155 if ($surblipaddr[3] & 2) {
156 $return_string .= " [black]";
158 $return_string .= ". See http://lookup.uribl.com.";
160 return $return_string;
164 # This subroutine converts two hex characters to an ASCII character.
165 # It is called when ASCII obfuscation or Quoted-Printable characters
166 # are found (i.e. %AE or =AE).
167 # It should return a converted/plain address after splitting off
168 # everything that isn't part of the address portion of the URL.
170 my $address = $ob_parts[0];
171 for (my $j=1; $j < scalar(@ob_parts); $j++) {
172 $address .= chr(hex(substr($ob_parts[$j],0,2)));
173 $address .= substr($ob_parts[$j],2,);
175 $address = (split(/[^A-Za-z0-9._\-]/,$address))[0];
184 # Find all the URLs in the message by finding the HTTP string
185 @parts = split /[hH][tT][tT][pP]:\/\//,$exim_body;
186 if (scalar(@parts) > 1) {
187 # Read the entries from the ccTLD file.
188 open (cctld_handle,$cctld_file) or die "Can't open $cctld_file.\n";
189 while (<cctld_handle>) {
190 next if (/^#/ || /^$/ || /^\s$/);
193 close (cctld_handle) or die "Close: $!\n";
194 # Read the entries from the whitelist file.
195 open (whitelist_handle,$whitelist_file) or die "Can't open $whitelist_file.\n";
196 while (<whitelist_handle>) {
197 next if (/^#/ || /^$/ || /^\s$/);
200 close (whitelist_handle) or die "Close: $!\n";
201 # Go through each of the HTTP parts that were found in the message
202 for ($i=1; $i < scalar(@parts); $i++) {
203 # Special case of Quoted Printable EOL marker
204 $parts[$i] =~ s/=\n//g;
205 # Split the parts and find the address portion of the URL.
206 # Address SHOULD be either a FQDN, IP address, or encoded address.
207 $address = (split(/[^A-Za-z0-9\._\-%=]/,$parts[$i]))[0];
208 # Check for an =. If it exists, we assume the URL is doing
209 # Quoted-Printable. Decode it and redefine $address.
210 if ($address =~ /=/) {
211 @ob_parts = split /=/,$address;
212 $address = converthex(@ob_parts);
214 # Check for a %. If it exists the URL is using % ASCII
215 # obfuscation. Decode it and redefine $address.
216 if ($address =~ /%/) {
217 @ob_parts = split /%/,$address;
218 $address = converthex(@ob_parts);
220 # Split the address into the elements separated by periods.
221 @domain = split /\./,$address;
222 # Check the length of the domain name. If fewer than two elements
223 # at this point it is probably bogus or there is a bug in one of
224 # the decoding/converting routines above.
225 if (scalar(@domain) >= 2) {
227 # By default, assume that the domain check is on a
228 # "standard" two level domain
229 $spamcheckdomain=$domain[-2].".".$domain[-1];
230 # Check for a two level domain
231 if (((scalar(@domain) == 2) || (scalar(@domain) >= 5)) &&
232 (grep(/^$spamcheckdomain$/i,@cctlds))) {
233 $return_result="cctld";
235 # Check for a three level domain
236 if (scalar(@domain) == 3) {
237 if (grep(/^$spamcheckdomain$/i,@cctlds)) {
238 $spamcheckdomain=$domain[-3].".".$spamcheckdomain;
239 if (grep(/^$spamcheckdomain$/,@cctlds)) {
240 $return_result="cctld";
244 # Check for a four level domain
245 if (scalar(@domain) == 4) {
246 # Check to see if the domain is an IP address
247 if ($domain[-1] =~ /[a-zA-Z]/) {
248 if (grep(/^$spamcheckdomain$/i,@cctlds)) {
249 $spamcheckdomain=$domain[-3].".".$spamcheckdomain;
250 if (grep(/^$spamcheckdomain$/i,@cctlds)) {
251 $spamcheckdomain=$domain[-4].".".$spamcheckdomain;
255 # Domain is an IP address
256 $spamcheckdomain=$domain[3].".".$domain[2].
257 ".".$domain[1].".".$domain[0];
261 #warn ("FOUND DOMAIN ($mime_filename): $spamcheckdomain\n");
262 # If whitelisting is enabled, check the domain against the whitelist.
264 if ($whitelist_file ne "") {
265 foreach $whitelist_entry (@whitelist) {
266 chomp($whitelist_entry);
267 if ($spamcheckdomain =~ m/^$whitelist_entry$/i) {
268 $return_result="whitelisted";
273 # If the domain is whitelisted or in the cctld skip adding
274 # it to the lookup list.
275 if ($return_result eq "") {
276 if (scalar(@lookupdomains) > 0) {
277 # Check to see if the domain is already in the list.
278 if (not grep(/^$spamcheckdomain$/i,@lookupdomains)) {
279 push(@lookupdomains,$spamcheckdomain);
282 push(@lookupdomains,$spamcheckdomain);
287 # If there are items in the lookupdomains list then
288 # perform lookups on them. If there are not, something is wrong
289 # and just return false. There should always be something in the list.
290 if (scalar(@lookupdomains) > 0) {
291 foreach $i (@lookupdomains) {
293 #warn ("CHECKING DOMAIN ($mime_filename): $i\n");
294 # If SURBL lookups are enabled do an SURBL lookup
295 if ($surbl_enable == 1) {
296 $return_result = surbllookup($i);
298 # If URIBL lookups are enabled and the SURBL lookup returned nothing, do a URIBL lookup.
300 if (($uribl_enable == 1) && ($return_result eq "")) {
301 $return_result = uribllookup($i);
303 # If we got a hit return the result to Exim
304 if ($return_result ne "") {
307 return $return_result;
313 # We didn't find any URLs or the URLs we did find were not
314 # listed so return false.