#!/usr/bin/perl -w # Version 1.107 use strict; use usleep; my $usage="usage: check4spam [-dhju] [-ADSpt <#>] [-acins ] [-bg ] [-P ]\n"; my $safecat="/usr/local/bin/safecat"; my $threshold=3; my $changesubject=0; my $debug=-t STDERR; my $dir=""; my $spamdir=""; my $maxspamsize=-1; my $autospamsize=-1; my $maxprobsize=-1; my $maxdnsdemerits=3; my $sender=""; # Will be set to From: email address my @lines; my @badmailer=(); my @probprog=(); my %headers=(); my %badfrom=(); my %badip=(); my %nocheck=(); my %skip=(); parseargs(@ARGV); die "check4spam: no spamdir specified; aborting\n" unless($spamdir); my $line; @lines=keys %nocheck; %nocheck=(); while(@lines) { open(NOCHECK,"<".shift @lines) or die "open: $!"; while(defined($line=)) { chomp $line; $nocheck{$1}=1 while($line =~ /(\d+\.\d+\.\d+\.(\d+|\*))/g); } close(NOCHECK); } @lines=keys %skip; %skip=(); while(@lines) { open(SKIP,"<".shift @lines) or die "open: $!"; while(defined($line=)) { chomp $line; $skip{$1}=1 while($line =~ /(\d+\.\d+\.\d+\.(\d+|\*))/g); } close(SKIP); } @lines=keys %badfrom; %badfrom=(); while(@lines) { open(BADFROM,"<".shift @lines) or die "open: $!"; while(defined($line=)) { chomp $line; $badfrom{lc $1}=(defined($2)?$2:$1) if($line =~ /^(\S+)(?:\s+(.*))?$/s); } close(BADFROM); } @lines=keys %badip; %badip=(); while(@lines) { open(BADIP,"<".shift @lines) or die "open: $!"; while(defined($line=)) { chomp $line; $badip{$1}=1 while($line =~ /(\d+\.\d+\.\d+\.(\d+|\*))/g); } close(BADIP); } @lines=@badmailer; @badmailer=(); while(@lines) { open(BADMAILER,"<".shift @lines) or die "open: $!"; while(defined($line=)) { chomp $line; push @badmailer,$1 if($line =~ /^\s*(.*)\s*$/s); } close(BADMAILER); } my ($ip,$classc,$revip,$curlinelen,@tmp); my $subjnum=-1; my $endhead=-1; my $foundcleanone=0; my $demerits=0; my $dnsdemerits=0; my $maxlinelen_notms=0; my $maxlinelen=0; my $emailsize=-1; my $bodyread=0; my $firsthost=1; my $header=":"; my @heads=(); my %notes=(); my %ipschecked=(); @tmp=stat(STDIN); $emailsize=$tmp[7] if(@tmp); while(defined($line=)) { push @lines,$line; $line =~ s/\r?\n$//s; if(not length $line) { $endhead=$#lines; last; } if($line =~ /^(\S+):\s*(.*)$/s) { $header=lc($1); if(exists $headers{lc $1}) { $headers{$header}.=" $2"; } else { $headers{$header}=$2; } } elsif($line =~ /^\S/s) { $endhead=$#lines; push @lines,"\n",pop @lines; last; } $curlinelen=0 unless($line =~ /^\s/s); $curlinelen+=length $line; $maxlinelen=$curlinelen if($maxlinelen<$curlinelen); $maxlinelen_notms=$curlinelen if($maxlinelen_notms<$curlinelen and "x-microsoft-exchange-diagnostics" ne $header and "x-ymail-osg" ne $header); $subjnum=$#lines if($subjnum==-1 and "subject" eq $header); # We used to do a generic IP-anywhere-in-the-line search: # if($line =~ /^Received: .*?\b(\d+)\.(\d+)\.(\d+)\.(\d+)\b/i) { # But it was fooled so now we only look for some known formats: if($line =~ /^Received: .*?[@(](\d+)\.(\d+)\.(\d+)\.(\d+)\)$/i or $line =~ /^Received: .*?\(\S+ \[(\d+)\.(\d+)\.(\d+)\.(\d+)\]\)$/i or $line =~ /^Received: from \S* \(\[(\d+)\.(\d+)\.(\d+)\.(\d+)\]/i or $line =~ /^Received: from.*?\(\[(\d+)\.(\d+)\.(\d+)\.(\d+)\]/i or $line =~ /^Received: from \[(\d+)\.(\d+)\.(\d+)\.(\d+)\] \(helo=.*\)$/i # And if all those fail we try a limited version of the original: or $line =~ /^Received: .*?\b(\d+)\.(\d+)\.(\d+)\.(\d+)(?:$|[^0-9.])/i) { next unless($1<256 and $2<256 and $3<256 and $4<256 and $1 != 10 and ($1 != 192 or $2 != 168) and ($1 != 172 or $2<16 or $2>31)); # Some mail servers have a version number that looks like an IP address: next if(($1 and "0" eq substr($1,0,1)) or ($2 and "0" eq substr($2,0,1)) or ($3 and "0" eq substr($3,0,1)) or ($4 and "0" eq substr($4,0,1))); $ip="$1.$2.$3.$4"; next if($ipschecked{$ip}); $ipschecked{$ip}=1; $classc="$1.$2.$3.*"; $revip="$4.$3.$2.$1"; next if($skip{$ip} or $skip{$classc}); if($nocheck{$ip} or $nocheck{$classc}) { $foundcleanone=1; } else { my $notes="X-SpamCheck-$ip:"; $line=&checkrevdns($ip); unless($line) { $notes.=" DNS"; if($foundcleanone) { $notes.="(ignored)"; $dnsdemerits=1 if($dnsdemerits<1); } else { $notes{"{DNS} "}=1; $dnsdemerits+=2; } } $notes{"{badip} "}=1,$notes.=" badip",$dnsdemerits+=3 if($badip{$ip} or $badip{$classc}); # $line=&checkrevip("$revip.dialups.mail-abuse.org."); # a.k.a. dul.maps.vix.com. # if($line) { # $notes.=" DUL"; # if($foundcleanone) { # $notes.="(ignored)"; # $dnsdemerits=1 if($dnsdemerits<1); # } else { # $notes{"{DUL} "}=1; # $dnsdemerits+=2; # } # } # $line=&checkrevip("$revip.relays.mail-abuse.org."); # $notes{"{RSS} "}=1,$notes.=" RSS",$dnsdemerits+=3 if($line); # $line=&checkrevip("$revip.blackholes.mail-abuse.org."); # $notes{"{BHMA} "}=1,$notes.=" BHMA",$dnsdemerits+=3 if($line); $line=&checkrevip("$revip.bl.spamcop.net."); $notes{"{SC} "}=1,$notes.=" SC",$dnsdemerits+=3 if($line); if($firsthost) { $line=&checkrevip("$revip.zen.spamhaus.org."); if($line) { $line =~ s/^127\.0\.0\.(\d+)$/$1/s; $notes{"{ZSH} "}=1,$notes.=" ZSH($line)"; $demerits+=(10<=$line and $line<=11)?2:3; } } $line=&checkrevip("$revip.blackholes.easynet.nl."); $notes{"{BEN} "}=1,$notes.=" BEN",$dnsdemerits+=3 if($line); $line=&checkrevip("$revip.dynablock.easynet.nl."); if($line) { $notes.=" DEN"; if($foundcleanone) { $notes.="(ignored)"; $dnsdemerits=1 if($dnsdemerits<1); } else { $notes{"{DEN} "}=1; $dnsdemerits+=2; } } $line=&checkrevip("$revip.spam.pedantic.org."); $notes{"{SPO} "}=1,$notes.=" SPO",$dnsdemerits+=3 if($line); # $line=&checkrevip("$revip.netblock.pedantic.org."); # if($line) { # $notes.=" NPO"; # if($foundcleanone) { # $notes.="(ignored)"; # $dnsdemerits=1 if($dnsdemerits<1); # } else { # $notes{"{NPO} "}=1; # $dnsdemerits+=2; # } # } $foundcleanone=1 if(substr($notes,-1) eq ":"); $notes.="\n"; push @heads,$notes; } $firsthost=0; } elsif($line =~ /^(?:To|From):[ \t]+.*?<([^<>]+)>/i) { $sender=$1 if(-1()]+)$/i) { $sender=$1 if(-1/i) { $sender=""; push @heads,"X-SpamCheck-from: blank from\n"; $notes{"{badfrom} "}=1; $demerits+=3; } elsif($line =~ /^Content-Type:[ \t]+text\/html/i) { push @heads,"X-SpamCheck-Content-Type: HTML only\n"; $notes{"{HTML} "}=1; $demerits+=2; } elsif($line =~ /^Content-Type:.*charset ?="?(iso-2022-jp|Big5|euc-jp)/i) { push @heads,"X-SpamCheck-Content-Type: Japanese\n"; $notes{"{CT=JP} "}=1; $demerits+=3; } elsif($line =~ /^Content-Type:.*charset ?="?(koi8-r)/i) { push @heads,"X-SpamCheck-Content-Type: Russian\n"; $notes{"{CT=KR} "}=1; $demerits+=3; } elsif($line =~ /^Content-Type:.*charset ?="?(gb2312|ks_c_5601-1987)/i) { push @heads,"X-SpamCheck-Content-Type: charset\n"; $notes{"{CT=CS} "}=1; $demerits+=3; } elsif($line =~ /^Content-Type:.*charset ?="?(windows-1251)/i) { push @heads,"X-SpamCheck-Content-Type: Windows\n"; $notes{"{CT=W} "}=1; $demerits+=2; } elsif($line =~ /^Content-Transfer-Encoding:.*(base64)/i) { push @heads,"X-SpamCheck-Content-Transfer-Encoding: $1\n"; $notes{"{CTE} "}=1; $demerits+=1; } elsif($line =~ /^X-Mailer:\s+(.*)$/i) { $line=$1; foreach my $tmp (@badmailer) { if($line =~ /$tmp/is) { push @heads,"X-SpamCheck-X-Mailer: $line\n"; $notes{"{X-Mailer} "}=1; $demerits+=3; } } } elsif($line =~ /^X-Converted-To-Plain-Text:/i and not $notes{"{XCTPT} "}) { push @heads,"X-SpamCheck-XCTPT: XCTPT\n"; $notes{"{XCTPT} "}=1; $demerits+=1; } elsif($line =~ /^X-Bulkmail:/i and not $notes{"{XBM} "}) { push @heads,"X-SpamCheck-XBM: XBM\n"; $notes{"{XBM} "}=1; $demerits+=3; } } exit(99) if(-1==$endhead); if($maxlinelen_notms>1536) { push @heads,"X-SpamCheck-Line-Too-Long: $maxlinelen_notms\n"; $notes{"{X-LTL} "}=1; $demerits+=2; } elsif($maxlinelen>1536) { push @heads,"X-SpamCheck-Line-Too-Long: $maxlinelen (really $maxlinelen_notms)\n"; $notes{"{X-LTL} "}=1; $demerits+=0; } unless($headers{date}) { push @heads,"X-SpamCheck-nodate: nodate\n"; $notes{"{nodate} "}=1; $demerits+=2; } if(-1==$emailsize) { push @lines,; $bodyread=1; $emailsize=0; map {$emailsize+=length} @lines; } $dnsdemerits=$maxdnsdemerits if($dnsdemerits>$maxdnsdemerits); $demerits+=$dnsdemerits; if(-1 != $autospamsize and -1 != $emailsize and $autospamsize<$emailsize) { $demerits+=$threshold; } if(@probprog and $demerits<$threshold and (-1==$maxprobsize or $emailsize<=$maxprobsize)) { use FileHandle; use IPC::Open2; my $pid=open2(\*READER,\*WRITER,@probprog); WRITER->autoflush(); unless($bodyread) { push @lines,; $bodyread=1; } $SIG{PIPE}="IGNORE"; print WRITER @lines; close(WRITER); my $prob=; $prob=int(100*$prob); push @heads,"X-SpamProbability: $prob\%\n"; close(READER); if($prob==100) { $demerits+=1; } $demerits+=2 if($prob>97); $demerits-=1 if($prob<25); $demerits-=1 if($prob<3); } $line=join('',sort keys %notes); if($demerits>=$threshold) { ($dir,$spamdir)=($spamdir,$dir); push @heads,"X-IsSpam: $demerits\n"; exit 99 if(-1 != $maxspamsize and $maxspamsize<$emailsize); } elsif(not $dir) { exit 0; } if($changesubject) { if($subjnum>-1) { $lines[$subjnum] =~ s/^(Subject: )/$1$line/i; } else { push @heads,"Subject: $line\n"; } } splice @lines,$endhead,0,@heads; $endhead+=scalar(@heads); unless($bodyread) { push @lines,; $bodyread=1; } if($debug or $dir eq "-") { print @lines; } else { open(SAFECAT,"|$safecat '$dir/tmp' '$dir/new'") or exit 111; print SAFECAT @lines or exit 100; close(SAFECAT); # No exit here; we catch exit code later if($spamdir or $?>>8) { exit($?>>8); } exit(99); } sub checkrevip { my $revip=shift; print STDERR "Checking $revip..." if($debug); my $addr=gethostbyname($revip); if(defined $addr) { $addr=join('.',unpack("C".length($addr),$addr)); } else { $addr=""; } print STDERR "$addr\n" if($debug); return $addr; } sub checkrevdns { use Socket; my $addr=shift; my $ip=pack("C4",split /\./,$addr); print STDERR "Checking $addr..." if($debug); my $name=gethostbyaddr($ip,AF_INET); $name="" unless(defined $name); $name="" unless($name =~ /\./s); # 204.188.196.44 returns "No-RDNS-Record" print STDERR "$name\n" if($debug); return $name; } sub parseargs { my @args=@_; while(@args and ($args[0] =~ /^-(([ADPSabcgimnpst])|[dhj])$/s) and @args>($2?1:0)) { my $tmp=$1; shift @args; if($tmp eq "A") { $autospamsize=0+shift @args; } elsif($tmp eq "D") { $maxdnsdemerits=0+shift @args; } elsif($tmp eq "P") { @probprog=@args; @args=(); } elsif($tmp eq "S") { $maxspamsize=0+shift @args; } elsif($tmp eq "a") { $badfrom{shift @args}=1; } elsif($tmp eq "b") { die $usage if($spamdir); $spamdir=shift @args; } elsif($tmp eq "c") { open(CONFIG,"<".shift(@args)) or die "open: $!"; my @newargs=grep !/^#/,; close(CONFIG); chomp(@newargs); parseargs(@newargs); } elsif($tmp eq "d") { $debug=not $debug; } elsif($tmp eq "g") { die $usage if($dir); $dir=shift @args; } elsif($tmp eq "h") { showusage(); exit 0; } elsif($tmp eq "i") { $badip{shift @args}=1; } elsif($tmp eq "j") { $changesubject=not $changesubject; } elsif($tmp eq "m") { push @badmailer,shift @args; } elsif($tmp eq "n") { $nocheck{shift @args}=1; } elsif($tmp eq "p") { die $usage unless($args[0] =~ /^\d+$/s); $maxprobsize=0+shift @args; } elsif($tmp eq "s") { $skip{shift @args}=1; } elsif($tmp eq "t") { die $usage unless($args[0] =~ /^\d+$/s); $threshold=0+shift @args; } else { die $usage; } } shift @args if(@args and $args[0] eq "--"); die $usage if(@args); } sub showusage { print $usage,<<"EOD"; -A <#> Automatically mark as spam any emails of at least <#> bytes -D <#> Maximum number of demerits allowed due to DNS/RBL lookups -P Run as spam-probability filter (must be last args) -S <#> Throw away emails categorized as spam at least <#> bytes -a File containing list of bad "From:"s -b Directory in which to store mail categorized as spam -c Break up file by lines and insert into current command line -d Toggle debug mode -g Directory in which to store mail not categorized as spam -h Show this help message -i File containing list of bad IPs -j Prefix subject with spam categorization(s) -m File containing list of bad X-Mailers -n File containing list of IPs to ignore (intermediate MXs) -p <#> Don't run spam-probability filter on emails >= <#> bytes -s File containing list of IPs to not check (known good IPs) -t <#> Set demerit threshold to <#> EOD }