#!/usr/bin/perl
#
# Simple script to find blacklist candidates.
#
# Written by Simon Carr <code@simoncarr.com>, 2004
#
# Parses Postfix 1.x logs for rejected mail.  Upon identifying a
# blacklist candidate it creates an epoch-timestamped file for
# examination by the other script, called execute.
#

#### Configuration variables.

# This is how many times someone can hit us with a
# rejected piece of mail before we take action.
$thresh0 = 100; # 25 is low, 200 is high.  I like between 50 and 100

# Our logfile, of course.
$file = '/Users/scarr/tmp/mail.log';

# Verbosity.  Either on or off.
$verbose = 1;
# Debugging.  Either on or off, or not there at all!
#$debug = 1;

# This is where we store the files created by the script.
$temp_dir = "/Users/scarr/tmp/hitterblock";

## Positive score and negative score defined here.  Positive should have a
# large number, negative should have a smaller number, incremental should have
# the smallest number because we haven't uniquely identified why they are being
# blocked.  Sub and exc scores should be negative numbers.
#
# Something you should know about this: each score type adds on, it doesn't
# just match once per line and then move on.  So let's say below we've got a
# spammer who's sending to 12@12.com, but they're also in an RBL so the log
# reads "blocked using".  That'll be a score of 4 rather than 2.  If they're
# also doing it from a clearly identified DSL line, it'll be a score of 6.
#
$posscore = 2;
$negscore = 0.1;
$subscore = -0.1;
$excscore = -100;
$incremental = 0.05;

## Here we define what we're looking for other than the reject: mail.
#
# Every entry below is a regular expression that is matched against the
# whole log line.  They are written in SINGLE quotes on purpose: inside
# double quotes Perl silently drops the backslash of "\.", which turns an
# intended literal dot into "match any character".

## Positive means we score $posscore based on these variables.  We have
# high confidence that these are spammers.
@positive = (
    'blocked using',                    # RBL caught it
    '12\.com',                          # Oft spammed domain
    'agin\.com',                        # Oft spammed domain
    '[0-9]{1,3}-[0-9]{1,3}-[0-9]{1,3}', # matches 64-252-43 for example, for DSL hosts and the like.
);

## Negative means we score $negscore based on these variables, trying to
# cut down on false positives.
@negative = (
    'Domain not found',             # Pretty innocent error, could come from badly configured apache.
    'Access denied',                # Some goof trying to send from an @hotmail.com addy outside of hotmail, for example.
    'need fully-qualified address', # Sorta like domain not found.
);

## Subtract means we'll actually remove points from the score
# if the line matches any of these.
@subtract = (
    'testestets', # placeholder
);

## These are outright exceptions.  We never want to see hosts that match these
# blocked, so we'll subtract a big number ($excscore) from the score.
@exceptions = (
    'redhat\.com',
    'bellnexxia\.net',
    #'rogers\.com',
    'apache\.org',
    'apple\.com',
    'sourceforge\.net',
);

## Configurations end here.

#### -- MAIN -- ####

## We open the mail log.  Three-argument open with a lexical handle, so
# nothing in $file can ever be mistaken for an open mode.
open(my $log_fh, '<', $file) or die "can't open $file $!";

## Hosts we have already written a blackhole file for.  They are skipped
# from then on, so we never create duplicate files.  This is kept apart
# from the running score so that a host pushed below zero by $excscore is
# still scored on its own lines.
my %blackholed;

## Set once we notice the log was rotated underneath us (see the bottom
# of the loop).
my $rotated = 0;

## This loop goes on forever, following the log the way "tail -F" would.
for (;;) {
    while (my $string = <$log_fh>) {
        ## We split the log output based on whitespace here.
        my @crap = split(" ", $string);

        ## Only "reject:" lines are interesting.
        next unless defined $crap[5] && $crap[5] =~ /reject:/;

        ## This just prints us out a fancy rotating thinge...you'll see it.
        # Pointless?  Why yes!  Verbose mode only.
        if ($verbose) { print "\\\r\|\r\/\r\-\r\/\r"; }

        ## In this instance $crap[8] will hold data like;
        # adsl-68-73-204-114.dsl.sfldmi.ameritech.net[68.73.204.114]:
        # for example, so we pull out what sits between the brackets.
        # Only address characters are accepted because the value ends up
        # in a filename below; lines without a usable address are skipped.
        next unless defined $crap[8];
        my ($ip) = $crap[8] =~ /\[([0-9A-Fa-f.:]+)\]/;
        next unless defined $ip;

        ## Already has a blackhole file, nothing more to do for this host.
        next if $blackholed{$ip};

        ## Score THIS line and add it to the running total for the host.
        # The score is always recalculated, so a value left over from a
        # previous line (possibly another host) can never be added.
        my $score = calculate_score($string);
        $count{$ip} = ($count{$ip} || 0) + $score;

        # Debugging
        #if($debug){print "$ip = $count{$ip} + $score\n";}

        next if $count{$ip} < $thresh0;

        ## Nobody gets reported with a score higher than $thresh0.
        $count{$ip} = $thresh0;

        ## Define the time in epoch.
        my $time = time();

        ## Some verbose output here.  Some data, and a line from the logs
        # so I can verify it's all doing what it should.
        if ($verbose) {
            print "\nblackhole $ip\n$crap[2] - $count{$ip}\n";
            print "-- reported by $crap[3]\n$string\n";
        }

        ## Formatting the filename here.
        my $filename = "blackhole_" . $time . "_" . $ip;

        # Debugging
        if ($debug) { print $filename . "\n"; }

        ## Here we open the file and write the contents, then close (of
        # course).  Write errors on a buffered handle may only show up at
        # close time, so that is checked as well.
        my $path = "$temp_dir/$filename";
        open(my $out_fh, '>', $path) or die "can't open $path $!";
        print {$out_fh} $time . " " . $ip . "\n" or die "can't write $path $!";
        close($out_fh) or die "can't close $path $!";

        ## Remember the host so we don't create duplicate files, and drop
        # its running score since it is no longer needed.
        $blackholed{$ip} = 1;
        delete $count{$ip};
    }#while

    ## The log was rotated.  The while loop above has just drained whatever
    # was still left in the old file, so now switch over to the new one and
    # start reading it from the top.
    if ($rotated) {
        if (open(my $fresh_fh, '<', $file)) {
            close($log_fh);
            $log_fh  = $fresh_fh;
            $rotated = 0;
        }
        else {
            sleep 2;    # new log not readable yet, try again shortly
        }
        next;
    }

    ## This continually "tails" the file.  Seeking to the current position
    # clears the end-of-file condition so new lines get picked up.
    sleep 2;
    seek($log_fh, 0, 1);

    ## Compare the file now sitting at $file with the one we have open.
    my @on_disk = stat($file);
    my @opened  = stat($log_fh);
    if (@on_disk && @opened) {
        if ($on_disk[0] != $opened[0] || $on_disk[1] != $opened[1]) {
            ## Different device/inode: the log was rotated.
            $rotated = 1;
        }
        elsif ($on_disk[7] < tell($log_fh)) {
            ## Same file but shorter than where we are: it was truncated
            # in place, so start over from the beginning.
            seek($log_fh, 0, 0);
        }
    }
}#for


## calculate_score($string)
#
# Works out the score for a single log line.
#
#   $string - the log line; every configured pattern is matched against
#             the whole line.
#
# Returns the sum of the scores of every pattern that matched:
# $excscore per @exceptions hit, $subscore per @subtract hit, $negscore
# per @negative hit and $posscore per @positive hit.  Scores add up, a
# line can hit several patterns.  If no pattern matched at all,
# $incremental is returned instead.
#
# Declared without a prototype: the sub takes one argument, and an empty
# "()" prototype would make any call compiled after this definition a
# compile-time error.
sub calculate_score {
    my ($string) = @_;

    ## Pattern list, score per hit and (optional) debug label, in the
    # order they are applied.  We start with exceptions, then
    # subtractions, then the negative (lower) scores and finally the
    # positive ones, where we're sure they're spammers.
    my @rules = (
        [ \@exceptions, $excscore, 'caught an exception' ],
        [ \@subtract,   $subscore, 'caught a subtraction..' ],
        [ \@negative,   $negscore, undef ],
        [ \@positive,   $posscore, undef ],
    );

    my $total_score = 0;
    my $matched     = 0;

    foreach my $rule (@rules) {
        my ($patterns, $points, $label) = @$rule;
        foreach my $pattern (@$patterns) {
            next unless $string =~ /$pattern/;
            $total_score = $total_score + $points;
            $matched     = 1;
            if ($debug && defined $label) { print "$label $pattern $string\n"; }
        }#foreach
    }#foreach

    ## We don't know what's going on with these guys, but they're getting
    # rejected, so we're assigning them a score anyway, just a really low
    # one.  This is decided by whether anything matched, not by the total
    # being zero, so matches that cancel each other out stay at zero.
    return $matched ? $total_score : $incremental;
}#sub
