#!/usr/bin/perl -w 
use strict;

sub parse {
    my ($save, $file) = @_;

    my $state;

    if ($file =~ /\.gz$/) {
	open(F, "zcat '$file' |") or die "cannot open $file: $!";
    } else {
	open(F, "<$file") or die "cannot open $file: $!";
    }
    while (<F>) {
	chomp;
	my ($ts, $pid, $msg) = split(/ +/, $_, 3);

	next unless ($pid && ($pid =~ /^[.0-9a-f]+$/));
	next unless ($msg);
	my $currentstate = $state->{$pid}{state} || '';
	if ($msg eq 'loading plugins from ./plugins') {
	    &$save($state->{$pid}) if ($currentstate && $currentstate ne 'RSET');
	    $state->{$pid} = { state => 'new', start => $ts };
	} elsif ($msg =~ /Connection from ([\S]*) \[([\d.]+)\]/) {
	    save($state->{$pid}) if ($currentstate && $currentstate ne 'RSET');
	    $state->{$pid} = {
				state => 'new',
				cid => $pid,	# connection id, isn't really a process id
				start => $ts,
				hostname => $1,
				ipaddr => $2,
				end => $ts,
			     };
	} elsif ($currentstate eq 'new' && $msg =~ /dispatching EHLO (\S+)/i) {
	    $state->{$pid}{state} = 'EHLO';
	    $state->{$pid}{ehlo} = $1;
	    $state->{$pid}{end} = $ts;
	} elsif ($currentstate eq 'EHLO' && $msg =~ /250-(\S+) Hi (\S*) \[([\d.]+)\]/) {
	    $state->{$pid}{server} = $1;
	    $state->{$pid}{hostname} = $2;
	    $state->{$pid}{ipaddr} = $3;
	    $state->{$pid}{end} = $ts;
	} elsif ($currentstate eq 'EHLO' && $msg =~ /^(\d\d\d) /) {
	    $state->{$pid}{ehloresult} = $msg;
	    $state->{$pid}{finalresult} = $msg;
	} elsif ($currentstate eq 'new' && $msg =~ /dispatching HELO (\S+)/i) {
	    $state->{$pid}{state} = 'HELO';
	    $state->{$pid}{helo} = $1;
	    $state->{$pid}{end} = $ts;
	} elsif ($currentstate eq 'HELO' && $msg =~ /250 (\S+) Hi (\S*) \[([\d.]+)\]/) {
	    $state->{$pid}{server} = $1;
	    $state->{$pid}{hostname} = $2;
	    $state->{$pid}{ipaddr} = $3;
	    $state->{$pid}{end} = $ts;
	    $state->{$pid}{heloresult} = $msg;
	    $state->{$pid}{finalresult} = $msg;
	} elsif ($currentstate eq 'HELO' && $msg =~ /^(\d\d\d) /) {
	    $state->{$pid}{heloresult} = $msg;
	    $state->{$pid}{finalresult} = $msg;
	} elsif ($msg =~ /dispatching MAIL FROM:\s*(\S+)/i) {
	    $state->{$pid}{state} = 'MAIL';
	    $state->{$pid}{mail} = $1;
	    $state->{$pid}{end} = $ts;
	} elsif ($currentstate eq 'MAIL' && $msg =~ /^(\d\d\d) (.*)/i) {
	    $state->{$pid}{mailresult} = $msg;
	    $state->{$pid}{end} = $ts;
	    $state->{$pid}{finalresult} = $msg 
		unless (($state->{$pid}{finalresult} || "") =~ m/^[45]/);
	} elsif ($msg =~ /dispatching RCPT TO:\s*(\S+)/i) {
	    $state->{$pid}{state} = 'RCPT';
	    $state->{$pid}{current_rcpt} = $1;
	    $state->{$pid}{rcpts}{$1}{xrcpts} = [];
	    $state->{$pid}{end} = $ts;
	} elsif ($currentstate eq 'RCPT' && $msg =~ /^(\d\d\d) (.*)/i) {
	    my $current_rcpt = $state->{$pid}{current_rcpt};
	    $state->{$pid}{rcpts}{$current_rcpt}{rcptresult} = $msg;
	    $state->{$pid}{rcpts}{$current_rcpt}{finalresult} =
		(($state->{$pid}{finalresult} || '') =~ m/^[45]/) 
		? $state->{$pid}{finalresult} : $msg;
	    $state->{$pid}{end} = $ts;
	} elsif ($currentstate eq 'RCPT' && $msg =~ /^aliases: (.*) expanded to 1 recipients/i) {
	    my $current_rcpt = $state->{$pid}{current_rcpt};
	    push(@{$state->{$pid}{rcpts}{$current_rcpt}{xrcpts}}, $1);
	} elsif ($currentstate eq 'RCPT' && $msg =~ /^aliases_check plugin: expn: (\S+) ->(( \S+@\S+)+)/) {
	    my $cr2 = $1;
	    my $list = $2;
	    my $current_rcpt = $state->{$pid}{current_rcpt};
	    my @xr = split(" ", $list);
	    $state->{$pid}{rcpts}{$current_rcpt}{xrcpts} = [@xr];
	} elsif ($msg =~ /dispatching DATA/i) {
	    $state->{$pid}{state} = 'DATA';
	    $state->{$pid}{end} = $ts;
	    $state->{$pid}{spamassassin} = [];
	} elsif ($currentstate eq 'DATA' && $msg =~ /^354 /i) {
	    # more to come
	} elsif ($currentstate eq 'DATA' && $msg =~ /^(\d\d\d) (.*)/i) {
	    # print "data result: $msg\n";
	    $state->{$pid}{dataresult} = $msg;
	    for my $r (keys %{$state->{$pid}{rcpts}}) {
		# print "data result: $r => $msg\n";
		$state->{$pid}{rcpts}{$r}{dataresult} = $msg;
		$state->{$pid}{rcpts}{$r}{finalresult} = $msg 
		    if ($state->{$pid}{rcpts}{$r}{finalresult} =~ /^2/);
	    }
	    $state->{$pid}{end} = $ts;
	    # DATA is an implicit RSET:
	    &$save($state->{$pid});
	    # reset mail and rcpts, but not helo!
	    $state->{$pid}{start} = $ts;
	    $state->{$pid}{mail} = undef;
	    $state->{$pid}{mailresult} = undef;
	    $state->{$pid}{current_rcpt} = undef;
	    $state->{$pid}{rcpts} = undef;
	    $state->{$pid}{qrcpts} = [];
	    $state->{$pid}{state} = 'RSET';
	} elsif ($currentstate eq 'DATA' && $msg =~ /^aliases: adding (.*)/i) {
	    push(@{$state->{$pid}{qrcpts}}, $1);
	} elsif ($msg =~ /dispatching QUIT/i) {
	    $state->{$pid}{end} = $ts;
	    &$save($state->{$pid}) if ($state->{$pid}{state} && $state->{$pid}{state} ne 'RSET');
	    $state->{$pid} = undef;
	} elsif ($msg =~ /dispatching RSET/i) {
	    $state->{$pid}{end} = $ts;
	    &$save($state->{$pid}) if ($state->{$pid}{state} ne 'RSET');
	    # reset mail and rcpts, but not helo!
	    $state->{$pid}{start} = $ts;
	    $state->{$pid}{mail} = undef;
	    $state->{$pid}{mailresult} = undef;
	    $state->{$pid}{current_rcpt} = undef;
	    $state->{$pid}{rcpts} = undef;
	    $state->{$pid}{qrcpts} = [];
	    $state->{$pid}{state} = 'RSET';
	} elsif ($msg =~ /spamassassin_rcpt plugin: check_spam: (.*)/) {
	    push @{$state->{$pid}{spamassassin}}, $1;
	}
    }
    close(F);

    for my $pid (keys %$state) {
	&$save($state->{$pid}) if ($state->{$pid}{state} && $state->{$pid}{state} ne 'RSET');
	# delete($state->{$pid});
    }
    undef $state;

}

my @pat = (
    [ 'queued',			'250 Queued', ],
    [ 'badmailfrom',		'550 Mail from .* not accepted here', ],
    [ 'mail-from-temp',		'450 Could not resolve ', ],
    [ 'spamcop',		'550 Blocked - see http://www.spamcop.net/bl.shtml', ],
    [ 'dsn.rfc-ignorant',	'550 Mail from .* rejected because it does not accept bounces. This violates RFC .* http://www.rfc-ignorant.org/' ],
    [ 'whois',			'550 Inaccurate or missing WHOIS data', ],
    [ 'badrcptto',		'550 mail to .* not accepted here', ],
    [ 'relay',			'550 relaying denied', ],
    [ 'spamassassin',		'552 spam score exceeded', ],
    [ 'ordb',			'550 This mail was handled by an open relay - please visit <http://ORDB.org/lookup/', ],
    [ 'greylisting',		'45[02] This mail is temporarily denied', ],
    [ 'netcetera',		'550 Blocked. Contact spam@netcetera.dk Include this in the subject:' ],
    [ 'klez',			'552 Klez Virus Detected', ],
    [ 'no-such-user',		'550 no such user', ],
    [ 'early-talker',		'450 Don\'t be rude and talk before I say hello\!', ],
    [ 'queue-error',		'451 Unable to queue message', ],
    [ 'spamhaus',		'550 http://www.spamhaus.org/SBL/sbl.lasso', ],
    [ 'fqdn-required',		'450 FQDN required in the envelope sender', ],
    [ 'badhelo',		'550 Uh-huh. You\'re .* and I\'m a boil on the bottom of the Marquess of Queensbury\'s great-aunt.' ],
    [ 'message-too-big',	'552 Message too big' ],
    [ 'message-denied',		'552 Message denied' ],
    [ 'mail-from-parse',	'501 could not parse your mail from command' ],
    [ 'rcpt-to-parse',		'501 could not parse recipient' ],
    [ 'virus',			'552 Virus Found:' ],
    [ 'no-rcpt',		'250 .*, sender OK - how exciting to get mail from you' ],
    [ 'no-data',		'250 .*, recipient ok' ],
    [ 'empty',			'500 empty transaction' ],
    [ 'data-incomplete',	'451 Incomplete DATA' ],
    [ 'space-in-mail-from',	'501 syntax error in from: parameter \(no space allowed\)' ],
    [ 'count_denies',		'550 Closing connection. \d+ denied commands.' ],
    [ 'no-mail',		'503 Use MAIL before RCPT' ],
    [ 'space-in-mail-from',	'550 Syntax error in MAIL FROM \(no space after colon allowed\) detected.' ],
    [ 'spamhaus',		'550 http://www.spamhaus.org/query/bl' ],
    [ 'badhelo',		'550 Uh-huh.  You.re .*, and I.m a boil on the bottom of the Marquess of Queensbury.s great-aunt' ],
    [ 'notsubscribed',		'552 .* is not subscribed to ' ],
    [ 'noptr',			'450 Reverse lookup for .* failed' ],
    [ 'cf_wrapper-data',	'452 recipients disagree on whether they want to accept or reject the message. Please try again with a subset.' ],
    [ 'fabel',			'550 Blocked. Contact spam@fabel.dk Include this in the subject:' ],
    [ 'ptrmismatch',		'450 Host name .* doesn.t match IP address ' ],
    [ 'spf-servfail',		'SPF error: .*: DNS error while looking up .*: SERVFAIL' ],
    [ 'cf_wrapper-rcpt',	'450 Mail to .* and .* not accepted in the same transaction. Please try again' ],
    [ 'spf-timeout',		'450 SPF error: .*: DNS error while looking up .*: query timed out' ],
    [ 'count_denies',		'421 Closing connection. \d+ denied commands.' ],
    [ 'badhelo',		'550 Sorry, I don.t believe that you are ' ],
    [ 'helo',			'250 .* Hi .*; I am so happy to meet you.' ],
    [ 'spamassassin',		'552 spamassassin score [.\d]+/[.\d]+ \(#5.7.1\)' ],
    [ 'smtp_callback',		'^550 .* doesn\'t exist according to ' ],
    [ 'smtp_callback-temp',	'^450 .* cannot be verified \(server unreachable\?\) \(#4.1.8\)' ],
    [ 'no-message',		'^$' ],
);

sub msg2key {
    my ($msg) = @_;
    return 'no-message' unless defined $msg;
    for my $pat (@pat) {
	if ($msg =~ m/$pat->[1]/) {
	    return $pat->[0];
	}
    }
    return $msg;
}
1;
