From 7b90af06118505756f17895cecbefc395b394920 Mon Sep 17 00:00:00 2001 From: "Bradley M. Kuhn" Date: Wed, 25 Apr 2018 13:09:12 -0700 Subject: [PATCH] Support Spam Assassin; begin debug of memory leak. I added support for spam assassin, but when I tried to run the script as is on my mail server, it eats up 512MB of RAM just processing the first 50 emails or so. I can't figure out why that is. --- remove-spam-high-confidence-maildir.plx | 73 +++++++++++++++++-------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/remove-spam-high-confidence-maildir.plx b/remove-spam-high-confidence-maildir.plx index 645d499..c1fe634 100755 --- a/remove-spam-high-confidence-maildir.plx +++ b/remove-spam-high-confidence-maildir.plx @@ -22,17 +22,18 @@ use warnings; use Mail::Header; use Date::Manip; +use autodie qw(open close); #use File::Copy; my $VERBOSE = 1; -if (@ARGV < 4 or @ARGV > 5) { - print STDERR "usage: $0 []\n"; +if (@ARGV < 5 or @ARGV > 6) { + print STDERR "usage: $0 []\n"; exit 1; } -my($MAILDIR_FOLDER, $DSPAM_PROB_MIN, $DSPAM_CONF_MIN, $DAYS, $COUNT_ONLY) = @ARGV; +my($MAILDIR_FOLDER, $DSPAM_PROB_MIN, $DSPAM_CONF_MIN, $SPAM_ASSASSIN_SCORE, $DAYS, $COUNT_ONLY) = @ARGV; my($total, $countDeleted, $totalInDate) = (0, 0, 0); @@ -46,16 +47,17 @@ foreach my $dir (@msgDirs) { } foreach my $dir (@msgDirs) { opendir(MAILDIR, $dir) or die "Unable to open directory $dir for reading: $!"; -MAIL: while (my $file = readdir MAILDIR) { +while (my $file = readdir MAILDIR) { + print STDERR "."; next if -d $file; # skip directories my $fullFileName = "$dir/$file"; - unless (open(MAIL_MESSAGE, "<", $fullFileName)) { - print STDERR "File, $fullFileName, appears to have disappeared during processing ($!).\n (Ignoring that fact, but counts may be off.)\n"; - next MAIL; + my $fh; + unless (open($fh, "<", $fullFileName)) { + print STDERR "File, $fullFileName, appears to have disappeared during processing ($!).\n"; } - my $header = new Mail::Header(\*MAIL_MESSAGE); + my $header = new 
Mail::Header($fh); my $fields = $header->header_hashref; my $mailDate; @@ -68,18 +70,18 @@ MAIL: while (my $file = readdir MAILDIR) { } if (not defined $mailDate) { print STDERR "File $file has no Date: header. Skipping.\n"; - next MAIL; + next; } my $parsedDate = ParseDate($mailDate); unless (defined $parsedDate) { print STDERR "File $file has Unparsable Date header $mailDate"; - next MAIL; + next; } $total++; print "\nDate: $parsedDate" if ($VERBOSE > 2); - next MAIL if ($parsedDate gt $nDaysAgo); + next if ($parsedDate gt $nDaysAgo); $totalInDate++; print " Not skipping over date, $nDaysAgo\n" if ($VERBOSE > 2); @@ -93,27 +95,50 @@ MAIL: while (my $file = readdir MAILDIR) { $dspamVal{$val} = $dv if $dv < $dspamVal{$val}; } } - if (not defined $dspamVal{$val}) { - print STDERR "File $file has no X-Dspam-$val header. Skipping.\n"; - next MAIL; - } } - print " Confidence: $dspamVal{Confidence}, Probability: $dspamVal{Probability}\n" if ($VERBOSE > 2); + my $isReadyToDelete = 0; + + if (defined $dspamVal{Confidence} and defined $dspamVal{Probability}) { + $isReadyToDelete = 1 + if ($dspamVal{Confidence} >= $DSPAM_CONF_MIN and + $dspamVal{Probability} >= $DSPAM_PROB_MIN); - if ($dspamVal{Confidence} >= $DSPAM_CONF_MIN and - $dspamVal{Probability} >= $DSPAM_PROB_MIN) { - $countDeleted++; - print " counting this one\n" if ($VERBOSE > 2); - unless (defined $COUNT_ONLY and $COUNT_ONLY) { - warn "unable to unlink $fullFileName: $!" 
- unless unlink("$fullFileName") == 1; + } else { + my $spamStatusVal; + foreach my $dv (@{$fields->{"X-Spam-Status"}}) { + chomp $dv; + print " X-Spam-Status found: $dv\n" if ($VERBOSE > 3); + if ($dv =~ /Yes.*score\s*=\s*([\d\.]+)\s+/i) { + my $newVal = $1; + if (not defined $spamStatusVal) { + $spamStatusVal = $newVal; + } else { + $spamStatusVal = $newVal if $newVal < $spamStatusVal; + } + } + } + print " Final Spam Status from Spam Assassin: $spamStatusVal\n" if ($VERBOSE > 2); + if (not defined $spamStatusVal) { + print STDERR "File $file has no headers for Spam. Skipping.\n"; + } else { + $isReadyToDelete = 1 if ($spamStatusVal >= $SPAM_ASSASSIN_SCORE); + } + if ($isReadyToDelete) { + $countDeleted++; + print " counting this one\n" if ($VERBOSE > 2); + unless (defined $COUNT_ONLY and $COUNT_ONLY) { + warn "unable to unlink $fullFileName: $!" + unless unlink("$fullFileName") == 1; + } } } - close MAIL_MESSAGE; + $fh->close(); + $fh = $header = $fields = undef; } close MAILDIR; + print STDERR ".\n"; } my $percent = ($countDeleted / $total) * 100.00;