2010-11-20 17:18:13 -05:00
|
|
|
#!/usr/bin/perl
|
|
|
|
# Copyright (C) 2010, Bradley M. Kuhn
|
|
|
|
#
|
|
|
|
# This program gives you software freedom; you can copy, modify, convey,
|
|
|
|
# and/or redistribute it under the terms of the GNU General Public License
|
|
|
|
# as published by the Free Software Foundation; either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful, but
|
|
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
# General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
# with this program in a file called 'GPLv3'. If not, write to the:
|
|
|
|
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor
|
|
|
|
# Boston, MA 02110-1301, USA.
|
|
|
|
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
use Mail::Header;
|
2011-05-28 15:21:28 -04:00
|
|
|
use Date::Manip;
|
|
|
|
|
2010-11-20 17:18:13 -05:00
|
|
|
#use File::Copy;
|
|
|
|
|
2010-11-21 12:10:22 -05:00
|
|
|
# Validate the command line before doing anything else.
#
# The script takes four required arguments (maildir, minimum DSPAM
# probability, minimum DSPAM confidence, age in days) and one optional
# "count only, don't delete" flag, so the valid argument count is 4 or 5.
if (@ARGV < 4 or @ARGV > 5) {
    print STDERR "usage: $0 <MAILDIR_DIRECTORY> <DSPAM_PROBABILITY_MIN> <DSPAM_CONFIDENCE_LEVEL_MIN> <DAYS> [<COUNT_ONLY_DONT_DELETE>]\n";
    exit 1;
}
|
|
|
|
|
2011-05-28 15:21:28 -04:00
|
|
|
# Unpack the command-line arguments.  $COUNT_ONLY is optional and stays
# undefined when the fifth argument is not supplied.
my($MAILDIR_FOLDER, $DSPAM_PROB_MIN, $DSPAM_CONF_MIN, $DAYS, $COUNT_ONLY) = @ARGV;

# $total:        messages old enough, with DSPAM headers, that were examined.
# $countDeleted: examined messages that met both DSPAM thresholds.
my($total, $countDeleted) = (0, 0);

# Cutoff date: only messages dated at or before this point are considered.
# ParseDate signals failure with an empty string, so check the result
# explicitly; an empty cutoff would make every message look "too new".
my $nDaysAgo = defined $DAYS ? ParseDate("$DAYS days ago") : '';
die "Unable to compute a cutoff date from DAYS value '"
    . (defined $DAYS ? $DAYS : '(missing)') . "'\n"
    unless (defined $nDaysAgo and length $nDaysAgo);

# The maildir subdirectories that hold delivered messages.
my @msgDirs = ("$MAILDIR_FOLDER/cur", "$MAILDIR_FOLDER/new");
|
|
|
|
|
|
|
|
# Sanity check: every directory we intend to scan must exist as a
# directory, otherwise the argument is not a usable maildir and we abort.
for my $subDir (@msgDirs) {
    next if -d $subDir;
    die "$MAILDIR_FOLDER must not be a maildir folder (or is unreadable by you), since $subDir isn't a readable directory: $!";
}
|
2010-11-20 17:32:03 -05:00
|
|
|
# Scan every message file in each maildir subdirectory.
#
# For each message:
#   1. read its headers,
#   2. skip it if it has no usable Date: header or is newer than $nDaysAgo,
#   3. skip it if it lacks X-Dspam-Confidence or X-Dspam-Probability,
#   4. count it in $total, and if both DSPAM values meet the configured
#      minimums, count it in $countDeleted and (unless $COUNT_ONLY is set)
#      unlink the file.
#
# The MAIL label is on the per-file loop so that "next MAIL" skips only the
# current message rather than abandoning the rest of the directory.
foreach my $dir (@msgDirs) {
    opendir(my $mailDirHandle, $dir) or die "Unable to open directory $dir for reading: $!";

    # Explicit defined() so a file literally named "0" does not end the loop.
    MAIL: while (defined(my $file = readdir $mailDirHandle)) {
        my $fullFileName = "$dir/$file";

        # Test the full path; a bare $file would be resolved against the
        # current working directory instead of $dir.
        next MAIL if -d $fullFileName;    # skip directories (incl. . and ..)

        my $mailMessage;
        unless (open($mailMessage, "<", $fullFileName)) {
            print STDERR "File, $fullFileName, appears to have disappeared during processing ($!).\n   (Ignoring that fact, but counts may be off.)\n";
            next MAIL;
        }

        # Only the headers are needed, so the handle can be closed as soon
        # as Mail::Header has read them.
        my $header = Mail::Header->new($mailMessage);
        close $mailMessage;
        my $fields = $header->header_hashref;

        my @dateHeaders = @{ $fields->{"Date"} || [] };
        unless (@dateHeaders) {
            print STDERR "File $file has no Date: header.  Skipping.\n";
            next MAIL;
        }

        # Parse every Date: header and keep the earliest one.  The comparison
        # is done on parsed dates, not on the raw header text, because raw
        # RFC 2822 strings do not sort chronologically.
        my $parsedDate;
        foreach my $dt (@dateHeaders) {
            (my $trimmed = $dt) =~ s/^\s+|\s+$//g;
            my $candidate = ParseDate($trimmed);
            # ParseDate reports failure with an empty string, not undef.
            next unless (defined $candidate and length $candidate);
            $parsedDate = $candidate
                if (not defined $parsedDate or $candidate lt $parsedDate);
        }
        unless (defined $parsedDate) {
            my $shown = join(", ", map { my $v = $_; $v =~ s/\s+\z//; $v } @dateHeaders);
            print STDERR "File $file has Unparsable Date header $shown\n";
            next MAIL;
        }

        # Leave messages newer than the cutoff alone.
        next MAIL if ($parsedDate gt $nDaysAgo);

        # Collect the lowest value seen for each DSPAM header, so a message
        # with duplicate headers is judged by its least spammy score.
        my %dspamVal;
        foreach my $val ('Confidence', 'Probability') {
            foreach my $dv (@{ $fields->{"X-Dspam-$val"} || [] }) {
                $dspamVal{$val} = $dv
                    if (not defined $dspamVal{$val} or $dv < $dspamVal{$val});
            }
            if (not defined $dspamVal{$val}) {
                print STDERR "File $file has no X-Dspam-$val header.  Skipping.\n";
                next MAIL;
            }
        }

        $total++;

        if ($dspamVal{Confidence} >= $DSPAM_CONF_MIN and
            $dspamVal{Probability} >= $DSPAM_PROB_MIN) {
            $countDeleted++;
            unless (defined $COUNT_ONLY and $COUNT_ONLY) {
                warn "unable to unlink $fullFileName: $!"
                    unless unlink($fullFileName) == 1;
            }
        }
    }
    closedir $mailDirHandle;
}
|
|
|
|
|
2010-11-21 12:13:02 -05:00
|
|
|
# Report what fraction of the examined messages met the DSPAM thresholds.
#
# $total can legitimately be zero (empty folder, or nothing old enough),
# so guard the division.
my $percent = ($total > 0) ? ($countDeleted / $total) * 100.00 : 0;

# In count-only mode nothing was removed, so report what *would* be deleted;
# otherwise report what was deleted and how many examined messages remain.
# The condition is fully parenthesized because "and" binds more loosely
# than "?:".
# NOTE(review): the "leaves %d" figure is based on $total, which counts only
# examined messages, not every file in the folder.
my $summaryTail = (defined $COUNT_ONLY and $COUNT_ONLY)
    ? "would be deleted.\n"
    : sprintf("were deleted.\nThis leaves %d in the folder.\n",
              $total - $countDeleted);

printf("%.2f%% (%d/%d) %s", $percent, $countDeleted, $total, $summaryTail);
|
2010-11-20 17:18:13 -05:00
|
|
|
###############################################################################
|
|
|
|
#
|
|
|
|
# Local variables:
|
2010-11-20 17:20:38 -05:00
|
|
|
# compile-command: "perl -c remove-spam-high-confidence-maildir.plx"
|
2010-11-20 17:18:13 -05:00
|
|
|
# End:
|