| 
									
										
										
										
											2010-11-20 17:18:13 -05:00
										 |  |  | #!/usr/bin/perl | 
					
						
							|  |  |  | # Copyright (C) 2010, Bradley M. Kuhn | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # This program gives you software freedom; you can copy, modify, convey, | 
					
						
							|  |  |  | # and/or redistribute it under the terms of the GNU General Public License | 
					
						
							|  |  |  | # as published by the Free Software Foundation; either version 3 of the | 
					
						
							|  |  |  | # License, or (at your option) any later version. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # This program is distributed in the hope that it will be useful, but | 
					
						
							|  |  |  | # WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
					
						
							|  |  |  | # General Public License for more details. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # You should have received a copy of the GNU General Public License along | 
					
						
							|  |  |  | # with this program in a file called 'GPLv3'.  If not, write to the: | 
					
						
							|  |  |  | #    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor | 
					
						
							|  |  |  | #                                    Boston, MA 02110-1301, USA. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use strict; | 
					
						
							|  |  |  | use warnings; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Mail::Header; | 
					
						
							| 
									
										
										
										
											2011-05-28 15:21:28 -04:00
										 |  |  | use Date::Manip; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-11-20 17:18:13 -05:00
										 |  |  | #use File::Copy; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-25 18:25:11 -05:00
										 # Diagnostic verbosity; the progress messages printed to STDOUT while
# scanning are only emitted when this is greater than 2.
my $VERBOSE = 1;

# Exactly four or five command-line arguments are accepted; anything else
# prints the usage line on STDERR and exits with status 1.
my $argCount = scalar(@ARGV);
unless ($argCount == 4 or $argCount == 5) {
  print STDERR "usage: $0 <MAILDIR_DIRECTORY> <DSPAM_PROBABILITY_MIN> <DSPAM_CONFIDENCE_LEVEL_MIN> <DAYS> [<COUNT_ONLY_DONT_DELETE>]\n";
  exit 1;
}

# Positional arguments, in usage order.  $COUNT_ONLY is undef when the
# optional fifth argument was not supplied.
my($MAILDIR_FOLDER, $DSPAM_PROB_MIN, $DSPAM_CONF_MIN, $DAYS, $COUNT_ONLY) =
  @ARGV[0 .. 4];
					
						
							| 
									
										
										
										
											2010-11-20 17:18:13 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-25 11:22:44 -04:00
										 # Running tallies:
#   $total        - messages whose Date: header could be parsed
#   $countDeleted - messages deleted (or, in count-only mode, that would be)
#   $totalInDate  - messages dated at or before the cutoff
my($total, $countDeleted, $totalInDate) = (0, 0, 0);

# Cutoff timestamp in Date::Manip's parsed form; messages dated after it
# are left alone.
my $nDaysAgo = ParseDate("$DAYS days ago");

# Date::Manip documents ParseDate as returning an empty string for input it
# cannot parse.  Stop here in that case: with an empty cutoff every message
# would compare as newer than it, and the run would silently do nothing.
die "Unable to compute a cutoff date from <DAYS> value '$DAYS'\n"
  unless $nDaysAgo;

# The two subdirectories of the maildir that are scanned for messages.
my @msgDirs = ("$MAILDIR_FOLDER/cur", "$MAILDIR_FOLDER/new");
					
						
							|  |  |  | 
 | 
					
						
							# Refuse to run unless every message subdirectory exists as a directory;
# this is done for all of them before any message is touched.
for my $msgDir (@msgDirs) {
  next if -d $msgDir;
  die "$MAILDIR_FOLDER must not be a maildir folder (or is unreadable by you), since $msgDir isn't a readable directory: $!";
}
					
						
							| 
									
										
										
										
											2011-11-25 18:25:11 -05:00
										 # Scan every message file in each maildir subdirectory.  A message is
# deleted (or, in count-only mode, merely tallied) when it is dated at or
# before the cutoff in $nDaysAgo AND its X-Dspam-Confidence and
# X-Dspam-Probability headers both meet the thresholds given on the command
# line.  Messages lacking a usable Date: header or either X-Dspam header are
# reported on STDERR and left alone.
foreach my $dir (@msgDirs) {
  opendir(my $dirHandle, $dir)
    or die "Unable to open directory $dir for reading: $!";

  # Explicit defined() so that a file literally named "0" cannot end the scan.
  MAIL: while (defined(my $file = readdir $dirHandle)) {
    my $fullFileName = "$dir/$file";

    # Test the full path: a bare name would be resolved against the current
    # working directory instead of $dir.  This also skips "." and "..".
    next MAIL if -d $fullFileName;

    my $mailMessage;
    unless (open($mailMessage, "<", $fullFileName)) {
      print STDERR "File, $fullFileName, appears to have disappeared during processing ($!).\n    (Ignoring that fact, but counts may be off.)\n";
      next MAIL;
    }

    # Only the header block is needed, so release the handle immediately;
    # that way none of the early "next MAIL" exits below can leave it open.
    my $header = Mail::Header->new($mailMessage);
    close $mailMessage;
    my $fields = $header->header_hashref;

    my $dateHeaders = $fields->{"Date"} || [];
    unless (@$dateHeaders) {
      print STDERR "File $file has no Date: header. Skipping.\n";
      next MAIL;
    }

    # A message may carry several Date: headers; use the earliest one that
    # parses.  The comparison is made on ParseDate's output (the same form
    # compared against the cutoff below), not on the raw header text, whose
    # lexical order says nothing about chronology.
    my $parsedDate;
    foreach my $dt (@$dateHeaders) {
      my $candidate = ParseDate($dt);
      # ParseDate signals failure with an empty string, not undef.
      next unless $candidate;
      $parsedDate = $candidate
        if (not defined $parsedDate or $candidate lt $parsedDate);
    }
    unless (defined $parsedDate) {
      # Without this guard an unparsable date would compare as older than
      # any cutoff and make the message eligible for deletion.
      my @rawDates = @$dateHeaders;
      chomp @rawDates;
      print STDERR "File $file has Unparsable Date header ",
        join(" / ", @rawDates), "\n";
      next MAIL;
    }
    $total++;

    print "\nDate: $parsedDate" if ($VERBOSE > 2);

    # Too recent: leave it in the folder.
    next MAIL if ($parsedDate gt $nDaysAgo);
    $totalInDate++;

    print "    Not skipping over date, $nDaysAgo\n" if ($VERBOSE > 2);

    # For each DSPAM score keep the lowest value found among repeated
    # headers, so a message is judged by its least spam-like score.
    my %dspamVal;
    foreach my $val ('Confidence', 'Probability') {
      foreach my $dv (@{ $fields->{"X-Dspam-$val"} || [] }) {
        chomp $dv;
        $dspamVal{$val} = $dv
          if (not defined $dspamVal{$val} or $dv < $dspamVal{$val});
      }
      if (not defined $dspamVal{$val}) {
        print STDERR "File $file has no X-Dspam-$val header. Skipping.\n";
        next MAIL;
      }
    }

    print " Confidence: $dspamVal{Confidence}, Probability: $dspamVal{Probability}\n"
      if ($VERBOSE > 2);

    next MAIL unless ($dspamVal{Confidence}  >= $DSPAM_CONF_MIN and
                      $dspamVal{Probability} >= $DSPAM_PROB_MIN);

    print "    counting this one\n" if ($VERBOSE > 2);

    unless (defined $COUNT_ONLY and $COUNT_ONLY) {
      unless (unlink($fullFileName) == 1) {
        warn "unable to unlink $fullFileName: $!";
        # The message is still in the folder, so do not report it as deleted.
        next MAIL;
      }
    }
    $countDeleted++;
  }

  # Directory handles are released with closedir; close() does not apply.
  closedir $dirHandle;
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-11-21 12:13:02 -05:00
										 # Summary, part 1: share of all counted messages that was (or, in
# count-only mode, would be) deleted.  An empty folder leaves $total at
# zero, so guard the division and report 0% in that case.
my $percent = $total ? ($countDeleted / $total) * 100.00 : 0;

print sprintf("%.2f", $percent), "% ($countDeleted/$total) ",
  ((defined $COUNT_ONLY and $COUNT_ONLY) ?
   " would be deleted.\n" :
   sprintf("were deleted.\nThis leaves %d in the folder.\n",
          $total - $countDeleted));

# Summary, part 2: the same share, restricted to messages old enough to
# fall inside the date range.  The numerator is $countDeleted, matching the
# "($countDeleted/$totalInDate)" ratio printed beside the percentage; the
# division is guarded for the case where no message was old enough.
my $percentInDate =
  $totalInDate ? ($countDeleted / $totalInDate) * 100.00 : 0;

print sprintf("Of those matching the date range, %.2f", $percentInDate), "% ($countDeleted/$totalInDate) ",
  ((defined $COUNT_ONLY and $COUNT_ONLY) ?
   " would be deleted.\n" :
   "were deleted.\n");
print sprintf("%d in the folder don't match that date range.\n",
          $total - $totalInDate);
					
						
							| 
									
										
										
										
											2010-11-20 17:18:13 -05:00
										 |  |  | ############################################################################### | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Local variables: | 
					
						
							| 
									
										
										
										
											2010-11-20 17:20:38 -05:00
										 |  |  | # compile-command: "perl -c remove-spam-high-confidence-maildir.plx" | 
					
						
							| 
									
										
										
										
											2010-11-20 17:18:13 -05:00
										 |  |  | # End: |