small-hacks/remove-dup-mails-from-maildir.plx

128 lines
3.9 KiB
Perl
Executable file

#!/usr/bin/perl
use strict;
use warnings;
use Mail::Header;
use File::Copy;
if (@ARGV < 2) {
print STDERR "usage: $0 <TYPE> <SOURCE_MAILDIR_FOLDER_PATH> [<MALDIRS_LOOK_FOR_DUPS_IN> ...]\n";
exit 1;
}
my($TYPE, $MAILDIR_FOLDER) = ($ARGV[0], $ARGV[1]);
die "Bad type, $TYPE" unless $TYPE =~ /^(unlink|svn|print)$/;
my (@dupFolders) = @ARGV[2..$#ARGV];
my %msgs; # indexed by Message-Id
foreach my $folder (@dupFolders) {
my @msgDirs = ("$folder/cur", "$folder/new");
foreach my $dir (@msgDirs) {
die "$MAILDIR_FOLDER must not be a maildir folder (or is unreadable by you), since $dir isn't a readable directory: $!"
unless (-d $dir);
}
foreach my $dir (@msgDirs) {
opendir(MAILDIR, $dir) or die "Unable to open directory $dir for reading: $!";
while (my $file = readdir MAILDIR) {
next if -d $file; # skip directories
my $existing_file = "$dir/$file";
open(MAIL_MESSAGE, "<", $existing_file) or
die "unable to open $existing_file for reading: $!";
my $header = new Mail::Header(\*MAIL_MESSAGE);
my $fields = $header->header_hashref;
my $id = $fields->{'Message-ID'}[0];
chomp $id;
if ($id !~ s/^\s*\<?\s*(\S+)\s*\>?.*$/$1/) {
$id = $fields->{'Resent-Message-ID'}[0];
chomp $id;
unless ($id =~ s/^\s*\<?\s*(\S+)\s*\>?.*$/$1/) {
warn "weirdly formatted, or missing Message-ID (or Resent-Message-ID), \"$id\" in $dir/$file";
next;
}
}
if (not defined $id or $id =~ /^\s*$/) {
warn "$dir/$file has no message ID";
next;
}
$id =~ s/^[\<\s]+//; $id =~ s/[\>\s]$//;
# die "Duplicate message ID's $id\n" if defined $msgs{$id};
$msgs{$id} = $fields;
}
close MAIL_MESSAGE;
}
close MAILDIR;
}
# This code shouldn't be all cut-and-pasty from the above, but I was in a hurry.
my @msgDirs = ("$MAILDIR_FOLDER/cur", "$MAILDIR_FOLDER/new");
foreach my $dir (@msgDirs) {
die "$MAILDIR_FOLDER must not be a maildir folder (or is unreadable by you), since $dir isn't a readable directory: $!"
unless (-d $dir);
}
foreach my $dir (@msgDirs) {
opendir(MAILDIR, $dir) or die "Unable to open directory $dir for reading: $!";
while (my $file = readdir MAILDIR) {
next if -d $file; # skip directories
my $existing_file = "$dir/$file";
open(MAIL_MESSAGE, "<", $existing_file) or
die "unable to open $existing_file for reading: $!";
my $header = new Mail::Header(\*MAIL_MESSAGE);
my $fields = $header->header_hashref;
my $id = $fields->{'Message-ID'}[0];
chomp $id;
if ($id !~ s/^\s*\<?\s*(\S+)\s*\>?.*$/$1/) {
$id = $fields->{'Resent-Message-ID'}[0];
chomp $id;
unless ($id =~ s/^\s*\<?\s*(\S+)\s*\>?.*$/$1/) {
warn "weirdly formatted, or missing Message-ID (or Resent-Message-ID), \"$id\" in $dir/$file";
next;
}
}
$id =~ s/^[\<\s]+//; $id =~ s/[\>\s]$//;
if (not defined $id or $id =~ /^\s*$/) {
warn "$dir/$file has no message ID";
next;
}
# If we already have this message elsehwere, then we simply remove it
# from this folder here. Otherwise, we note that we have it by adding
# it to %msgs.
if (defined $msgs{$id}) {
if ($TYPE eq "print") {
print "$id\n";
} elsif ($TYPE eq "svn") {
system("svn rm \"$existing_file\"");
die "Unable to unlink file $existing_file: $!"
unless $? == 0;
} elsif ($TYPE eq 'unlink') {
print STDERR "Removing $existing_file\n";
die "Unable to unlink $existing_file: $!"
unless unlink($existing_file) == 1;
} else {
die "doing nothing here, type operation not known: $TYPE";
}
} else {
$msgs{$id} = $fields;
}
close MAIL_MESSAGE;
}
close MAILDIR;
}
###############################################################################
#
# Local variables:
# compile-command: "perl -c remove-dup-mails-from-maildir.plx"
# End: