Better handling of UTF-8 subject lines & shorten file names
The UTF-8 strings, when not decoded from the MIME header in the 'Subject' field, were making very long file names. Fix both the UTF-8 strings to actually be decoded (which makes them shorter anyway when processed here), but also set a 128 character limit on the amount of the subject line of emails that go into the filenames.
This commit is contained in:
parent
111f569b9c
commit
8fc1f5371f
1 changed files with 3 additions and 1 deletions
|
@ -25,6 +25,7 @@ use Email::Address::XS;
|
||||||
use File::Copy;
|
use File::Copy;
|
||||||
use Date::Manip::DM6 qw(ParseDate UnixDate);
|
use Date::Manip::DM6 qw(ParseDate UnixDate);
|
||||||
use Text::CSV; # libtext-csv-perl in Debian
|
use Text::CSV; # libtext-csv-perl in Debian
|
||||||
|
use Encode qw/encode decode/;
|
||||||
|
|
||||||
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE',
|
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE',
|
||||||
'journalist-privilege' => 'PRIVILEGE' );
|
'journalist-privilege' => 'PRIVILEGE' );
|
||||||
|
@ -121,7 +122,7 @@ sub ProcessMailDir($$$) {
|
||||||
$parsed{Date} = ParseDate($item);
|
$parsed{Date} = ParseDate($item);
|
||||||
die "$item is in the date field but doesn't parse" unless defined $parsed{Date} and $parsed{Date} !~ /^\s*$/;
|
die "$item is in the date field but doesn't parse" unless defined $parsed{Date} and $parsed{Date} !~ /^\s*$/;
|
||||||
} else {
|
} else {
|
||||||
$parsed{$fieldName} = $item;
|
$parsed{$fieldName} = decode('MIME-Header', $item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -129,6 +130,7 @@ sub ProcessMailDir($$$) {
|
||||||
$parsed{Subject} =~ s/[\r\n]/ /gm;
|
$parsed{Subject} =~ s/[\r\n]/ /gm;
|
||||||
my $subjectDashes = $parsed{Subject};
|
my $subjectDashes = $parsed{Subject};
|
||||||
$subjectDashes =~ s/[^a-zA-Z0-9]/_/g; $subjectDashes =~ s/_+/_/g; $subjectDashes =~ s/_+$//g; $subjectDashes =~ s/^_+//g;
|
$subjectDashes =~ s/[^a-zA-Z0-9]/_/g; $subjectDashes =~ s/_+/_/g; $subjectDashes =~ s/_+$//g; $subjectDashes =~ s/^_+//g;
|
||||||
|
$subjectDashes = substr($subjectDashes, 0, 127);
|
||||||
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
|
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
|
||||||
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
|
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
|
||||||
die "$fileName has no subject" if not defined $parsed{Subject};
|
die "$fileName has no subject" if not defined $parsed{Subject};
|
||||||
|
|
Loading…
Reference in a new issue