Basic support for creating the CSV files & names of maildir files
This commit is contained in:
parent
123dbd6018
commit
4627b4e24d
1 changed files with 66 additions and 4 deletions
|
@ -8,10 +8,14 @@
|
|||
# native is where things are just moved over at the end
|
||||
# numbered is where the numbered items go
|
||||
|
||||
use utf8;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use autodie qw(:all);
|
||||
|
||||
use open qw/ :std :encoding(utf-8) /;
|
||||
|
||||
use Getopt::Long;
|
||||
use File::Spec::Functions;
|
||||
use File::Spec;
|
||||
|
@ -19,6 +23,8 @@ use File::Path qw(make_path);
|
|||
use Mail::Header;
|
||||
use Email::Address::XS;
|
||||
use File::Copy;
|
||||
use Date::Manip::DM6 qw(ParseDate UnixDate);
|
||||
use Text::CSV; # libtext-csv-perl in Debian
|
||||
|
||||
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE',
|
||||
'journalist-privilege' => 'PRIVILEGE' );
|
||||
|
@ -64,8 +70,23 @@ UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStar
|
|||
|
||||
my $upiCurrentNum = $upiStart;
|
||||
|
||||
sub ProcessMailDir($$) {
|
||||
my($inputMailDir, $destOutDir) = @_;
|
||||
my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1});
|
||||
my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv"));
|
||||
UsageAndExit("\"$csvLogFile\" cannot already exist! Do not attempt to number the same group twice!")
|
||||
if (-e $csvLogFile);
|
||||
|
||||
my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE');
|
||||
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
||||
@headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS',
|
||||
'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS',
|
||||
'PRIVILEGE CLAIMED');
|
||||
}
|
||||
|
||||
my @CSV_OUTPUT_ROWS;
|
||||
|
||||
sub ProcessMailDir($$$) {
|
||||
my($rfp, $inputMailDir, $destOutDir) = @_;
|
||||
|
||||
my @msgDirs = (catfile($inputMailDir, 'new'), catfile($inputMailDir, 'cur'));
|
||||
foreach my $dir (@msgDirs) {
|
||||
|
@ -78,16 +99,48 @@ sub ProcessMailDir($$) {
|
|||
next if -d $file; # skip directories
|
||||
my $msgFile = catfile($dir, $file);
|
||||
open(my $msgFH, "<", $msgFile);
|
||||
my $upiFull = sprintf("UPI-SFC-%07d", ++$upiCurrentNum);
|
||||
print " $msgFile\n";
|
||||
my $header = new Mail::Header($msgFH);
|
||||
my $fields = $header->header_hashref;
|
||||
my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CcName => '', CcAddr => '', 'Subject' => '',
|
||||
Date => '');
|
||||
foreach my $fieldName (qw/From To Cc Subject Date/) {
|
||||
foreach my $item (@{$fields->{$fieldName}}) {
|
||||
chomp $item;
|
||||
print " $fieldName $item\n";
|
||||
if ($fieldName =~ /From|To|Cc/) {
|
||||
my $addr = Email::Address::XS->parse($item);
|
||||
if ($addr->name ne "") {
|
||||
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Name"} .= $addr->name;
|
||||
}
|
||||
if ($addr->address ne "") {
|
||||
$parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Addr"} .= $addr->address;
|
||||
}
|
||||
} elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) {
|
||||
$parsed{Date} = ParseDate($item);
|
||||
die "$item is in the date field but doesn't parse" unless defined $parsed{Date} and $parsed{Date} !~ /^\s*$/;
|
||||
} else {
|
||||
$parsed{$fieldName} = $item;
|
||||
}
|
||||
}
|
||||
}
|
||||
close $msgFH;
|
||||
$parsed{Subject} =~ s/[\r\n]/ /gm;
|
||||
my $subjectDashes = $parsed{Subject};
|
||||
$subjectDashes =~ s/[^a-zA-Z0-9]/_/g; $subjectDashes =~ s/_+/_/g; $subjectDashes =~ s/_+$//g; $subjectDashes =~ s/^_+//g;
|
||||
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
|
||||
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
|
||||
print "$fileName\n";
|
||||
die "$fileName has no subject" if not defined $parsed{Subject};
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject},
|
||||
UnixDate("%D", $parsed{Date}), $parsed{ToName}, $parsed{ToAddr},
|
||||
$parsed{CcName}, $parsed{CcAddr}, "", $GROUP ]);
|
||||
} else {
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp) ]);
|
||||
}
|
||||
}
|
||||
closedir $mailDirFH;
|
||||
}
|
||||
|
@ -109,7 +162,7 @@ while (my $rfp = readdir $topDH) {
|
|||
die "regular file found where we expected a type in $typeName" unless -d $typeDirName;
|
||||
print " $typeDirName\n";
|
||||
if ($typeName =~ /email/i) {
|
||||
ProcessMailDir($typeDirName, "");
|
||||
ProcessMailDir($rfp, $typeDirName, "");
|
||||
}
|
||||
}
|
||||
closedir $bucketDH;
|
||||
|
@ -118,6 +171,15 @@ while (my $rfp = readdir $topDH) {
|
|||
}
|
||||
closedir $topDH;
|
||||
|
||||
open my $csvFH, ">:encoding(utf8)", $csvLogFile;
|
||||
|
||||
$csvOutFormat->say($csvFH, \@headerFields);
|
||||
$csvOutFormat->say($csvFH, $_) for @CSV_OUTPUT_ROWS; close $csvFH;
|
||||
|
||||
open($upiFH, ">", $upiNumberFile);
|
||||
print $upiFH ++$upiCurrentNum, "\n";
|
||||
close $upiFH;
|
||||
|
||||
#make_path(, {
|
||||
# verbose => 1,
|
||||
# mode => 0755,
|
||||
|
|
Loading…
Reference in a new issue