Compare commits
10 commits
34e24a8a31
...
3f4a63dd3f
Author | SHA1 | Date | |
---|---|---|---|
|
3f4a63dd3f | ||
|
db9a80723f | ||
|
347e0d3113 | ||
|
cdcf26f8fa | ||
|
ea04654b7f | ||
|
3f0716c9f0 | ||
|
9da387af88 | ||
|
c2cadeaa6a | ||
|
bf0fd2ab75 | ||
|
34b6ba4cb8 |
1 changed files with 74 additions and 31 deletions
|
@ -21,14 +21,14 @@ use File::Spec::Functions;
|
|||
use File::Spec;
|
||||
use File::Path qw(make_path);
|
||||
use Mail::Header;
|
||||
use Email::Address::XS;
|
||||
use Email::Address::XS qw(parse_email_groups);
|
||||
use File::Copy;
|
||||
use Date::Manip::DM6 qw(ParseDate UnixDate);
|
||||
use Text::CSV; # libtext-csv-perl in Debian
|
||||
use Encode qw/encode decode/;
|
||||
|
||||
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE',
|
||||
'journalist-privilege' => 'PRIVILEGE' );
|
||||
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGED', privileged => 'PRIVILEGED',
|
||||
'journalist-privilege' => 'PRIVILEGED' );
|
||||
|
||||
sub UsageAndExit($) {
|
||||
print STDERR "usage: $0 --inputToplevelDir=/path/to/inputdir --outputToplevelDir=/path/to/outputdir --group=group [ --verbose=N ]\n";
|
||||
|
@ -70,6 +70,7 @@ close $upiFH;
|
|||
UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStart > 0;
|
||||
|
||||
my $upiCurrentNum = $upiStart;
|
||||
# Hand out the next production identifier: formats the current value of
# $upiCurrentNum as "UPI-SFC-" followed by a zero-padded 7-digit number,
# then advances the counter so the following call yields the next number.
sub NextUPI () {
    my $assignedNum = $upiCurrentNum++;
    return sprintf("UPI-SFC-%07d", $assignedNum);
}
|
||||
|
||||
my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1});
|
||||
my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv"));
|
||||
|
@ -79,7 +80,7 @@ UsageAndExit("\"$csvLogFile\" cannot already exist! Do not attempt to number th
|
|||
my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE',
|
||||
'PROTECTIVE ORDER CATEGORY');
|
||||
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||
@headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS',
|
||||
'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS',
|
||||
'PRIVILEGE CLAIMED');
|
||||
|
@ -99,7 +100,24 @@ sub ProcessDocumentDirectory($$$) {
|
|||
if (-d $fullFilePath) {
|
||||
ProcessDocumentDirectory($rfp, $fullFilePath, catfile($numberedOutputDir, $file));
|
||||
} elsif (-f $fullFilePath) {
|
||||
print " mv $fullFilePath ", catfile($numberedOutputDir, $file), "\n";
|
||||
my $upiFull = NextUPI();
|
||||
unless (-d $numberedOutputDir) {
|
||||
make_path($numberedOutputDir, { mode => 0755 }) or die "unable to make directory $numberedOutputDir: $!";
|
||||
}
|
||||
my($volume, $directories, $bareFileName) = File::Spec->splitpath($fullFilePath);
|
||||
die("Something wrong, since file name is empty on $fullFilePath") unless defined $bareFileName and $bareFileName !~ /^\s*$/;
|
||||
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' . $bareFileName;
|
||||
my $copiedFile = catfile($numberedOutputDir, $fileName);
|
||||
copy($fullFilePath, $copiedFile)
|
||||
or die "unable to copy($fullFilePath, catfile($numberedOutputDir, $fileName))";
|
||||
system('/usr/bin/unix2dos', '-q', $copiedFile) if (-T $copiedFile);
|
||||
die "unable to copy $fullFilePath to $copiedFile" unless -f $copiedFile;
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, "", "", $fileName, "N/A", "", "", "", "", "", $GROUP ]);
|
||||
} else {
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP_NAMES_BY_DIR{$GROUP} ]);
|
||||
}
|
||||
|
||||
} else {
|
||||
die("\"$fullFilePath\" is a strange file type, not handled!");
|
||||
}
|
||||
|
@ -121,23 +139,41 @@ sub ProcessMailDir($$$) {
|
|||
next if -d $file; # skip directories
|
||||
my $msgFile = catfile($dir, $file);
|
||||
open(my $msgFH, "<", $msgFile);
|
||||
my $upiFull = sprintf("UPI-SFC-%07d", $upiCurrentNum++);
|
||||
my $upiFull = NextUPI();
|
||||
my $header = new Mail::Header($msgFH);
|
||||
my $fields = $header->header_hashref;
|
||||
my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CcName => '', CcAddr => '', 'Subject' => '',
|
||||
my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CCName => '', CCAddr => '', 'Subject' => '',
|
||||
Date => '');
|
||||
foreach my $fieldName (qw/From To Cc Subject Date/) {
|
||||
use Data::Dumper;
|
||||
foreach my $fieldName (qw/From To CC Cc Subject Date/) {
|
||||
foreach my $item (@{$fields->{$fieldName}}) {
|
||||
chomp $item;
|
||||
if ($fieldName =~ /From|To|Cc/) {
|
||||
my $addr = Email::Address::XS->parse($item);
|
||||
if ($addr->name ne "") {
|
||||
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Name"} .= $addr->name;
|
||||
}
|
||||
if ($addr->address ne "") {
|
||||
$parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Addr"} .= $addr->address;
|
||||
if ($fieldName =~ /From|To|CC/i) {
|
||||
my @groups = parse_email_groups($item);
|
||||
while ( my($groupName, $addrListRef) = each @groups) {
|
||||
if (defined $groupName and $groupName !~ /^[01\s*]$/) {
|
||||
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Name"} .= $groupName;
|
||||
}
|
||||
if (not ref $addrListRef) {
|
||||
if (defined $addrListRef and $addrListRef !~ /^\s*$/) {
|
||||
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Name"} .= $addrListRef;
|
||||
}
|
||||
} else {
|
||||
foreach my $addr (@$addrListRef) {
|
||||
my $name = $addr->name;
|
||||
my $address = $addr->address;
|
||||
if (defined $name and $name !~ /^\s*$/) {
|
||||
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Name"} .= $name;
|
||||
}
|
||||
if (defined $address and $address !~ /^\s*$/) {
|
||||
$parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/;
|
||||
$parsed{"${fieldName}Addr"} .= $address;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) {
|
||||
$parsed{Date} = ParseDate($item);
|
||||
|
@ -155,15 +191,17 @@ sub ProcessMailDir($$$) {
|
|||
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
|
||||
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
|
||||
die "$fileName has no subject" if not defined $parsed{Subject};
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||
my $dateFormatted = UnixDate($parsed{Date}, "%D");
|
||||
$dateFormatted = "N/A" if not defined $dateFormatted or $dateFormatted =~ /^\s*$/;
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject},
|
||||
UnixDate("%D", $parsed{Date}), $parsed{ToName}, $parsed{ToAddr},
|
||||
$parsed{CcName}, $parsed{CcAddr}, "", $GROUP ]);
|
||||
$dateFormatted, $parsed{ToName}, $parsed{ToAddr},
|
||||
$parsed{CCName}, $parsed{CCAddr}, "", $GROUP ]);
|
||||
} else {
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP ]);
|
||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP_NAMES_BY_DIR{$GROUP} ]);
|
||||
}
|
||||
my $copiedFile = catfile($outputDir, $fileName);
|
||||
copy($msgFile, $copiedFile);
|
||||
copy($msgFile, $copiedFile) or die "unable to copy($msgFile, $copiedFile)";
|
||||
system('/usr/bin/unix2dos', '-q', $copiedFile);
|
||||
die "unable to copy $msgFile to $copiedFile" unless -f $copiedFile;
|
||||
}
|
||||
|
@ -187,19 +225,29 @@ while (my $rfp = readdir $topDH) {
|
|||
my $typeDirName = catfile($INPUT_TOPLEVEL_DIR, $rfp, $bucketName, $typeName);
|
||||
die "regular file found where we expected a type in $typeName" unless -d $typeDirName;
|
||||
my($native, $numbered) = ('produce-native', 'produce-numbered');
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||
($native, $numbered) = ('priv-native', 'priv-numbered');
|
||||
}
|
||||
my $nativeOutputDirOneUp = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $native, $rfp, $bucketName));
|
||||
my $numberedOutputDir = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $numbered, $rfp, $bucketName, $typeName));
|
||||
make_path($nativeOutputDirOneUp, { mode => 0755 });
|
||||
make_path($numberedOutputDir, { mode => 0755 });
|
||||
unless (-d $nativeOutputDirOneUp) {
|
||||
make_path($nativeOutputDirOneUp, { mode => 0755 }) or die "unable to create path $nativeOutputDirOneUp: $!";
|
||||
}
|
||||
unless (-d $numberedOutputDir) {
|
||||
make_path($numberedOutputDir, { mode => 0755 }) or die "unable to create path $numberedOutputDir: $!";
|
||||
}
|
||||
my $destDir = catfile($nativeOutputDirOneUp, $typeName);
|
||||
if ($typeName =~ /email/i) {
|
||||
ProcessMailDir($rfp, $typeDirName, $numberedOutputDir);
|
||||
move($typeDirName, $nativeOutputDirOneUp);
|
||||
} else {
|
||||
ProcessDocumentDirectory($rfp, $typeDirName, $numberedOutputDir);
|
||||
}
|
||||
die "cannot move to the directory we want this in" unless -d $nativeOutputDirOneUp;
|
||||
rename($typeDirName, $destDir) or die "unable to move $typeDirName to $destDir: $!";
|
||||
# move($typeDirName, $nativeOutputDirOneUp) or die "unable to move($typeDirName, $nativeOutputDirOneUp)";
|
||||
|
||||
# Note: the above doesn't actually rename the directory from one place
|
||||
# to another; it moves the file contents into the destination directory. IOW, File::Copy->move() doesn't have POSIX mv semantics.
|
||||
}
|
||||
closedir $bucketDH;
|
||||
}
|
||||
|
@ -216,11 +264,6 @@ print STDERR "$GROUP ($GROUP_NAMES_BY_DIR{$GROUP}) starts at $upiStart and ends
|
|||
open($upiFH, ">", $upiNumberFile);
|
||||
print $upiFH ++$upiCurrentNum, "\n";
|
||||
close $upiFH;
|
||||
|
||||
#make_path(, {
|
||||
# verbose => 1,
|
||||
# mode => 0755,
|
||||
#});
|
||||
###############################################################################
|
||||
#
|
||||
# Local variables:
|
||||
|
|
Loading…
Reference in a new issue