Compare commits

..

No commits in common. "3f4a63dd3f8f1105193a22ae84ec52b4a0f4623b" and "34e24a8a31a3c027febfda39cfa4b6b517422dac" have entirely different histories.

View file

@ -21,14 +21,14 @@ use File::Spec::Functions;
use File::Spec; use File::Spec;
use File::Path qw(make_path); use File::Path qw(make_path);
use Mail::Header; use Mail::Header;
use Email::Address::XS qw(parse_email_groups); use Email::Address::XS;
use File::Copy; use File::Copy;
use Date::Manip::DM6 qw(ParseDate UnixDate); use Date::Manip::DM6 qw(ParseDate UnixDate);
use Text::CSV; # libtext-csv-perl in Debian use Text::CSV; # libtext-csv-perl in Debian
use Encode qw/encode decode/; use Encode qw/encode decode/;
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGED', privileged => 'PRIVILEGED', my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE',
'journalist-privilege' => 'PRIVILEGED' ); 'journalist-privilege' => 'PRIVILEGE' );
sub UsageAndExit($) { sub UsageAndExit($) {
print STDERR "usage: $0 --inputToplevelDir=/path/to/inputdir --outputToplevelDir=/path/to/outputdir --group=group [ --verbose=N ]\n"; print STDERR "usage: $0 --inputToplevelDir=/path/to/inputdir --outputToplevelDir=/path/to/outputdir --group=group [ --verbose=N ]\n";
@ -70,7 +70,6 @@ close $upiFH;
UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStart > 0; UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStart > 0;
my $upiCurrentNum = $upiStart; my $upiCurrentNum = $upiStart;
# Hand out the next Unique Production Identifier (UPI) label.
# Formats the current value of the file-level counter $upiCurrentNum as
# "UPI-SFC-" followed by a zero-padded, 7-digit number, and then advances
# the counter by one so that the following call yields the next label.
# Takes no arguments; returns the formatted label string.
sub NextUPI () {
    my $serial = $upiCurrentNum++;
    return sprintf("UPI-SFC-%07d", $serial);
}
my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1}); my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1});
my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv")); my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv"));
@ -80,7 +79,7 @@ UsageAndExit("\"$csvLogFile\" cannot already exist! Do not attempt to number th
my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE', my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE',
'PROTECTIVE ORDER CATEGORY'); 'PROTECTIVE ORDER CATEGORY');
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') { if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
@headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS', @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS',
'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS', 'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS',
'PRIVILEGE CLAIMED'); 'PRIVILEGE CLAIMED');
@ -100,24 +99,7 @@ sub ProcessDocumentDirectory($$$) {
if (-d $fullFilePath) { if (-d $fullFilePath) {
ProcessDocumentDirectory($rfp, $fullFilePath, catfile($numberedOutputDir, $file)); ProcessDocumentDirectory($rfp, $fullFilePath, catfile($numberedOutputDir, $file));
} elsif (-f $fullFilePath) { } elsif (-f $fullFilePath) {
my $upiFull = NextUPI(); print " mv $fullFilePath ", catfile($numberedOutputDir, $file), "\n";
unless (-d $numberedOutputDir) {
make_path($numberedOutputDir, { mode => 0755 }) or die "unable to make directory $numberedOutputDir: $!";
}
my($volume, $directories, $bareFileName) = File::Spec->splitpath($fullFilePath);
die("Something wrong, since file name is empty on $fullFilePath") unless defined $bareFileName and $bareFileName !~ /^\s*$/;
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' . $bareFileName;
my $copiedFile = catfile($numberedOutputDir, $fileName);
copy($fullFilePath, $copiedFile)
or die "unable to copy($fullFilePath, catfile($numberedOutputDir, $fileName))";
system('/usr/bin/unix2dos', '-q', $copiedFile) if (-T $copiedFile);
die "unable to copy $fullFilePath to $copiedFile" unless -f $copiedFile;
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
push(@CSV_OUTPUT_ROWS, [ $upiFull, "", "", $fileName, "N/A", "", "", "", "", "", $GROUP ]);
} else {
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP_NAMES_BY_DIR{$GROUP} ]);
}
} else { } else {
die("\"$fullFilePath\" is a strange file type, not handled!"); die("\"$fullFilePath\" is a strange file type, not handled!");
} }
@ -139,41 +121,23 @@ sub ProcessMailDir($$$) {
next if -d $file; # skip directories next if -d $file; # skip directories
my $msgFile = catfile($dir, $file); my $msgFile = catfile($dir, $file);
open(my $msgFH, "<", $msgFile); open(my $msgFH, "<", $msgFile);
my $upiFull = NextUPI(); my $upiFull = sprintf("UPI-SFC-%07d", $upiCurrentNum++);
my $header = new Mail::Header($msgFH); my $header = new Mail::Header($msgFH);
my $fields = $header->header_hashref; my $fields = $header->header_hashref;
my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CCName => '', CCAddr => '', 'Subject' => '', my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CcName => '', CcAddr => '', 'Subject' => '',
Date => ''); Date => '');
use Data::Dumper; foreach my $fieldName (qw/From To Cc Subject Date/) {
foreach my $fieldName (qw/From To CC Cc Subject Date/) {
foreach my $item (@{$fields->{$fieldName}}) { foreach my $item (@{$fields->{$fieldName}}) {
chomp $item; chomp $item;
if ($fieldName =~ /From|To|CC/i) { if ($fieldName =~ /From|To|Cc/) {
my @groups = parse_email_groups($item); my $addr = Email::Address::XS->parse($item);
while ( my($groupName, $addrListRef) = each @groups) { if ($addr->name ne "") {
if (defined $groupName and $groupName !~ /^[01\s*]$/) {
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/; $parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
$parsed{"${fieldName}Name"} .= $groupName; $parsed{"${fieldName}Name"} .= $addr->name;
} }
if (not ref $addrListRef) { if ($addr->address ne "") {
if (defined $addrListRef and $addrListRef !~ /^\s*$/) {
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
$parsed{"${fieldName}Name"} .= $addrListRef;
}
} else {
foreach my $addr (@$addrListRef) {
my $name = $addr->name;
my $address = $addr->address;
if (defined $name and $name !~ /^\s*$/) {
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
$parsed{"${fieldName}Name"} .= $name;
}
if (defined $address and $address !~ /^\s*$/) {
$parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/; $parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/;
$parsed{"${fieldName}Addr"} .= $address; $parsed{"${fieldName}Addr"} .= $addr->address;
}
}
}
} }
} elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) { } elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) {
$parsed{Date} = ParseDate($item); $parsed{Date} = ParseDate($item);
@ -191,17 +155,15 @@ sub ProcessMailDir($$$) {
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' . my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml'; UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
die "$fileName has no subject" if not defined $parsed{Subject}; die "$fileName has no subject" if not defined $parsed{Subject};
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') { if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
my $dateFormatted = UnixDate($parsed{Date}, "%D");
$dateFormatted = "N/A" if not defined $dateFormatted or $dateFormatted =~ /^\s*$/;
push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject}, push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject},
$dateFormatted, $parsed{ToName}, $parsed{ToAddr}, UnixDate("%D", $parsed{Date}), $parsed{ToName}, $parsed{ToAddr},
$parsed{CCName}, $parsed{CCAddr}, "", $GROUP ]); $parsed{CcName}, $parsed{CcAddr}, "", $GROUP ]);
} else { } else {
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP_NAMES_BY_DIR{$GROUP} ]); push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP ]);
} }
my $copiedFile = catfile($outputDir, $fileName); my $copiedFile = catfile($outputDir, $fileName);
copy($msgFile, $copiedFile) or die "unable to copy($msgFile, $copiedFile)"; copy($msgFile, $copiedFile);
system('/usr/bin/unix2dos', '-q', $copiedFile); system('/usr/bin/unix2dos', '-q', $copiedFile);
die "unable to copy $msgFile to $copiedFile" unless -f $copiedFile; die "unable to copy $msgFile to $copiedFile" unless -f $copiedFile;
} }
@ -225,29 +187,19 @@ while (my $rfp = readdir $topDH) {
my $typeDirName = catfile($INPUT_TOPLEVEL_DIR, $rfp, $bucketName, $typeName); my $typeDirName = catfile($INPUT_TOPLEVEL_DIR, $rfp, $bucketName, $typeName);
die "regular file found where we expected a type in $typeName" unless -d $typeDirName; die "regular file found where we expected a type in $typeName" unless -d $typeDirName;
my($native, $numbered) = ('produce-native', 'produce-numbered'); my($native, $numbered) = ('produce-native', 'produce-numbered');
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') { if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
($native, $numbered) = ('priv-native', 'priv-numbered'); ($native, $numbered) = ('priv-native', 'priv-numbered');
} }
my $nativeOutputDirOneUp = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $native, $rfp, $bucketName)); my $nativeOutputDirOneUp = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $native, $rfp, $bucketName));
my $numberedOutputDir = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $numbered, $rfp, $bucketName, $typeName)); my $numberedOutputDir = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $numbered, $rfp, $bucketName, $typeName));
unless (-d $nativeOutputDirOneUp) { make_path($nativeOutputDirOneUp, { mode => 0755 });
make_path($nativeOutputDirOneUp, { mode => 0755 }) or die "unable to create path $nativeOutputDirOneUp: $!"; make_path($numberedOutputDir, { mode => 0755 });
}
unless (-d $numberedOutputDir) {
make_path($numberedOutputDir, { mode => 0755 }) or die "unable to create path $numberedOutputDir: $!";
}
my $destDir = catfile($nativeOutputDirOneUp, $typeName);
if ($typeName =~ /email/i) { if ($typeName =~ /email/i) {
ProcessMailDir($rfp, $typeDirName, $numberedOutputDir); ProcessMailDir($rfp, $typeDirName, $numberedOutputDir);
move($typeDirName, $nativeOutputDirOneUp);
} else { } else {
ProcessDocumentDirectory($rfp, $typeDirName, $numberedOutputDir); ProcessDocumentDirectory($rfp, $typeDirName, $numberedOutputDir);
} }
die "cannot move to the directory we want this in" unless -d $nativeOutputDirOneUp;
rename($typeDirName, $destDir) or die "unable to move $typeDirName to $destDir: $!";
# move($typeDirName, $nativeOutputDirOneUp) or die "unable to move($typeDirName, $nativeOutputDirOneUp)";
# Note: the above doesn't actually rename the directory from one place
# to another; it moves the file contents into the destination directory. IOW, File::Copy->move() doesn't have POSIX mv semantics
} }
closedir $bucketDH; closedir $bucketDH;
} }
@ -264,6 +216,11 @@ print STDERR "$GROUP ($GROUP_NAMES_BY_DIR{$GROUP}) starts at $upiStart and ends
open($upiFH, ">", $upiNumberFile); open($upiFH, ">", $upiNumberFile);
print $upiFH ++$upiCurrentNum, "\n"; print $upiFH ++$upiCurrentNum, "\n";
close $upiFH; close $upiFH;
#make_path(, {
# verbose => 1,
# mode => 0755,
#});
############################################################################### ###############################################################################
# #
# Local variables: # Local variables: