Compare commits
10 commits
34e24a8a31
...
3f4a63dd3f
Author | SHA1 | Date | |
---|---|---|---|
|
3f4a63dd3f | ||
|
db9a80723f | ||
|
347e0d3113 | ||
|
cdcf26f8fa | ||
|
ea04654b7f | ||
|
3f0716c9f0 | ||
|
9da387af88 | ||
|
c2cadeaa6a | ||
|
bf0fd2ab75 | ||
|
34b6ba4cb8 |
1 changed files with 74 additions and 31 deletions
|
@ -21,14 +21,14 @@ use File::Spec::Functions;
|
||||||
use File::Spec;
|
use File::Spec;
|
||||||
use File::Path qw(make_path);
|
use File::Path qw(make_path);
|
||||||
use Mail::Header;
|
use Mail::Header;
|
||||||
use Email::Address::XS;
|
use Email::Address::XS qw(parse_email_groups);
|
||||||
use File::Copy;
|
use File::Copy;
|
||||||
use Date::Manip::DM6 qw(ParseDate UnixDate);
|
use Date::Manip::DM6 qw(ParseDate UnixDate);
|
||||||
use Text::CSV; # libtext-csv-perl in Debian
|
use Text::CSV; # libtext-csv-perl in Debian
|
||||||
use Encode qw/encode decode/;
|
use Encode qw/encode decode/;
|
||||||
|
|
||||||
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE',
|
my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGED', privileged => 'PRIVILEGED',
|
||||||
'journalist-privilege' => 'PRIVILEGE' );
|
'journalist-privilege' => 'PRIVILEGED' );
|
||||||
|
|
||||||
sub UsageAndExit($) {
|
sub UsageAndExit($) {
|
||||||
print STDERR "usage: $0 --inputToplevelDir=/path/to/inputdir --outputToplevelDir=/path/to/outputdir --group=group [ --verbose=N ]\n";
|
print STDERR "usage: $0 --inputToplevelDir=/path/to/inputdir --outputToplevelDir=/path/to/outputdir --group=group [ --verbose=N ]\n";
|
||||||
|
@ -70,6 +70,7 @@ close $upiFH;
|
||||||
UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStart > 0;
|
UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStart > 0;
|
||||||
|
|
||||||
my $upiCurrentNum = $upiStart;
|
my $upiCurrentNum = $upiStart;
|
||||||
|
# Return the next production identifier as "UPI-SFC-" followed by the current
# counter zero-padded to seven digits, then advance the file-level counter
# $upiCurrentNum so the following call yields the next number.
sub NextUPI () {
    my $label = sprintf("UPI-SFC-%07d", $upiCurrentNum);
    $upiCurrentNum++;
    return $label;
}
|
||||||
|
|
||||||
my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1});
|
my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1});
|
||||||
my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv"));
|
my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv"));
|
||||||
|
@ -79,7 +80,7 @@ UsageAndExit("\"$csvLogFile\" cannot already exist! Do not attempt to number th
|
||||||
my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE',
|
my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE',
|
||||||
'PROTECTIVE ORDER CATEGORY');
|
'PROTECTIVE ORDER CATEGORY');
|
||||||
|
|
||||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||||
@headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS',
|
@headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS',
|
||||||
'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS',
|
'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS',
|
||||||
'PRIVILEGE CLAIMED');
|
'PRIVILEGE CLAIMED');
|
||||||
|
@ -99,7 +100,24 @@ sub ProcessDocumentDirectory($$$) {
|
||||||
if (-d $fullFilePath) {
|
if (-d $fullFilePath) {
|
||||||
ProcessDocumentDirectory($rfp, $fullFilePath, catfile($numberedOutputDir, $file));
|
ProcessDocumentDirectory($rfp, $fullFilePath, catfile($numberedOutputDir, $file));
|
||||||
} elsif (-f $fullFilePath) {
|
} elsif (-f $fullFilePath) {
|
||||||
print " mv $fullFilePath ", catfile($numberedOutputDir, $file), "\n";
|
my $upiFull = NextUPI();
|
||||||
|
unless (-d $numberedOutputDir) {
|
||||||
|
make_path($numberedOutputDir, { mode => 0755 }) or die "unable to make directory $numberedOutputDir: $!";
|
||||||
|
}
|
||||||
|
my($volume, $directories, $bareFileName) = File::Spec->splitpath($fullFilePath);
|
||||||
|
die("Something wrong, since file name is empty on $fullFilePath") unless defined $bareFileName and $bareFileName !~ /^\s*$/;
|
||||||
|
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' . $bareFileName;
|
||||||
|
my $copiedFile = catfile($numberedOutputDir, $fileName);
|
||||||
|
copy($fullFilePath, $copiedFile)
|
||||||
|
or die "unable to copy($fullFilePath, catfile($numberedOutputDir, $fileName))";
|
||||||
|
system('/usr/bin/unix2dos', '-q', $copiedFile) if (-T $copiedFile);
|
||||||
|
die "unable to copy $fullFilePath to $copiedFile" unless -f $copiedFile;
|
||||||
|
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||||
|
push(@CSV_OUTPUT_ROWS, [ $upiFull, "", "", $fileName, "N/A", "", "", "", "", "", $GROUP ]);
|
||||||
|
} else {
|
||||||
|
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP_NAMES_BY_DIR{$GROUP} ]);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
die("\"$fullFilePath\" is a strange file type, not handled!");
|
die("\"$fullFilePath\" is a strange file type, not handled!");
|
||||||
}
|
}
|
||||||
|
@ -121,23 +139,41 @@ sub ProcessMailDir($$$) {
|
||||||
next if -d $file; # skip directories
|
next if -d $file; # skip directories
|
||||||
my $msgFile = catfile($dir, $file);
|
my $msgFile = catfile($dir, $file);
|
||||||
open(my $msgFH, "<", $msgFile);
|
open(my $msgFH, "<", $msgFile);
|
||||||
my $upiFull = sprintf("UPI-SFC-%07d", $upiCurrentNum++);
|
my $upiFull = NextUPI();
|
||||||
my $header = new Mail::Header($msgFH);
|
my $header = new Mail::Header($msgFH);
|
||||||
my $fields = $header->header_hashref;
|
my $fields = $header->header_hashref;
|
||||||
my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CcName => '', CcAddr => '', 'Subject' => '',
|
my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CCName => '', CCAddr => '', 'Subject' => '',
|
||||||
Date => '');
|
Date => '');
|
||||||
foreach my $fieldName (qw/From To Cc Subject Date/) {
|
use Data::Dumper;
|
||||||
|
foreach my $fieldName (qw/From To CC Cc Subject Date/) {
|
||||||
foreach my $item (@{$fields->{$fieldName}}) {
|
foreach my $item (@{$fields->{$fieldName}}) {
|
||||||
chomp $item;
|
chomp $item;
|
||||||
if ($fieldName =~ /From|To|Cc/) {
|
if ($fieldName =~ /From|To|CC/i) {
|
||||||
my $addr = Email::Address::XS->parse($item);
|
my @groups = parse_email_groups($item);
|
||||||
if ($addr->name ne "") {
|
while ( my($groupName, $addrListRef) = each @groups) {
|
||||||
|
if (defined $groupName and $groupName !~ /^[01\s*]$/) {
|
||||||
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||||
$parsed{"${fieldName}Name"} .= $addr->name;
|
$parsed{"${fieldName}Name"} .= $groupName;
|
||||||
}
|
}
|
||||||
if ($addr->address ne "") {
|
if (not ref $addrListRef) {
|
||||||
|
if (defined $addrListRef and $addrListRef !~ /^\s*$/) {
|
||||||
|
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||||
|
$parsed{"${fieldName}Name"} .= $addrListRef;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
foreach my $addr (@$addrListRef) {
|
||||||
|
my $name = $addr->name;
|
||||||
|
my $address = $addr->address;
|
||||||
|
if (defined $name and $name !~ /^\s*$/) {
|
||||||
|
$parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/;
|
||||||
|
$parsed{"${fieldName}Name"} .= $name;
|
||||||
|
}
|
||||||
|
if (defined $address and $address !~ /^\s*$/) {
|
||||||
$parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/;
|
$parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/;
|
||||||
$parsed{"${fieldName}Addr"} .= $addr->address;
|
$parsed{"${fieldName}Addr"} .= $address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) {
|
} elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) {
|
||||||
$parsed{Date} = ParseDate($item);
|
$parsed{Date} = ParseDate($item);
|
||||||
|
@ -155,15 +191,17 @@ sub ProcessMailDir($$$) {
|
||||||
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
|
my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' .
|
||||||
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
|
UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml';
|
||||||
die "$fileName has no subject" if not defined $parsed{Subject};
|
die "$fileName has no subject" if not defined $parsed{Subject};
|
||||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||||
|
my $dateFormatted = UnixDate($parsed{Date}, "%D");
|
||||||
|
$dateFormatted = "N/A" if not defined $dateFormatted or $dateFormatted =~ /^\s*$/;
|
||||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject},
|
push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject},
|
||||||
UnixDate("%D", $parsed{Date}), $parsed{ToName}, $parsed{ToAddr},
|
$dateFormatted, $parsed{ToName}, $parsed{ToAddr},
|
||||||
$parsed{CcName}, $parsed{CcAddr}, "", $GROUP ]);
|
$parsed{CCName}, $parsed{CCAddr}, "", $GROUP ]);
|
||||||
} else {
|
} else {
|
||||||
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP ]);
|
push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp), $GROUP_NAMES_BY_DIR{$GROUP} ]);
|
||||||
}
|
}
|
||||||
my $copiedFile = catfile($outputDir, $fileName);
|
my $copiedFile = catfile($outputDir, $fileName);
|
||||||
copy($msgFile, $copiedFile);
|
copy($msgFile, $copiedFile) or die "unable to copy($msgFile, $copiedFile)";
|
||||||
system('/usr/bin/unix2dos', '-q', $copiedFile);
|
system('/usr/bin/unix2dos', '-q', $copiedFile);
|
||||||
die "unable to copy $msgFile to $copiedFile" unless -f $copiedFile;
|
die "unable to copy $msgFile to $copiedFile" unless -f $copiedFile;
|
||||||
}
|
}
|
||||||
|
@ -187,19 +225,29 @@ while (my $rfp = readdir $topDH) {
|
||||||
my $typeDirName = catfile($INPUT_TOPLEVEL_DIR, $rfp, $bucketName, $typeName);
|
my $typeDirName = catfile($INPUT_TOPLEVEL_DIR, $rfp, $bucketName, $typeName);
|
||||||
die "regular file found where we expected a type in $typeName" unless -d $typeDirName;
|
die "regular file found where we expected a type in $typeName" unless -d $typeDirName;
|
||||||
my($native, $numbered) = ('produce-native', 'produce-numbered');
|
my($native, $numbered) = ('produce-native', 'produce-numbered');
|
||||||
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') {
|
if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGED') {
|
||||||
($native, $numbered) = ('priv-native', 'priv-numbered');
|
($native, $numbered) = ('priv-native', 'priv-numbered');
|
||||||
}
|
}
|
||||||
my $nativeOutputDirOneUp = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $native, $rfp, $bucketName));
|
my $nativeOutputDirOneUp = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $native, $rfp, $bucketName));
|
||||||
my $numberedOutputDir = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $numbered, $rfp, $bucketName, $typeName));
|
my $numberedOutputDir = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, $numbered, $rfp, $bucketName, $typeName));
|
||||||
make_path($nativeOutputDirOneUp, { mode => 0755 });
|
unless (-d $nativeOutputDirOneUp) {
|
||||||
make_path($numberedOutputDir, { mode => 0755 });
|
make_path($nativeOutputDirOneUp, { mode => 0755 }) or die "unable to create path $nativeOutputDirOneUp: $!";
|
||||||
|
}
|
||||||
|
unless (-d $numberedOutputDir) {
|
||||||
|
make_path($numberedOutputDir, { mode => 0755 }) or die "unable to create path $numberedOutputDir: $!";
|
||||||
|
}
|
||||||
|
my $destDir = catfile($nativeOutputDirOneUp, $typeName);
|
||||||
if ($typeName =~ /email/i) {
|
if ($typeName =~ /email/i) {
|
||||||
ProcessMailDir($rfp, $typeDirName, $numberedOutputDir);
|
ProcessMailDir($rfp, $typeDirName, $numberedOutputDir);
|
||||||
move($typeDirName, $nativeOutputDirOneUp);
|
|
||||||
} else {
|
} else {
|
||||||
ProcessDocumentDirectory($rfp, $typeDirName, $numberedOutputDir);
|
ProcessDocumentDirectory($rfp, $typeDirName, $numberedOutputDir);
|
||||||
}
|
}
|
||||||
|
die "cannot move to the directory we want this in" unless -d $nativeOutputDirOneUp;
|
||||||
|
rename($typeDirName, $destDir) or die "unable to move $typeDirName to $destDir: $!";
|
||||||
|
# move($typeDirName, $nativeOutputDirOneUp) or die "unable to move($typeDirName, $nativeOutputDirOneUp)";
|
||||||
|
|
||||||
|
# Note: the above doesn't actually rename the directory from one place
|
||||||
|
# to another; it moves the file contents into the destination directory. IOW, File::Copy->move() doesn't have POSIX mv semantics.
|
||||||
}
|
}
|
||||||
closedir $bucketDH;
|
closedir $bucketDH;
|
||||||
}
|
}
|
||||||
|
@ -216,11 +264,6 @@ print STDERR "$GROUP ($GROUP_NAMES_BY_DIR{$GROUP}) starts at $upiStart and ends
|
||||||
open($upiFH, ">", $upiNumberFile);
|
open($upiFH, ">", $upiNumberFile);
|
||||||
print $upiFH ++$upiCurrentNum, "\n";
|
print $upiFH ++$upiCurrentNum, "\n";
|
||||||
close $upiFH;
|
close $upiFH;
|
||||||
|
|
||||||
#make_path(, {
|
|
||||||
# verbose => 1,
|
|
||||||
# mode => 0755,
|
|
||||||
#});
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
#
|
#
|
||||||
# Local variables:
|
# Local variables:
|
||||||
|
|
Loading…
Reference in a new issue