diff --git a/build-label-upi-number.plx b/build-label-upi-number.plx index 9d0a484..f585c0e 100755 --- a/build-label-upi-number.plx +++ b/build-label-upi-number.plx @@ -8,10 +8,14 @@ # native is where things are just moved over at the end # numbered is where the numbered items go +use utf8; + use strict; use warnings; use autodie qw(:all); +use open qw/ :std :encoding(utf-8) /; + use Getopt::Long; use File::Spec::Functions; use File::Spec; @@ -19,6 +23,8 @@ use File::Path qw(make_path); use Mail::Header; use Email::Address::XS; use File::Copy; +use Date::Manip::DM6 qw(ParseDate UnixDate); +use Text::CSV; # libtext-csv-perl in Debian my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE', 'journalist-privilege' => 'PRIVILEGE' ); @@ -64,8 +70,23 @@ UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStar my $upiCurrentNum = $upiStart; -sub ProcessMailDir($$) { - my($inputMailDir, $destOutDir) = @_; +my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1}); +my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv")); +UsageAndExit("\"$csvLogFile\" cannot already exist! Do not attempt to number the same group twice!") + if (-e $csvLogFile); + +my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE'); + +if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') { + @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS', + 'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS', + 'PRIVILEGE CLAIMED'); +} + +my @CSV_OUTPUT_ROWS; + +sub ProcessMailDir($$$) { + my($rfp, $inputMailDir, $destOutDir) = @_; my @msgDirs = (catfile($inputMailDir, 'new'), catfile($inputMailDir, 'cur')); foreach my $dir (@msgDirs) { @@ -78,16 +99,48 @@ sub ProcessMailDir($$) { next if -d $file; # skip directories my $msgFile = catfile($dir, $file); open(my $msgFH, "<", $msgFile); + my $upiFull = sprintf("UPI-SFC-%07d", ++$upiCurrentNum); print " $msgFile\n"; my $header = new Mail::Header($msgFH); my $fields = $header->header_hashref; + my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CcName => '', CcAddr => '', 'Subject' => '', + Date => ''); foreach my $fieldName (qw/From To Cc Subject Date/) { foreach my $item (@{$fields->{$fieldName}}) { chomp $item; - print " $fieldName $item\n"; + if ($fieldName =~ /From|To|Cc/) { + my $addr = Email::Address::XS->parse($item); + if ($addr->name ne "") { + $parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/; + $parsed{"${fieldName}Name"} .= $addr->name; + } + if ($addr->address ne "") { + $parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/; + $parsed{"${fieldName}Addr"} .= $addr->address; + } + } elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) { + $parsed{Date} = ParseDate($item); + die "$item is in the date field but doesn't parse" unless defined $parsed{Date} and $parsed{Date} !~ /^\s*$/; + } else { + $parsed{$fieldName} = $item; + } } } close $msgFH; + $parsed{Subject} =~ s/[\r\n]/ /gm; + my $subjectDashes = $parsed{Subject}; + $subjectDashes =~ s/[^a-zA-Z0-9]/_/g; $subjectDashes =~ s/_+/_/g; $subjectDashes =~ s/_+$//g; $subjectDashes =~ s/^_+//g; + my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' . + UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml'; + print "$fileName\n"; + die "$fileName has no subject" if not defined $parsed{Subject}; + if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') { + push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject}, + UnixDate("%D", $parsed{Date}), $parsed{ToName}, $parsed{ToAddr}, + $parsed{CcName}, $parsed{CcAddr}, "", $GROUP ]); + } else { + push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp) ]); + } } closedir $mailDirFH; } @@ -109,7 +162,7 @@ while (my $rfp = readdir $topDH) { die "regular file found where we expected a type in $typeName" unless -d $typeDirName; print " $typeDirName\n"; if ($typeName =~ /email/i) { - ProcessMailDir($typeDirName, ""); + ProcessMailDir($rfp, $typeDirName, ""); } } closedir $bucketDH; @@ -118,6 +171,15 @@ while (my $rfp = readdir $topDH) { } closedir $topDH; +open my $csvFH, ">:encoding(utf8)", $csvLogFile; + +$csvOutFormat->say($csvFH, \@headerFields); +$csvOutFormat->say($csvFH, $_) for @CSV_OUTPUT_ROWS; close $csvFH; + +open($upiFH, ">", $upiNumberFile); +print $upiFH ++$upiCurrentNum, "\n"; +close $upiFH; + #make_path(, { # verbose => 1, # mode => 0755,