Basic support for creating the CSV files & names of maildir files
This commit is contained in:
		
							parent
							
								
									123dbd6018
								
							
						
					
					
						commit
						4627b4e24d
					
				
					 1 changed files with 66 additions and 4 deletions
				
			
		|  | @ -8,10 +8,14 @@ | ||||||
| #  native is where things are just moved over at the end | #  native is where things are just moved over at the end | ||||||
| #  numbered is where the numbered items go | #  numbered is where the numbered items go | ||||||
| 
 | 
 | ||||||
|  | use utf8; | ||||||
|  | 
 | ||||||
| use strict; | use strict; | ||||||
| use warnings; | use warnings; | ||||||
| use autodie qw(:all); | use autodie qw(:all); | ||||||
| 
 | 
 | ||||||
|  | use open qw/ :std :encoding(utf-8) /; | ||||||
|  | 
 | ||||||
| use Getopt::Long; | use Getopt::Long; | ||||||
| use File::Spec::Functions; | use File::Spec::Functions; | ||||||
| use File::Spec; | use File::Spec; | ||||||
|  | @ -19,6 +23,8 @@ use File::Path qw(make_path); | ||||||
| use Mail::Header; | use Mail::Header; | ||||||
| use Email::Address::XS; | use Email::Address::XS; | ||||||
| use File::Copy; | use File::Copy; | ||||||
|  | use Date::Manip::DM6 qw(ParseDate UnixDate); | ||||||
|  | use Text::CSV;  # libtext-csv-perl in Debian | ||||||
| 
 | 
 | ||||||
| my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE', | my %GROUP_NAMES_BY_DIR = ( confidential => 'CONFIDENTIAL', privilege => 'PRIVILEGE', privileged => 'PRIVILEGE', | ||||||
|                            'journalist-privilege' => 'PRIVILEGE' ); |                            'journalist-privilege' => 'PRIVILEGE' ); | ||||||
|  | @ -64,8 +70,23 @@ UsageAndExit("Error reading \"$upiNumberFile\"") unless $count == 1 and $upiStar | ||||||
| 
 | 
 | ||||||
| my $upiCurrentNum = $upiStart; | my $upiCurrentNum = $upiStart; | ||||||
| 
 | 
 | ||||||
| sub ProcessMailDir($$) { | my $csvOutFormat = Text::CSV->new({ binary => 1, always_quote => 1, quote_empty => 1, blank_is_undef => 1}); | ||||||
|   my($inputMailDir, $destOutDir) = @_; | my $csvLogFile = File::Spec->rel2abs(catfile($OUTPUT_TOPLEVEL_DIR, "${GROUP}-log.csv")); | ||||||
|  | UsageAndExit("\"$csvLogFile\" cannot already exist!  Do not attempt to number the same group twice!") | ||||||
|  |   if (-e $csvLogFile); | ||||||
|  | 
 | ||||||
|  | my @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FILE NAME', 'RFP # TO WHICH FILE IS RESPONSIVE'); | ||||||
|  | 
 | ||||||
|  | if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') { | ||||||
|  |   @headerFields = ('UNIQUE PRODUCTION IDENTIFER (UPI) #', 'FROM NAME', 'FROM ADDRESS', | ||||||
|  |                    'SUBJECT MATTER', 'SUBMIT DATE', 'TO NAME', 'TO ADDRESS', 'CC NAME', 'CC ADDRESS', 'BCC ADDRESS', | ||||||
|  |                    'PRIVILEGE CLAIMED'); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | my @CSV_OUTPUT_ROWS; | ||||||
|  | 
 | ||||||
|  | sub ProcessMailDir($$$) { | ||||||
|  |   my($rfp, $inputMailDir, $destOutDir) = @_; | ||||||
| 
 | 
 | ||||||
|   my @msgDirs = (catfile($inputMailDir, 'new'), catfile($inputMailDir, 'cur')); |   my @msgDirs = (catfile($inputMailDir, 'new'), catfile($inputMailDir, 'cur')); | ||||||
|   foreach my $dir (@msgDirs) { |   foreach my $dir (@msgDirs) { | ||||||
|  | @ -78,16 +99,48 @@ sub ProcessMailDir($$) { | ||||||
|       next if -d $file;    # skip directories |       next if -d $file;    # skip directories | ||||||
|       my $msgFile = catfile($dir, $file); |       my $msgFile = catfile($dir, $file); | ||||||
|       open(my $msgFH, "<", $msgFile); |       open(my $msgFH, "<", $msgFile); | ||||||
|  |       my $upiFull = sprintf("UPI-SFC-%07d", ++$upiCurrentNum); | ||||||
|       print "       $msgFile\n"; |       print "       $msgFile\n"; | ||||||
|       my $header = new Mail::Header($msgFH); |       my $header = new Mail::Header($msgFH); | ||||||
|       my $fields = $header->header_hashref; |       my $fields = $header->header_hashref; | ||||||
|  |       my %parsed = (FromName => '', ToName => '', FromAddr => "", ToAddr => "", CcName => '', CcAddr => '', 'Subject' => '', | ||||||
|  |                     Date => ''); | ||||||
|       foreach my $fieldName (qw/From To Cc Subject Date/) { |       foreach my $fieldName (qw/From To Cc Subject Date/) { | ||||||
|         foreach my $item (@{$fields->{$fieldName}}) { |         foreach my $item (@{$fields->{$fieldName}}) { | ||||||
|           chomp $item; |           chomp $item; | ||||||
|           print "             $fieldName $item\n"; |           if ($fieldName =~ /From|To|Cc/) { | ||||||
|  |             my $addr = Email::Address::XS->parse($item); | ||||||
|  |             if ($addr->name ne "") { | ||||||
|  |               $parsed{"${fieldName}Name"} .= "; " if $parsed{"${fieldName}Name"} !~ /^\s*$/; | ||||||
|  |               $parsed{"${fieldName}Name"} .= $addr->name; | ||||||
|  |             } | ||||||
|  |             if ($addr->address ne "") { | ||||||
|  |               $parsed{"${fieldName}Addr"} .= "; " if $parsed{"${fieldName}Addr"} !~ /^\s*$/; | ||||||
|  |               $parsed{"${fieldName}Addr"} .= $addr->address; | ||||||
|  |             } | ||||||
|  |           } elsif ($fieldName eq 'Date' and $parsed{Date} =~ /^\s*$/) { | ||||||
|  |             $parsed{Date} = ParseDate($item); | ||||||
|  |             die "$item is in the date field but doesn't parse" unless defined $parsed{Date} and $parsed{Date} !~ /^\s*$/; | ||||||
|  |           } else { | ||||||
|  |             $parsed{$fieldName} = $item; | ||||||
|  |           } | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|       close $msgFH; |       close $msgFH; | ||||||
|  |       $parsed{Subject} =~ s/[\r\n]/ /gm; | ||||||
|  |       my $subjectDashes = $parsed{Subject}; | ||||||
|  |       $subjectDashes =~ s/[^a-zA-Z0-9]/_/g; $subjectDashes =~ s/_+/_/g; $subjectDashes =~ s/_+$//g; $subjectDashes =~ s/^_+//g; | ||||||
|  |       my $fileName = $upiFull . '-' . $GROUP_NAMES_BY_DIR{$GROUP} . '-' . | ||||||
|  |                      UnixDate($parsed{Date}, '%Y%m%d-%H%M-') . $subjectDashes . '.eml'; | ||||||
|  |       print "$fileName\n"; | ||||||
|  |       die "$fileName has no subject" if not defined $parsed{Subject}; | ||||||
|  |       if ($GROUP_NAMES_BY_DIR{$GROUP} eq 'PRIVILEGE') { | ||||||
|  |         push(@CSV_OUTPUT_ROWS, [ $upiFull, $parsed{FromName}, $parsed{FromAddr}, $parsed{Subject}, | ||||||
|  |                                  UnixDate("%D", $parsed{Date}), $parsed{ToName}, $parsed{ToAddr}, | ||||||
|  |                                  $parsed{CcName}, $parsed{CcAddr}, "", $GROUP ]); | ||||||
|  |       } else { | ||||||
|  |         push(@CSV_OUTPUT_ROWS, [ $upiFull, $fileName, uc($rfp) ]); | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|     closedir $mailDirFH; |     closedir $mailDirFH; | ||||||
|   } |   } | ||||||
|  | @ -109,7 +162,7 @@ while (my $rfp = readdir $topDH) { | ||||||
|       die "regular file found where we expected a type in $typeName" unless -d $typeDirName; |       die "regular file found where we expected a type in $typeName" unless -d $typeDirName; | ||||||
|       print "      $typeDirName\n"; |       print "      $typeDirName\n"; | ||||||
|       if ($typeName =~ /email/i) { |       if ($typeName =~ /email/i) { | ||||||
|         ProcessMailDir($typeDirName, ""); |         ProcessMailDir($rfp, $typeDirName, ""); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     closedir $bucketDH; |     closedir $bucketDH; | ||||||
|  | @ -118,6 +171,15 @@ while (my $rfp = readdir $topDH) { | ||||||
| } | } | ||||||
| closedir $topDH; | closedir $topDH; | ||||||
| 
 | 
 | ||||||
|  | open my $csvFH, ">:encoding(utf8)", $csvLogFile; | ||||||
|  | 
 | ||||||
|  | $csvOutFormat->say($csvFH, \@headerFields); | ||||||
|  | $csvOutFormat->say($csvFH, $_) for @CSV_OUTPUT_ROWS; close $csvFH; | ||||||
|  | 
 | ||||||
|  | open($upiFH, ">", $upiNumberFile); | ||||||
|  | print $upiFH ++$upiCurrentNum, "\n"; | ||||||
|  | close $upiFH; | ||||||
|  | 
 | ||||||
| #make_path(, { | #make_path(, { | ||||||
| #    verbose => 1, | #    verbose => 1, | ||||||
| #    mode => 0755, | #    mode => 0755, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Bradley M. Kuhn
						Bradley M. Kuhn