gpl-compliance-tools/hierarchy-comparison

250 lines
8 KiB
Perl
Executable file

#!/usr/bin/perl -w
# hierarchy-comparison -*- Perl -*-
# Possible bug: only -type f and -type d are checked
# Copyright (C) 2001, 2002, 2003, 2004, 2008, 2011, 2012, 2014 Bradley M. Kuhn <bkuhn@ebb.org>
# Copyright (C) 2011 Denver Gingerich <denver@ossguy.com>
#
# This software's license gives you freedom; you can copy, convey,
# propagate, redistribute and/or modify this program under the terms of
# the GNU General Public License (GPL) as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version of the GPL published by the FSF.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program in a file in the toplevel directory called
# "GPLv3". If not, see <http://www.gnu.org/licenses/>.
#
use strict;
use File::Temp qw/tempfile/;
use Cwd;
my $VERSION = '1.1';
my $DO_DIFF = 1;
######################################################################
# FindAndSortOutput TYPE, DIR, OUTPUTFH, IGNOREREGEX, FILTERREWRITE
#
# Walk DIR with File::Find and collect the entries selected by TYPE:
#   "FILES"        entries for which -f is true
#   "DIRECTORY"    entries for which -d is true
#   "NON-REGULAR"  entries for which -f is false
# Any other TYPE is fatal.  The tests are done on the full path as found
# (no_chdir), while the name that is recorded has FILTERREWRITE (a regex,
# may be undef) deleted from it first, so that two different trees can
# yield comparable relative names.
#
# The collected names are sorted with cmp; names matching IGNOREREGEX are
# dropped.  An undefined or blank IGNOREREGEX means "ignore nothing".  Each
# surviving name is written to OUTPUTFH, one per line, and OUTPUTFH is
# closed.  Dies if writing to or closing OUTPUTFH fails.
#
# Returns the list of names that were written.
sub FindAndSortOutput ($$$$$) {
  my($type, $dir, $outputFH, $ignoreRegex, $filterRewrite) = @_;
  use File::Find;

  # An empty pattern has special meaning to Perl's match operators (it
  # re-uses the last successful pattern), so never interpolate one.
  my $haveRewrite = (defined $filterRewrite and length $filterRewrite);
  my $haveIgnore  = (defined $ignoreRegex and $ignoreRegex =~ /\S/);

  my @files;
  my $buildList = sub {
    my $val = $_;
    chomp $val;
    $val =~ s/$filterRewrite// if $haveRewrite;
    if ($type eq "NON-REGULAR") {
      push(@files, $val) unless -f $_;
    } elsif ($type eq "FILES") {
      push(@files, $val) if -f $_;
    } elsif ($type eq "DIRECTORY") {
      push(@files, $val) if -d $_;
    } else {
      die "Unknown type requested: $type";
    }
  };
  find({ wanted => $buildList, no_chdir => 1}, $dir);

  my @sortedChompedFiles;
  foreach my $file (sort {$a cmp $b } @files) {
    chomp $file;
    next if $haveIgnore and $file =~ /$ignoreRegex/;
    push(@sortedChompedFiles, $file);
    $outputFH->print("$file\n")
      or die "unable to write to output file: $outputFH: $!";
  }

  # Buffered write errors only surface when the handle is flushed, so the
  # return value of close is what tells us whether the list really made it
  # to disk ($? is not set by closing an ordinary file).
  $outputFH->close()
    or die "unable to write to output file: $outputFH: $!";
  return @sortedChompedFiles;
}
######################################################################
# MD5SumFiles DIR, FILELISTREF, OUTPUTFILE
#
# Despite the historical name, this computes SHA-1 checksums: it changes
# into DIR, feeds the names in @$FILELISTREF (NUL-separated, so names with
# whitespace survive) to "xargs -0 sha1sum", and has the shell write the
# result to OUTPUTFILE.  OUTPUTFILE is then read back and parsed, and the
# original working directory is restored.
#
# Returns a reference to a hash mapping each file name to its checksum.
# Dies if DIR cannot be entered, if the xargs/sha1sum pipeline cannot be
# started or exits non-zero, if OUTPUTFILE cannot be read back or contains
# a line that does not look like checksum output, or if the original
# directory cannot be re-entered.
sub MD5SumFiles {
  my($dir, $fileListRef, $outputFile) = @_;
  my $curdir = getcwd();
  chdir $dir or die "unable to change to $dir: $!";

  # The redirection needs a shell, so quote the output path to keep the
  # shell from splitting or expanding it.
  (my $quotedOutput = $outputFile) =~ s/'/'\\''/g;
  open(my $sumPipe, "|-",
       "/usr/bin/xargs -0 /usr/bin/sha1sum >'$quotedOutput'")
    or die "unable to write to $outputFile: $!";
  foreach my $file (@{$fileListRef}) {
    print {$sumPipe} "$file\000";
  }
  # Closing a pipe waits for the child and leaves its wait status in $?;
  # $! says nothing useful about a child that merely exited non-zero.
  close $sumPipe;
  die "$0: error running xargs to sha1sum command; wait status $?"
    unless ($? == 0);

  open(my $sumInput, "<", $outputFile)
    or die "unable to read back in $outputFile: $!";
  my %unescapeOf = ('\\' => '\\', 'n' => "\n", 'r' => "\r");
  my %files2sha1;
  while (my $line = <$sumInput>) {
    chomp $line;
    # GNU sha1sum marks a line whose file name needed escaping with a
    # leading backslash; remember that and undo the escaping below.
    my $isEscaped = ($line =~ s/^\\//);
    # Preferred form: checksum, one space, one mode flag (' ' or '*'), then
    # the name verbatim -- this keeps leading whitespace in the name.  The
    # looser historical pattern is kept as a fallback for other layouts.
    die "$outputFile has a strange line, \"$line\""
      unless $line =~ /^(\S+) [ *](.+)$/ or $line =~ /^(\S+)\s+(.+)$/;
    my($sum, $name) = ($1, $2);
    $name =~ s/\\([\\nr])/$unescapeOf{$1}/g if $isEscaped;
    $files2sha1{$name} = $sum;
  }
  close $sumInput;

  chdir $curdir or die "$0: cannot go back into $curdir: $!";
  return \%files2sha1;
}
######################################################################
# Main program.
#
# usage: hierarchy-comparison ORIG_DIRECTORY COMPARED_DIRECTORY
#                             DIFF_OUTPUT_FILE [IGNORED_FILES_REGEX]
#
# Three comparisons are made, each appending a labelled section to
# DIFF_OUTPUT_FILE and printing a one-line summary on stdout:
#   1. the lists of non-regular entries (directories etc.) of both trees,
#   2. the lists of regular files of both trees,
#   3. the checksums of the files present in both trees, followed (when
#      $DO_DIFF is set) by a "diff -wu" of every file whose checksum differs.
#
# Exit status: 2 for a usage error, 1 if the checksum lists differ, 0
# otherwise; the script dies if diff itself reports trouble.
# NOTE(review): differences found in passes 1 and 2 are reported but do not
# affect the exit status -- confirm whether that is intended.

# Quote STRING so that /bin/sh sees it as exactly one literal word.
sub ShellQuote {
  my($string) = @_;
  $string =~ s/'/'\\''/g;
  return "'$string'";
}

# Append TEXT plus a newline to FILE.  The file is opened and closed on
# every call so the text is on disk before a later diff appends to it.
sub AppendLineToFile {
  my($file, $text) = @_;
  open(my $fh, ">>", $file) or die "$0: unable to append to $file: $!";
  print {$fh} "$text\n" or die "$0: unable to write to $file: $!";
  close($fh) or die "$0: unable to finish writing to $file: $!";
}

# Run /usr/bin/diff with DIFFARGS (options and file names), appending its
# stdout and stderr to OUTPUTFILE.  Returns diff's exit status: 0 for no
# differences, 1 for differences, anything larger for trouble.  Dies if
# the command could not be started or was killed by a signal.
sub RunDiff {
  my($outputFile, @diffArgs) = @_;
  my $command = join(" ", "/usr/bin/diff", map { ShellQuote($_) } @diffArgs)
    . " >> " . ShellQuote($outputFile) . " 2>&1";
  system($command);
  die "$0: unable to run /usr/bin/diff: $!" if $? == -1;
  die "$0: /usr/bin/diff was killed by signal " . ($? & 127) if $? & 127;
  # $? is a wait status; the exit code lives in its high byte.
  return $? >> 8;
}

# Abort the program if DIFFEXITCODE says that diff ran into trouble.
sub DieIfDiffFailed {
  my($diffExitCode) = @_;
  return unless $diffExitCode > 1;
  print "\n";
  die "$0: error trying to diff files (diff exit status $diffExitCode)";
}

if (@ARGV != 3 and @ARGV != 4) {
  print "usage: $0 <ORIG_DIRECTORY> ",
    "<COMPARED_DIRECTORY> <DIFF_OUTPUT_FILE> [<IGNORED_FILES_REGEX>]\n";
  exit 2;
}
my($origDir, $comparedDir, $diffOutputFile, $ignoreRegex) = @ARGV;
$origDir =~ s%/\s*$%%;
$comparedDir =~ s%/\s*$%%;

# A blank regex means "ignore nothing"; make that explicit so it is never
# interpolated as an (always special) empty pattern further down.
undef $ignoreRegex unless defined $ignoreRegex and $ignoreRegex =~ /\S/;

# Each tree gets its own prefix-stripping regex.  The directory name is
# quoted so that regex metacharacters in it are taken literally, and only
# the tree's own name is stripped so that one name being a prefix of the
# other (e.g. "foo" and "foobar") cannot confuse the result.
my $origStripRegex = "^" . quotemeta($origDir) . "/?";
my $comparedStripRegex = "^" . quotemeta($comparedDir) . "/?";

my $origTempFH = File::Temp->new(UNLINK => 0, SUFFIX => '.orig');
my $origTempFile = $origTempFH->filename;
my $comparedTempFH = File::Temp->new(UNLINK => 0, SUFFIX => '.compared');
my $comparedTempFile = $comparedTempFH->filename;

# First, look for directory differences
print "Comparing $origDir to $comparedDir";
if (defined $ignoreRegex) {
  print ", excluding all files matching this RE: $ignoreRegex\n";
} else {
  print "\n";
}
print "Doing directory comparison: ";
FindAndSortOutput("NON-REGULAR", $origDir, $origTempFH,
                  $ignoreRegex, $origStripRegex);
FindAndSortOutput("NON-REGULAR", $comparedDir, $comparedTempFH,
                  $ignoreRegex, $comparedStripRegex);
AppendLineToFile($diffOutputFile, "Directory comparison:");
my $diffExitCode =
  RunDiff($diffOutputFile, "-u", $origTempFile, $comparedTempFile);
unlink($origTempFile, $comparedTempFile);
DieIfDiffFailed($diffExitCode);
if ($diffExitCode == 0) {
  print "directory structures match.\n";
} else {
  print "differences found in directory structures.\n",
    "See $diffOutputFile for more information.\n";
}

# Now, see if the files are all the same
print "Doing file hierarchy comparison: ";
$origTempFH = File::Temp->new(UNLINK => 0, SUFFIX => '.orig');
$origTempFile = $origTempFH->filename;
$comparedTempFH = File::Temp->new(UNLINK => 0, SUFFIX => '.compared');
$comparedTempFile = $comparedTempFH->filename;
my(@orgFiles) = FindAndSortOutput("FILES", $origDir, $origTempFH,
                                  $ignoreRegex, $origStripRegex);
my(@comparedFiles) = FindAndSortOutput("FILES", $comparedDir, $comparedTempFH,
                                       $ignoreRegex, $comparedStripRegex);
AppendLineToFile($diffOutputFile, "");
AppendLineToFile($diffOutputFile, "File hierarchy comparison:");
$diffExitCode =
  RunDiff($diffOutputFile, "-u", $origTempFile, $comparedTempFile);
# The temporary files are re-used for the checksum lists below, so they
# are only removed here when we are about to give up.
unlink($origTempFile, $comparedTempFile) if $diffExitCode > 1;
DieIfDiffFailed($diffExitCode);
if ($diffExitCode == 0) {
  print "both contain the same list of files.\n";
} else {
  print "differences found in file hierarchies.\n",
    "See $diffOutputFile for more information.\n";
}

# Now, diff the checksums of the files.
print "Doing file contents comparisons: ";
# Only files present in both trees can be compared by content.
my %inCompared;
@inCompared{@comparedFiles} = ();
my %final;
foreach my $file (@orgFiles) {
  $final{$file} = $file if exists $inCompared{$file};
}
my @sharedFiles = sort(keys %final);
my $origFiles2sha1 = MD5SumFiles($origDir, \@sharedFiles, $origTempFile);
my $comparedFiles2sha1 =
  MD5SumFiles($comparedDir, \@sharedFiles, $comparedTempFile);
AppendLineToFile($diffOutputFile, "");
# The "(MD5)" label is historical and kept so the report format is stable.
AppendLineToFile($diffOutputFile, "File contents comparisons (MD5):");
# for C&CS checking, the list of files that are the same (esp. binary) is useful
# so use infinite number of lines of context to show these files
# TODO: replace -U[big_number] with proper infinite context flag (ossguy)
$diffExitCode =
  RunDiff($diffOutputFile, "-U2000000000", $origTempFile, $comparedTempFile);
unlink($origTempFile, $comparedTempFile);
AppendLineToFile($diffOutputFile, "");
AppendLineToFile($diffOutputFile, "File contents comparisons (diff):");
if ($DO_DIFF) {
  foreach my $file (@sharedFiles) {
    my $origSum = $origFiles2sha1->{$file};
    my $comparedSum = $comparedFiles2sha1->{$file};
    # Skip the diff only when both checksums are known and equal; a
    # missing checksum is treated as "might differ".
    next if defined $origSum and defined $comparedSum
      and $origSum eq $comparedSum;
    # The exit status is deliberately ignored here: whatever diff has to
    # say about this pair of files ends up in the output file.
    RunDiff($diffOutputFile, "-wu", "$origDir/$file", "$comparedDir/$file");
  }
}
DieIfDiffFailed($diffExitCode);
if ($diffExitCode == 0) {
  print "all files match.\n";
} else {
  print "differences found in some files.\n",
    "See $diffOutputFile for more information.\n";
  exit 1;
}
exit 0;
#
# Local variables:
# compile-command: "perl -c hierarchy-comparison"
# End: