gpl-compliance-tools/hierarchy-comparison
Denver Gingerich 1208e8b7da h-c: sort the list of files used for file compare
Sorting the list of files used for file comparisons (MD5 sums and
actual diffs) makes the output much easier to deal with, especially
for C&CS checkers.  It groups all the files in a given directory
together so that one can more easily deal with all these files at
once.  This is useful for comparing differences in particular
components, especially ones like BusyBox whose files exist mostly in a
single directory and have the same checksums (since they all point to
one binary).
2011-05-20 11:24:13 -04:00

238 lines
7.4 KiB
Perl
Executable file

#!/usr/bin/perl -w
# hierarchy-comparison -*- Perl -*-
# Possible bug: only -type f and -type d are checked
# Copyright (C) 2001, 2002, 2003, 2004, 2008 Bradley M. Kuhn <bkuhn@ebb.org>
# Copyright (C) 2011 Denver Gingerich <denver@ossguy.com>
#
# This software's license gives you freedom; you can copy, convey,
# propagate, redistribute and/or modify this program under the terms of
# the GNU General Public License (GPL) as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version of the GPL published by the FSF.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program in a file in the toplevel directory called
# "GPLv3". If not, see <http://www.gnu.org/licenses/>.
#
use strict;
use POSIX qw(tmpnam);
use Cwd;
# Version string of this script (not referenced by the code visible in
# this file).
my $VERSION = '1.1';
# When true, the main program appends a unified diff to the report for
# every shared file whose checksums differ between the two hierarchies.
my $DO_DIFF = 1;
######################################################################
# FindAndSortOutput(TYPE, DIR, OUTPUT, IGNORE_REGEX, FILTER_REWRITE)
#
# Walks DIR with File::Find and collects every entry of the requested TYPE:
#   "FILES"        entries for which -f is true
#   "NON-REGULAR"  entries for which -f is false
#   "DIRECTORY"    entries for which -d is true
# Each collected path has the first match of FILTER_REWRITE removed (when
# given), the paths are sorted with cmp, and paths matching IGNORE_REGEX
# (when given) are dropped.  The surviving paths are written to OUTPUT, one
# per line, and also returned as a list.
#
# An undefined or empty IGNORE_REGEX / FILTER_REWRITE means "no filtering";
# an empty string must not reach the regex engine as a bare pattern, because
# Perl's empty pattern silently reuses the last successful regex.
#
# Dies on an unknown TYPE or when OUTPUT cannot be written.
sub FindAndSortOutput {
    use File::Find;
    my($type, $dir, $output, $ignoreRegex, $filterRewrite) = @_;

    my %typeTests = (
        "NON-REGULAR" => sub { not -f $_[0] },
        "FILES"       => sub { -f $_[0] },
        "DIRECTORY"   => sub { -d $_[0] },
    );
    my $wantedTest = $typeTests{$type};
    # Validate up front rather than only once find() visits an entry.
    die "Unknown type requested: $type" unless defined $wantedTest;

    my $ignore = (defined $ignoreRegex and length $ignoreRegex)
        ? qr/$ignoreRegex/ : undef;
    my $rewrite = (defined $filterRewrite and length $filterRewrite)
        ? qr/$filterRewrite/ : undef;

    my @files;
    my $buildList = sub {
        # With no_chdir, $_ is the full path of the entry being visited.
        my $path = $_;
        return unless $wantedTest->($path);
        chomp $path;
        $path =~ s/$rewrite// if defined $rewrite;
        push(@files, $path);
    };
    find({ wanted => $buildList, no_chdir => 1 }, $dir);

    my @sortedFiles = sort { $a cmp $b } @files;
    @sortedFiles = grep { $_ !~ $ignore } @sortedFiles if defined $ignore;

    open(my $listing, ">", $output) or
        die "$0: unable to open temporary output file, $output: $!";
    print {$listing} "$_\n" foreach @sortedFiles;
    # Buffered write errors only surface at close time.
    close($listing) or
        die "$0: unable to finish writing output file, $output: $!";

    return @sortedFiles;
}
######################################################################
# MD5SumFiles(DIR, FILE_LIST_REF, OUTPUT_FILE)
#
# Despite the historical name, this computes SHA-1 checksums.  It changes
# into DIR, feeds the NUL-separated names from FILE_LIST_REF to
# "xargs -0 sha1sum", stores the raw sha1sum output in OUTPUT_FILE, parses
# that file, and changes back to the previous working directory.
#
# Returns a reference to a hash mapping file name => hex SHA-1 digest.
# An empty list yields an empty OUTPUT_FILE and an empty hash.
#
# Dies when DIR cannot be entered, when xargs/sha1sum fails, or when
# OUTPUT_FILE contains a line that does not look like sha1sum output.
sub MD5SumFiles {
    my($dir, $fileListRef, $outputFile) = @_;
    my $curdir = getcwd();
    chdir $dir or die "$0: unable to change to $dir: $!";

    if (@{$fileListRef}) {
        # The output name is spliced into a shell command; single-quote it.
        (my $quotedOutput = $outputFile) =~ s/'/'\\''/g;
        # "--" stops sha1sum from treating names that begin with "-" as
        # options.
        open(my $xargs, "|-",
             "/usr/bin/xargs -0 /usr/bin/sha1sum -- >'$quotedOutput'")
            or die "unable to write to $outputFile: $!";
        print {$xargs} "$_\000" foreach @{$fileListRef};
        unless (close($xargs)) {
            # For a piped open, $! is 0 when the only problem was a
            # non-zero exit status of the child.
            die "$0: error running xargs to sha1sum command: "
                . ($! ? $! : "wait status $?");
        }
    } else {
        # Without input, xargs would still run sha1sum once on an empty
        # stdin and emit a bogus entry named "-".  Just create an empty
        # result file instead.
        open(my $empty, ">", $outputFile)
            or die "unable to write to $outputFile: $!";
        close($empty) or die "unable to write to $outputFile: $!";
    }

    open(my $sums, "<", $outputFile)
        or die "unable to read back in $outputFile: $!";
    my %files2sha1;
    while (my $line = <$sums>) {
        chomp $line;
        # Format: optional "\" marker, digest, a two-character separator
        # (" " followed by " " or "*"), then the file name.  The looser
        # \s+ alternative keeps accepting anything the old parser accepted.
        die "$outputFile has a strange line, \"$line\""
            unless $line =~ /^(\\?)(\S+)(?: [ *]|\s+)(.+)$/;
        my($escaped, $digest, $name) = ($1, $2, $3);
        if ($escaped) {
            # A leading backslash marks a name in which sha1sum escaped
            # backslash, newline and carriage return; undo that so the key
            # is the real file name.
            $name =~ s/\\([\\nr])/$1 eq 'n' ? "\n" : $1 eq 'r' ? "\r" : "\\"/ge;
        }
        $files2sha1{$name} = $digest;
    }
    close($sums);

    chdir $curdir or die "$0: cannot go back into $curdir: $!";
    return \%files2sha1;
}
######################################################################
# Main program.
#
# usage: hierarchy-comparison ORIG_DIRECTORY COMPARED_DIRECTORY
#                             DIFF_OUTPUT_FILE [IGNORED_FILES_REGEX]
#
# Appends four sections to DIFF_OUTPUT_FILE: a diff of the non-regular
# entries, a diff of the regular-file lists, a full-context diff of the
# SHA-1 sums of the files present on both sides, and (when $DO_DIFF is set)
# a unified diff of every shared file whose checksum differs.
#
# Exit status: 0 when the checksums of all shared files match, 1 when they
# differ, 2 on a usage error; dies when diff itself reports trouble.
#
# File::Temp replaces POSIX::tmpnam(), which is race-prone and no longer
# exists in Perl 5.26 and later; this section therefore no longer needs the
# file-level "use POSIX qw(tmpnam)".
use File::Temp qw(tempfile);

# Quote a string so that /bin/sh passes it on as one literal word.
sub ShellQuote {
    my($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Append literal text to the report file.
sub AppendReportText {
    my($reportFile, $text) = @_;
    open(my $report, ">>", $reportFile)
        or die "$0: unable to append to $reportFile: $!";
    print {$report} $text;
    close($report) or die "$0: unable to write to $reportFile: $!";
    return;
}

# Run /usr/bin/diff with the given arguments, appending its stdout and
# stderr to the report file.  Returns diff's exit code (0 same, 1 different,
# >1 trouble), i.e. the wait status shifted right by eight bits.
sub AppendDiffToReport {
    my($reportFile, @diffArguments) = @_;
    my $command = join(" ", "/usr/bin/diff",
                       (map { ShellQuote($_) } @diffArguments),
                       ">>", ShellQuote($reportFile), "2>&1");
    my $waitStatus = system($command);
    die "$0: unable to run /usr/bin/diff: $!" if $waitStatus == -1;
    die "$0: /usr/bin/diff was killed by signal " . ($waitStatus & 127)
        if $waitStatus & 127;
    return $waitStatus >> 8;
}

# Print the outcome of one comparison stage.  Dies when diff reported
# trouble; otherwise returns true when differences were found.
sub AnnounceDiffResult {
    my($exitCode, $reportFile, $sameMessage, $differentMessage) = @_;
    if ($exitCode > 1) {
        print "\n";
        die "$0: error trying to diff files; see $reportFile for details";
    }
    if ($exitCode == 0) {
        print $sameMessage;
        return 0;
    }
    print $differentMessage, "See $reportFile for more information.\n";
    return 1;
}

if (@ARGV != 3 and @ARGV != 4) {
    print "usage: $0 <ORIG_DIRECTORY> ",
        "<COMPARED_DIRECTORY> <DIFF_OUTPUT_FILE> [<IGNORED_FILES_REGEX>]\n";
    exit 2;
}
my($origDir, $comparedDir, $diffOutputFile, $ignoreRegex) = @ARGV;
$origDir =~ s%/\s*$%%;
$comparedDir =~ s%/\s*$%%;

# Each tree strips only its own, literally-matched prefix.  A shared
# alternation of both names mangles paths when one directory name is a
# prefix of the other ("a" vs "ab" turned "ab/x" into "b/x"), and unescaped
# names let regex metacharacters in a directory name match unintended text.
my $origPrefix = "^" . quotemeta($origDir) . "/?";
my $comparedPrefix = "^" . quotemeta($comparedDir) . "/?";

# Only the names are needed; the helpers reopen the files themselves.
# UNLINK removes both files when the program exits, including via die.
my($origTempHandle, $origTempFile) = tempfile(UNLINK => 1);
close($origTempHandle);
my($comparedTempHandle, $comparedTempFile) = tempfile(UNLINK => 1);
close($comparedTempHandle);

# First, look for directory differences
print "Doing directory comparison: ";
FindAndSortOutput("NON-REGULAR", $origDir, $origTempFile,
                  $ignoreRegex, $origPrefix);
FindAndSortOutput("NON-REGULAR", $comparedDir, $comparedTempFile,
                  $ignoreRegex, $comparedPrefix);
AppendReportText($diffOutputFile, "Directory comparison:\n");
my $diffExitCode = AppendDiffToReport($diffOutputFile, "-u", "--",
                                      $origTempFile, $comparedTempFile);
AnnounceDiffResult($diffExitCode, $diffOutputFile,
                   "directory structures match.\n",
                   "differences found in directory structures.\n");

# Now, see if the files are all the same
print "Doing file hierarchy comparison: ";
my(@orgFiles) = FindAndSortOutput("FILES", $origDir, $origTempFile,
                                  $ignoreRegex, $origPrefix);
my(@comparedFiles) = FindAndSortOutput("FILES", $comparedDir,
                                       $comparedTempFile, $ignoreRegex,
                                       $comparedPrefix);
AppendReportText($diffOutputFile, "\nFile hierarchy comparison:\n");
$diffExitCode = AppendDiffToReport($diffOutputFile, "-u", "--",
                                   $origTempFile, $comparedTempFile);
AnnounceDiffResult($diffExitCode, $diffOutputFile,
                   "both contain the same list of files.\n",
                   "differences found in file hierarchies.\n");

# Now, diff the checksums of the files.
print "Doing file contents comparisons: ";
# Only files present in both hierarchies can be compared by content.
my %inCompared = map { $_ => 1 } @comparedFiles;
my %shared = map { $_ => 1 } grep { $inCompared{$_} } @orgFiles;
my @sharedFiles = sort(keys %shared);
my $origFiles2sha1 = MD5SumFiles($origDir, \@sharedFiles, $origTempFile);
my $comparedFiles2sha1 = MD5SumFiles($comparedDir, \@sharedFiles,
                                     $comparedTempFile);
AppendReportText($diffOutputFile, "\nFile contents comparisons (MD5):\n");
# for C&CS checking, the list of files that are the same (esp. binary) is
# useful, so use a huge number of context lines to show these files
# TODO: replace -U[big_number] with proper infinite context flag (ossguy)
$diffExitCode = AppendDiffToReport($diffOutputFile, "-U2000000000", "--",
                                   $origTempFile, $comparedTempFile);
AppendReportText($diffOutputFile, "\nFile contents comparisons (diff):\n");
if ($DO_DIFF) {
    foreach my $file (@sharedFiles) {
        my $origSum = $origFiles2sha1->{$file};
        my $comparedSum = $comparedFiles2sha1->{$file};
        # A missing checksum means "unknown", so diff the file anyway.
        next if defined $origSum and defined $comparedSum
            and $origSum eq $comparedSum;
        # The exit code of the per-file diff is deliberately not examined;
        # the verdict below comes from the checksum comparison.
        AppendDiffToReport($diffOutputFile, "-u", "--",
                           "$origDir/$file", "$comparedDir/$file");
    }
}
my $contentsDiffer = AnnounceDiffResult($diffExitCode, $diffOutputFile,
                                        "all files match.\n",
                                        "differences found in some files.\n");
exit 1 if $contentsDiffer;
exit 0;
#
# Local variables:
# compile-command: "perl -c hierarchy-comparison"
# End: