Seems this would not be a deterministic thing, or is there a way to do this reliably?
A python solution for zip files:
import difflib
import zipfile
def diff(filename1, filename2):
differs = False
z1 = zipfile.ZipFile(open(filename1))
z2 = zipfile.ZipFile(open(filename2))
if len(z1.infolist()) != len(z2.infolist()):
print "number of archive elements differ: {} in {} vs {} in {}".format(
len(z1.infolist()), z1.filename, len(z2.infolist()), z2.filename)
return 1
for zipentry in z1.infolist():
if zipentry.filename not in z2.namelist():
print "no file named {} found in {}".format(zipentry.filename,
z2.filename)
differs = True
else:
diff = difflib.ndiff(z1.open(zipentry.filename),
z2.open(zipentry.filename))
delta = ''.join(x[2:] for x in diff
if x.startswith('- ') or x.startswith('+ '))
if delta:
differs = True
print "content for {} differs:\n{}".format(
zipentry.filename, delta)
if not differs:
print "all files are the same"
return 0
return 1
Use as
diff(filename1, filename2)
It compares files line-by-line in memory and shows changes.