2019-05-08 17:04:09 -03:00
|
|
|
#! /usr/bin/env python3
|
2016-02-17 11:27:35 -08:00
|
|
|
|
|
|
|
import sys
|
|
|
|
from zipfile import ZipFile
|
|
|
|
|
|
|
|
class ApkDiff:
    """Check whether two APK (zip) archives have identical contents.

    Entries listed in IGNORE_FILES (signing artifacts and resources.arsc) are
    excluded from every comparison. Results are reported on stdout.
    """

    # resources.arsc is ignored due to https://issuetracker.google.com/issues/110237303
    # May be fixed in Android Gradle Plugin 3.4
    IGNORE_FILES = ["META-INF/MANIFEST.MF", "META-INF/SIGNAL_S.RSA", "META-INF/SIGNAL_S.SF", "resources.arsc"]

    # Number of bytes read per step when comparing two entry streams.
    CHUNK_SIZE = 1024

    def compare(self, sourceApk, destinationApk):
        """Compare two APKs and print whether they match.

        Args:
            sourceApk: Path (or binary file-like object) of the first APK.
            destinationApk: Path (or binary file-like object) of the second APK.

        Returns:
            True when both the entry names and the entry contents match,
            False otherwise.
        """
        # Context managers guarantee both archives are closed, even when a
        # comparison raises part-way through.
        with ZipFile(sourceApk, 'r') as sourceZip, ZipFile(destinationApk, 'r') as destinationZip:
            matches = (self.compareManifests(sourceZip, destinationZip)
                       and self.compareEntries(sourceZip, destinationZip))

        if matches:
            print("APKs match!")
        else:
            print("APKs don't match!")

        return matches

    def compareManifests(self, sourceZip, destinationZip):
        """Compare the sorted entry names of two archives, skipping IGNORE_FILES.

        Args:
            sourceZip: Open ZipFile of the first APK.
            destinationZip: Open ZipFile of the second APK.

        Returns:
            True when both archives contain exactly the same entry names,
            False otherwise.
        """
        sourceEntrySortedList = sorted(
            name for name in sourceZip.namelist() if name not in self.IGNORE_FILES)
        destinationEntrySortedList = sorted(
            name for name in destinationZip.namelist() if name not in self.IGNORE_FILES)

        lengthsMatch = len(sourceEntrySortedList) == len(destinationEntrySortedList)
        if not lengthsMatch:
            print("Manifest lengths differ!")

        # Still walk the common prefix on a length mismatch so the first
        # differing pair of names is reported as a diagnostic.
        for sourceEntryName, destinationEntryName in zip(sourceEntrySortedList, destinationEntrySortedList):
            if sourceEntryName != destinationEntryName:
                print("Sorted manifests don't match, %s vs %s" % (sourceEntryName, destinationEntryName))
                return False

        # zip() stops at the shorter list, so a clean walk does not prove
        # equality: a length mismatch must fail the comparison on its own.
        return lengthsMatch

    def compareEntries(self, sourceZip, destinationZip):
        """Compare the contents of every non-ignored entry of two archives.

        Each source entry is paired with the first not-yet-used destination
        entry of the same name, in archive order.

        Args:
            sourceZip: Open ZipFile of the first APK.
            destinationZip: Open ZipFile of the second APK.

        Returns:
            True when every source entry has a same-named destination entry
            with identical bytes, False otherwise.
        """
        # Materialize real lists: in Python 3 filter() yields a one-shot
        # iterator, but these sequences are measured with len() and then
        # iterated again below.
        sourceInfoList = [info for info in sourceZip.infolist()
                          if info.filename not in self.IGNORE_FILES]
        destinationInfoList = [info for info in destinationZip.infolist()
                               if info.filename not in self.IGNORE_FILES]

        if len(sourceInfoList) != len(destinationInfoList):
            print("APK info lists of different length!")
            return False

        # Index destination entries by name (archive order kept per name) so
        # each lookup is a dict access instead of a scan of the whole list.
        unmatchedByName = {}
        for destinationEntryInfo in destinationInfoList:
            unmatchedByName.setdefault(destinationEntryInfo.filename, []).append(destinationEntryInfo)

        for sourceEntryInfo in sourceInfoList:
            candidates = unmatchedByName.get(sourceEntryInfo.filename)
            if not candidates:
                # A source entry without a counterpart is a mismatch, not
                # something to skip silently.
                print("APK entry %s has no matching entry in the destination APK!" % sourceEntryInfo.filename)
                return False

            destinationEntryInfo = candidates.pop(0)

            with sourceZip.open(sourceEntryInfo, 'r') as sourceEntry, \
                    destinationZip.open(destinationEntryInfo, 'r') as destinationEntry:
                if not self.compareFiles(sourceEntry, destinationEntry):
                    print("APK entry %s does not match %s!" % (sourceEntryInfo.filename, destinationEntryInfo.filename))
                    return False

        return True

    def compareFiles(self, sourceFile, destinationFile):
        """Compare two binary streams chunk by chunk.

        Args:
            sourceFile: Readable binary file-like object.
            destinationFile: Readable binary file-like object.

        Returns:
            True when both streams yield identical bytes up to end-of-file,
            False at the first differing chunk.
        """
        while True:
            sourceChunk = sourceFile.read(self.CHUNK_SIZE)
            destinationChunk = destinationFile.read(self.CHUNK_SIZE)

            if sourceChunk != destinationChunk:
                return False

            # Chunks are equal here, so an empty one means both streams ended.
            if not sourceChunk:
                return True
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Exactly two positional arguments are expected: the paths of the two
    # APKs to compare. Anything else prints the usage line and exits with 1.
    apkPaths = sys.argv[1:]
    if len(apkPaths) != 2:
        print("Usage: apkdiff <pathToFirstApk> <pathToSecondApk>")
        sys.exit(1)

    firstApkPath, secondApkPath = apkPaths
    ApkDiff().compare(firstApkPath, secondApkPath)
|