From 5d139d7e171494a026c1fef798c37a00504c95aa Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 18 Aug 2015 19:33:42 +0200 Subject: Improves the way MAT deals with ZipTimestamps It seems that using os.utime is TZ-dependent, so lets use a ZipInfo thing instead. --- libmat/archive.py | 20 +++++++++++++------- "test/clean \303\251.docx" | Bin 6520 -> 2300 bytes "test/clean \303\251.odt" | Bin 33140 -> 10907 bytes "test/clean \303\251.tar" | Bin 10240 -> 10240 bytes "test/clean \303\251.tar.bz2" | Bin 5976 -> 10240 bytes "test/clean \303\251.tar.gz" | Bin 5656 -> 10240 bytes "test/clean \303\251.zip" | Bin 5885 -> 5747 bytes 7 files changed, 13 insertions(+), 7 deletions(-) diff --git a/libmat/archive.py b/libmat/archive.py index 470b0cc..7a71717 100644 --- a/libmat/archive.py +++ b/libmat/archive.py @@ -14,9 +14,7 @@ import mat import parser # Zip files do not support dates older than 01/01/1980 -ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) -ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) - - datetime.datetime(1970, 1, 1, 0, 0, 0)).total_seconds() +ZIP_EPOCH = (1980, 1, 1, 1, 0, 0) class GenericArchiveStripper(parser.GenericParser): @@ -185,8 +183,14 @@ class ZipStripper(GenericArchiveStripper): basename, ext = os.path.splitext(path) if not (self.add2archive or ext in parser.NOMETA): continue - os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) - zipout.write(path, item.filename) + zinfo = zipfile.ZipInfo(item.filename, date_time=ZIP_EPOCH) + zinfo.compress_type = zipfile.ZIP_DEFLATED + zinfo.create_system = 3 # Linux + zinfo.comment = '' + with open(path, 'r') as f: + zipout.writestr(zinfo, f.read()) + # os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) + # zipout.write(path, item.filename) zipin.close() zipout.close() @@ -199,7 +203,8 @@ class TarStripper(GenericArchiveStripper): """ Represent a tarfile archive """ - def _remove(self, current_file): + @staticmethod + def _remove_tar_added(current_file): """ Remove the meta added by tarfile itself to the file """ current_file.mtime = 0 @@ -239,7 +244,7 @@ class TarStripper(GenericArchiveStripper): continue tarout.add(unicode(path.decode('utf-8')), unicode(item.name.decode('utf-8')), - filter=self._remove) + filter=self._remove_tar_added) tarin.close() tarout.close() self.do_backup() @@ -333,6 +338,7 @@ class TerminalZipStripper(ZipStripper): It is used for formats like docx, which are basically ziped xml. """ + pass class GzipStripper(TarStripper): diff --git "a/test/clean \303\251.docx" "b/test/clean \303\251.docx" index 738eb6c..727084e 100644 Binary files "a/test/clean \303\251.docx" and "b/test/clean \303\251.docx" differ diff --git "a/test/clean \303\251.odt" "b/test/clean \303\251.odt" index a06d816..94032e8 100644 Binary files "a/test/clean \303\251.odt" and "b/test/clean \303\251.odt" differ diff --git "a/test/clean \303\251.tar" "b/test/clean \303\251.tar" index b841792..618c146 100644 Binary files "a/test/clean \303\251.tar" and "b/test/clean \303\251.tar" differ diff --git "a/test/clean \303\251.tar.bz2" "b/test/clean \303\251.tar.bz2" index ad4e6cb..618c146 100644 Binary files "a/test/clean \303\251.tar.bz2" and "b/test/clean \303\251.tar.bz2" differ diff --git "a/test/clean \303\251.tar.gz" "b/test/clean \303\251.tar.gz" index 1ab4407..618c146 100644 Binary files "a/test/clean \303\251.tar.gz" and "b/test/clean \303\251.tar.gz" differ diff --git "a/test/clean \303\251.zip" "b/test/clean \303\251.zip" index b2805c4..c28f424 100644 Binary files "a/test/clean \303\251.zip" and "b/test/clean \303\251.zip" differ -- cgit v1.3