summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jvoisin 2015-08-18 19:33:42 +0200
committer jvoisin 2015-08-18 19:33:42 +0200
commit 5d139d7e171494a026c1fef798c37a00504c95aa (patch)
tree 2da153072305a7fc1670a350ec1fff6e2a1eab8d
parent fcd5bdfdeb10175d0bbef9be2b1ac631dec703a5 (diff)
Improves the way MAT deals with ZipTimestamps
It seems that using os.utime is TZ-dependent, so let's use a ZipInfo object with a fixed date_time instead.
-rw-r--r-- libmat/archive.py 20
-rw-r--r-- test/clean é.docx bin 6520 -> 2300 bytes
-rw-r--r-- test/clean é.odt bin 33140 -> 10907 bytes
-rw-r--r-- test/clean é.tar bin 10240 -> 10240 bytes
-rw-r--r-- test/clean é.tar.bz2 bin 5976 -> 10240 bytes
-rw-r--r-- test/clean é.tar.gz bin 5656 -> 10240 bytes
-rw-r--r-- test/clean é.zip bin 5885 -> 5747 bytes
7 files changed, 13 insertions, 7 deletions
diff --git a/libmat/archive.py b/libmat/archive.py
index 470b0cc..7a71717 100644
--- a/libmat/archive.py
+++ b/libmat/archive.py
@@ -14,9 +14,7 @@ import mat
14import parser 14import parser
15 15
16# Zip files do not support dates older than 01/01/1980 16# Zip files do not support dates older than 01/01/1980
17ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) 17ZIP_EPOCH = (1980, 1, 1, 1, 0, 0)
18ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0)
19 - datetime.datetime(1970, 1, 1, 0, 0, 0)).total_seconds()
20 18
21 19
22class GenericArchiveStripper(parser.GenericParser): 20class GenericArchiveStripper(parser.GenericParser):
@@ -185,8 +183,14 @@ class ZipStripper(GenericArchiveStripper):
185 basename, ext = os.path.splitext(path) 183 basename, ext = os.path.splitext(path)
186 if not (self.add2archive or ext in parser.NOMETA): 184 if not (self.add2archive or ext in parser.NOMETA):
187 continue 185 continue
188 os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) 186 zinfo = zipfile.ZipInfo(item.filename, date_time=ZIP_EPOCH)
189 zipout.write(path, item.filename) 187 zinfo.compress_type = zipfile.ZIP_DEFLATED
188 zinfo.create_system = 3 # Linux
189 zinfo.comment = ''
190 with open(path, 'r') as f:
191 zipout.writestr(zinfo, f.read())
192 # os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS))
193 # zipout.write(path, item.filename)
190 zipin.close() 194 zipin.close()
191 zipout.close() 195 zipout.close()
192 196
@@ -199,7 +203,8 @@ class TarStripper(GenericArchiveStripper):
199 """ Represent a tarfile archive 203 """ Represent a tarfile archive
200 """ 204 """
201 205
202 def _remove(self, current_file): 206 @staticmethod
207 def _remove_tar_added(current_file):
203 """ Remove the meta added by tarfile itself to the file 208 """ Remove the meta added by tarfile itself to the file
204 """ 209 """
205 current_file.mtime = 0 210 current_file.mtime = 0
@@ -239,7 +244,7 @@ class TarStripper(GenericArchiveStripper):
239 continue 244 continue
240 tarout.add(unicode(path.decode('utf-8')), 245 tarout.add(unicode(path.decode('utf-8')),
241 unicode(item.name.decode('utf-8')), 246 unicode(item.name.decode('utf-8')),
242 filter=self._remove) 247 filter=self._remove_tar_added)
243 tarin.close() 248 tarin.close()
244 tarout.close() 249 tarout.close()
245 self.do_backup() 250 self.do_backup()
@@ -333,6 +338,7 @@ class TerminalZipStripper(ZipStripper):
333 It is used for formats like docx, which are basically 338 It is used for formats like docx, which are basically
334 ziped xml. 339 ziped xml.
335 """ 340 """
341 pass
336 342
337 343
338class GzipStripper(TarStripper): 344class GzipStripper(TarStripper):
diff --git a/test/clean é.docx b/test/clean é.docx
index 738eb6c..727084e 100644
--- a/test/clean é.docx
+++ b/test/clean é.docx
Binary files differ
diff --git a/test/clean é.odt b/test/clean é.odt
index a06d816..94032e8 100644
--- a/test/clean é.odt
+++ b/test/clean é.odt
Binary files differ
diff --git a/test/clean é.tar b/test/clean é.tar
index b841792..618c146 100644
--- a/test/clean é.tar
+++ b/test/clean é.tar
Binary files differ
diff --git a/test/clean é.tar.bz2 b/test/clean é.tar.bz2
index ad4e6cb..618c146 100644
--- a/test/clean é.tar.bz2
+++ b/test/clean é.tar.bz2
Binary files differ
diff --git a/test/clean é.tar.gz b/test/clean é.tar.gz
index 1ab4407..618c146 100644
--- a/test/clean é.tar.gz
+++ b/test/clean é.tar.gz
Binary files differ
diff --git a/test/clean é.zip b/test/clean é.zip
index b2805c4..c28f424 100644
--- a/test/clean é.zip
+++ b/test/clean é.zip
Binary files differ