diff options
| author | jvoisin | 2015-08-18 19:33:42 +0200 |
|---|---|---|
| committer | jvoisin | 2015-08-18 19:33:42 +0200 |
| commit | 5d139d7e171494a026c1fef798c37a00504c95aa (patch) | |
| tree | 2da153072305a7fc1670a350ec1fff6e2a1eab8d | |
| parent | fcd5bdfdeb10175d0bbef9be2b1ac631dec703a5 (diff) | |
Improves the way MAT deals with ZipTimestamps
It seems that using os.utime is TZ-dependent,
so let's use a ZipInfo object instead.
| -rw-r--r-- | libmat/archive.py | 20 | ||||
| -rw-r--r-- | test/clean é.docx | bin | 6520 -> 2300 bytes | |||
| -rw-r--r-- | test/clean é.odt | bin | 33140 -> 10907 bytes | |||
| -rw-r--r-- | test/clean é.tar | bin | 10240 -> 10240 bytes | |||
| -rw-r--r-- | test/clean é.tar.bz2 | bin | 5976 -> 10240 bytes | |||
| -rw-r--r-- | test/clean é.tar.gz | bin | 5656 -> 10240 bytes | |||
| -rw-r--r-- | test/clean é.zip | bin | 5885 -> 5747 bytes |
7 files changed, 13 insertions, 7 deletions
diff --git a/libmat/archive.py b/libmat/archive.py index 470b0cc..7a71717 100644 --- a/libmat/archive.py +++ b/libmat/archive.py | |||
| @@ -14,9 +14,7 @@ import mat | |||
| 14 | import parser | 14 | import parser |
| 15 | 15 | ||
| 16 | # Zip files do not support dates older than 01/01/1980 | 16 | # Zip files do not support dates older than 01/01/1980 |
| 17 | ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) | 17 | ZIP_EPOCH = (1980, 1, 1, 1, 0, 0) |
| 18 | ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) | ||
| 19 | - datetime.datetime(1970, 1, 1, 0, 0, 0)).total_seconds() | ||
| 20 | 18 | ||
| 21 | 19 | ||
| 22 | class GenericArchiveStripper(parser.GenericParser): | 20 | class GenericArchiveStripper(parser.GenericParser): |
| @@ -185,8 +183,14 @@ class ZipStripper(GenericArchiveStripper): | |||
| 185 | basename, ext = os.path.splitext(path) | 183 | basename, ext = os.path.splitext(path) |
| 186 | if not (self.add2archive or ext in parser.NOMETA): | 184 | if not (self.add2archive or ext in parser.NOMETA): |
| 187 | continue | 185 | continue |
| 188 | os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) | 186 | zinfo = zipfile.ZipInfo(item.filename, date_time=ZIP_EPOCH) |
| 189 | zipout.write(path, item.filename) | 187 | zinfo.compress_type = zipfile.ZIP_DEFLATED |
| 188 | zinfo.create_system = 3 # Linux | ||
| 189 | zinfo.comment = '' | ||
| 190 | with open(path, 'r') as f: | ||
| 191 | zipout.writestr(zinfo, f.read()) | ||
| 192 | # os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) | ||
| 193 | # zipout.write(path, item.filename) | ||
| 190 | zipin.close() | 194 | zipin.close() |
| 191 | zipout.close() | 195 | zipout.close() |
| 192 | 196 | ||
| @@ -199,7 +203,8 @@ class TarStripper(GenericArchiveStripper): | |||
| 199 | """ Represent a tarfile archive | 203 | """ Represent a tarfile archive |
| 200 | """ | 204 | """ |
| 201 | 205 | ||
| 202 | def _remove(self, current_file): | 206 | @staticmethod |
| 207 | def _remove_tar_added(current_file): | ||
| 203 | """ Remove the meta added by tarfile itself to the file | 208 | """ Remove the meta added by tarfile itself to the file |
| 204 | """ | 209 | """ |
| 205 | current_file.mtime = 0 | 210 | current_file.mtime = 0 |
| @@ -239,7 +244,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 239 | continue | 244 | continue |
| 240 | tarout.add(unicode(path.decode('utf-8')), | 245 | tarout.add(unicode(path.decode('utf-8')), |
| 241 | unicode(item.name.decode('utf-8')), | 246 | unicode(item.name.decode('utf-8')), |
| 242 | filter=self._remove) | 247 | filter=self._remove_tar_added) |
| 243 | tarin.close() | 248 | tarin.close() |
| 244 | tarout.close() | 249 | tarout.close() |
| 245 | self.do_backup() | 250 | self.do_backup() |
| @@ -333,6 +338,7 @@ class TerminalZipStripper(ZipStripper): | |||
| 333 | It is used for formats like docx, which are basically | 338 | It is used for formats like docx, which are basically |
| 334 | ziped xml. | 339 | ziped xml. |
| 335 | """ | 340 | """ |
| 341 | pass | ||
| 336 | 342 | ||
| 337 | 343 | ||
| 338 | class GzipStripper(TarStripper): | 344 | class GzipStripper(TarStripper): |
diff --git a/test/clean é.docx b/test/clean é.docx index 738eb6c..727084e 100644 --- a/test/clean é.docx +++ b/test/clean é.docx | |||
| Binary files differ | |||
diff --git a/test/clean é.odt b/test/clean é.odt index a06d816..94032e8 100644 --- a/test/clean é.odt +++ b/test/clean é.odt | |||
| Binary files differ | |||
diff --git a/test/clean é.tar b/test/clean é.tar index b841792..618c146 100644 --- a/test/clean é.tar +++ b/test/clean é.tar | |||
| Binary files differ | |||
diff --git a/test/clean é.tar.bz2 b/test/clean é.tar.bz2 index ad4e6cb..618c146 100644 --- a/test/clean é.tar.bz2 +++ b/test/clean é.tar.bz2 | |||
| Binary files differ | |||
diff --git a/test/clean é.tar.gz b/test/clean é.tar.gz index 1ab4407..618c146 100644 --- a/test/clean é.tar.gz +++ b/test/clean é.tar.gz | |||
| Binary files differ | |||
diff --git a/test/clean é.zip b/test/clean é.zip index b2805c4..c28f424 100644 --- a/test/clean é.zip +++ b/test/clean é.zip | |||
| Binary files differ | |||
