summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jvoisin 2019-02-24 23:03:17 +0100
committer jvoisin 2019-02-24 23:32:32 +0100
commit 545dccc3527fcdf851b30b072ae6c7222b711777 (patch)
tree 6c76bd51bd56a87aac7e70accc460c80d492253f
parent 524bae597209d775828bd176f6c00dd243f47c75 (diff)
In archive-based formats, the `mimetype` file comes first
This should improve epub compatibility, along with other formats as a side-effect
-rw-r--r-- libmat2/archive.py 12
-rw-r--r-- tests/test_deep_cleaning.py 4
2 files changed, 14 insertions, 2 deletions
diff --git a/libmat2/archive.py b/libmat2/archive.py
index d155664..1ae3b45 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -4,13 +4,14 @@ import tempfile
4import os 4import os
5import logging 5import logging
6import shutil 6import shutil
7from typing import Dict, Set, Pattern, Union, Any 7from typing import Dict, Set, Pattern, Union, Any, List
8 8
9from . import abstract, UnknownMemberPolicy, parser_factory 9from . import abstract, UnknownMemberPolicy, parser_factory
10 10
11# Make pyflakes happy 11# Make pyflakes happy
12assert Set 12assert Set
13assert Pattern 13assert Pattern
14assert List
14assert Union 15assert Union
15 16
16 17
@@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
115 temp_folder = tempfile.mkdtemp() 116 temp_folder = tempfile.mkdtemp()
116 abort = False 117 abort = False
117 118
119 items = list() # type: List[zipfile.ZipInfo]
120 for item in sorted(zin.infolist(), key=lambda z: z.filename):
121 if item.filename == 'mimetype':
122 items = [item] + items
123 else:
124 items.append(item)
125
118 # Since files order is a fingerprint factor, 126 # Since files order is a fingerprint factor,
119 # we're iterating (and thus inserting) them in lexicographic order. 127 # we're iterating (and thus inserting) them in lexicographic order.
120 for item in sorted(zin.infolist(), key=lambda z: z.filename): 128 for item in items:
121 if item.filename[-1] == '/': # `is_dir` is added in Python3.6 129 if item.filename[-1] == '/': # `is_dir` is added in Python3.6
122 continue # don't keep empty folders 130 continue # don't keep empty folders
123 131
diff --git a/tests/test_deep_cleaning.py b/tests/test_deep_cleaning.py
index 8466127..ccd4955 100644
--- a/tests/test_deep_cleaning.py
+++ b/tests/test_deep_cleaning.py
@@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase):
83 previous_name = '' 83 previous_name = ''
84 for item in zin.infolist(): 84 for item in zin.infolist():
85 if previous_name == '': 85 if previous_name == '':
86 if item.filename == 'mimetype':
87 continue
86 previous_name = item.filename 88 previous_name = item.filename
87 continue 89 continue
88 elif item.filename < previous_name: 90 elif item.filename < previous_name:
@@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase):
97 previous_name = '' 99 previous_name = ''
98 for item in zin.infolist(): 100 for item in zin.infolist():
99 if previous_name == '': 101 if previous_name == '':
102 if item.filename == 'mimetype':
103 continue
100 previous_name = item.filename 104 previous_name = item.filename
101 continue 105 continue
102 self.assertGreaterEqual(item.filename, previous_name) 106 self.assertGreaterEqual(item.filename, previous_name)