summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jvoisin 2018-06-21 23:18:50 +0200
committer jvoisin 2018-06-21 23:18:59 +0200
commit 5b38bd7ccd97cdca864351b4af0fcbaa227f509e (patch)
tree 503037b7e2f0426db11fcee7acfbe3af138a5ff3
parent 846a261465a95a594e9bfc5398cf09fd5deb437f (diff)
Improve the reliability of the office parser
-rw-r--r-- libmat2/office.py 7
-rw-r--r-- tests/test_libmat2.py 5
2 files changed, 12 insertions, 0 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index a5a49cf..aea56b9 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -16,6 +16,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
16 files_to_keep = set() # type: Set[str] 16 files_to_keep = set() # type: Set[str]
17 files_to_omit = set() # type: Set[Pattern] 17 files_to_omit = set() # type: Set[Pattern]
18 18
19 def __init__(self, filename):
20 super().__init__(filename)
21 try: # better fail here than later
22 zipfile.ZipFile(self.filename)
23 except zipfile.BadZipFile:
24 raise ValueError
25
19 def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: 26 def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
20 zipinfo.create_system = 3 # Linux 27 zipinfo.create_system = 3 # Linux
21 zipinfo.comment = b'' 28 zipinfo.comment = b''
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 4b312de..e1d949d 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -90,6 +90,11 @@ class TestCorruptedFiles(unittest.TestCase):
90 90
91 os.remove('./tests/data/clean.torrent') 91 os.remove('./tests/data/clean.torrent')
92 92
93 def test_odg(self):
94 shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')
95 with self.assertRaises(ValueError):
96 office.LibreOfficeParser('./tests/data/clean.odg')
97
93 class TestGetMeta(unittest.TestCase): 98 class TestGetMeta(unittest.TestCase):
94 def test_pdf(self): 99 def test_pdf(self):
95 p = pdf.PDFParser('./tests/data/dirty.pdf') 100 p = pdf.PDFParser('./tests/data/dirty.pdf')