diff options
| author | jvoisin | 2018-06-10 20:19:35 +0200 |
|---|---|---|
| committer | jvoisin | 2018-06-10 20:19:35 +0200 |
| commit | 8c7979aae3f9073cc8426613d8d7594ddb560cf7 (patch) | |
| tree | e77106b36e3fca458d3c3ded03b80dcd72227f3c | |
| parent | b310a18e697c6c8d854c0547932b611dbb0d880a (diff) | |
Add some tests for unsupported embedded file formats
| -rw-r--r-- | libmat2/office.py | 14 | ||||
| -rw-r--r-- | tests/data/embedded.docx | bin | 0 -> 24601 bytes | |||
| -rw-r--r-- | tests/data/embedded.odt | bin | 0 -> 33019 bytes | |||
| -rw-r--r-- | tests/test_libmat2.py | 13 |
4 files changed, 23 insertions, 4 deletions
diff --git a/libmat2/office.py b/libmat2/office.py index 90f7c7a..914fd39 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -40,7 +40,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 40 | 40 | ||
| 41 | 41 | ||
| 42 | def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str, | 42 | def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str, |
| 43 | zin: zipfile.ZipFile, zout: zipfile.ZipFile): | 43 | zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool: |
| 44 | output = '' | 44 | output = '' |
| 45 | zin.extract(member=item, path=temp_folder) | 45 | zin.extract(member=item, path=temp_folder) |
| 46 | if item.filename not in self.whitelist: | 46 | if item.filename not in self.whitelist: |
| @@ -48,7 +48,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 48 | tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore | 48 | tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore |
| 49 | if not tmp_parser: | 49 | if not tmp_parser: |
| 50 | print("%s's format (%s) isn't supported" % (item.filename, mtype)) | 50 | print("%s's format (%s) isn't supported" % (item.filename, mtype)) |
| 51 | return | 51 | return False |
| 52 | tmp_parser.remove_all() | 52 | tmp_parser.remove_all() |
| 53 | output = tmp_parser.output_filename | 53 | output = tmp_parser.output_filename |
| 54 | else: | 54 | else: |
| @@ -57,6 +57,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 57 | clean_zinfo = self._clean_zipinfo(zinfo) | 57 | clean_zinfo = self._clean_zipinfo(zinfo) |
| 58 | with open(output, 'rb') as f: | 58 | with open(output, 'rb') as f: |
| 59 | zout.writestr(clean_zinfo, f.read()) | 59 | zout.writestr(clean_zinfo, f.read()) |
| 60 | return True | ||
| 60 | 61 | ||
| 61 | 62 | ||
| 62 | class MSOfficeParser(ArchiveBasedAbstractParser): | 63 | class MSOfficeParser(ArchiveBasedAbstractParser): |
| @@ -104,7 +105,10 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 104 | zout.writestr(item, zin.read(item)) | 105 | zout.writestr(item, zin.read(item)) |
| 105 | continue | 106 | continue |
| 106 | 107 | ||
| 107 | self._clean_internal_file(item, temp_folder, zin, zout) | 108 | if self._clean_internal_file(item, temp_folder, zin, zout) is False: |
| 109 | zout.close() | ||
| 110 | os.remove(self.output_filename) | ||
| 111 | return False | ||
| 108 | 112 | ||
| 109 | shutil.rmtree(temp_folder) | 113 | shutil.rmtree(temp_folder) |
| 110 | zout.close() | 114 | zout.close() |
| @@ -156,7 +160,9 @@ class LibreOfficeParser(ArchiveBasedAbstractParser): | |||
| 156 | elif item.filename == 'meta.xml': | 160 | elif item.filename == 'meta.xml': |
| 157 | continue # don't keep metadata files | 161 | continue # don't keep metadata files |
| 158 | 162 | ||
| 159 | self._clean_internal_file(item, temp_folder, zin, zout) | 163 | if self._clean_internal_file(item, temp_folder, zin, zout) is False: |
| 164 | os.remove(self.output_filename) | ||
| 165 | return False | ||
| 160 | 166 | ||
| 161 | shutil.rmtree(temp_folder) | 167 | shutil.rmtree(temp_folder) |
| 162 | zout.close() | 168 | zout.close() |
diff --git a/tests/data/embedded.docx b/tests/data/embedded.docx new file mode 100644 index 0000000..b134724 --- /dev/null +++ b/tests/data/embedded.docx | |||
| Binary files differ | |||
diff --git a/tests/data/embedded.odt b/tests/data/embedded.odt new file mode 100644 index 0000000..62bf8cc --- /dev/null +++ b/tests/data/embedded.odt | |||
| Binary files differ | |||
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 7deeadc..c85f425 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py | |||
| @@ -27,6 +27,19 @@ class TestParameterInjection(unittest.TestCase): | |||
| 27 | os.remove('-ver') | 27 | os.remove('-ver') |
| 28 | 28 | ||
| 29 | 29 | ||
| 30 | class TestUnsupportedEmbeddedFiles(unittest.TestCase): | ||
| 31 | def test_odt_with_svg(self): | ||
| 32 | shutil.copy('./tests/data/embedded.odt', './tests/data/clean.odt') | ||
| 33 | p = office.LibreOfficeParser('./tests/data/clean.odt') | ||
| 34 | self.assertFalse(p.remove_all()) | ||
| 35 | os.remove('./tests/data/clean.odt') | ||
| 36 | |||
| 37 | def test_docx_with_svg(self): | ||
| 38 | shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') | ||
| 39 | p = office.MSOfficeParser('./tests/data/clean.docx') | ||
| 40 | self.assertFalse(p.remove_all()) | ||
| 41 | os.remove('./tests/data/clean.docx') | ||
| 42 | |||
| 30 | class TestUnsupportedFiles(unittest.TestCase): | 43 | class TestUnsupportedFiles(unittest.TestCase): |
| 31 | def test_pdf(self): | 44 | def test_pdf(self): |
| 32 | shutil.copy('./tests/test_libmat2.py', './tests/clean.py') | 45 | shutil.copy('./tests/test_libmat2.py', './tests/clean.py') |
