From c67bbafb2c60782096af4f6225d94e18225d2ecf Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Mon, 1 Oct 2018 22:26:35 +0200
Subject: Use [Content_Types].xml to improve MS Office coverage

---
 tests/data/broken_xml_content_types.docx | Bin 0 -> 4145 bytes
 tests/data/malformed_content_types.docx  | Bin 4131 -> 4135 bytes
 tests/data/no_content_types.docx         | Bin 0 -> 3651 bytes
 tests/test_corrupted_files.py            |  16 ++++++++++++++--
 4 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 tests/data/broken_xml_content_types.docx
 create mode 100644 tests/data/no_content_types.docx

(limited to 'tests')

diff --git a/tests/data/broken_xml_content_types.docx b/tests/data/broken_xml_content_types.docx
new file mode 100644
index 0000000..41e0e49
Binary files /dev/null and b/tests/data/broken_xml_content_types.docx differ
diff --git a/tests/data/malformed_content_types.docx b/tests/data/malformed_content_types.docx
index 43ac743..cc5caf3 100644
Binary files a/tests/data/malformed_content_types.docx and b/tests/data/malformed_content_types.docx differ
diff --git a/tests/data/no_content_types.docx b/tests/data/no_content_types.docx
new file mode 100644
index 0000000..d0e0330
Binary files /dev/null and b/tests/data/no_content_types.docx differ
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 4ac2678..8d7c252 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -86,14 +86,26 @@ class TestExplicitelyUnsupportedFiles(unittest.TestCase):
         os.remove('./tests/data/clean.py')
 
 
-class TestCorruptedContentTypesOffice(unittest.TestCase):
-    def test_office(self):
+class TestWrongContentTypesFileOffice(unittest.TestCase):
+    def test_office_incomplete(self):
         shutil.copy('./tests/data/malformed_content_types.docx', './tests/data/clean.docx')
         p = office.MSOfficeParser('./tests/data/clean.docx')
         self.assertIsNotNone(p)
         self.assertFalse(p.remove_all())
         os.remove('./tests/data/clean.docx')
 
+    def test_office_broken(self):
+        shutil.copy('./tests/data/broken_xml_content_types.docx', './tests/data/clean.docx')
+        with self.assertRaises(ValueError):
+            office.MSOfficeParser('./tests/data/clean.docx')
+        os.remove('./tests/data/clean.docx')
+
+    def test_office_absent(self):
+        shutil.copy('./tests/data/no_content_types.docx', './tests/data/clean.docx')
+        with self.assertRaises(ValueError):
+            office.MSOfficeParser('./tests/data/clean.docx')
+        os.remove('./tests/data/clean.docx')
+
 
 class TestCorruptedFiles(unittest.TestCase):
     def test_pdf(self):
         shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
-- 
cgit v1.3