From bee56a57ce0f45c51386423d508ba06836be366e Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sun, 1 Jul 2018 23:11:10 +0200
Subject: Remove docx revisions

---
 tests/data/revision.docx | Bin 0 -> 4701 bytes
 tests/test_libmat2.py    |  21 +++++++++++++++++++++
 2 files changed, 21 insertions(+)
 create mode 100644 tests/data/revision.docx

(limited to 'tests')

diff --git a/tests/data/revision.docx b/tests/data/revision.docx
new file mode 100644
index 0000000..8a2d814
Binary files /dev/null and b/tests/data/revision.docx differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 1573790..4df6385 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -121,6 +121,7 @@ class TestRemovingThumbnails(unittest.TestCase):
         zipin.close()
 
         os.remove('./tests/data/clean.cleaned.odt')
+        os.remove('./tests/data/clean.odt')
 
 
 class TestRevisionsCleaning(unittest.TestCase):
@@ -142,6 +143,26 @@ class TestRevisionsCleaning(unittest.TestCase):
         os.remove('./tests/data/clean.odt')
         os.remove('./tests/data/clean.cleaned.odt')
 
+    def test_msoffice(self):
+        """Check that remove_all() strips tracked insertions from a docx."""
+        # NOTE(review): needle assumed; confirm revision.docx has a <w:ins ...> tag.
+        marker = b'<w:ins '
+
+        with zipfile.ZipFile('./tests/data/revision.docx') as zipin:
+            with zipin.open('word/document.xml') as c:
+                self.assertIn(marker, c.read())
+
+        shutil.copy('./tests/data/revision.docx', './tests/data/revision_clean.docx')
+        p = office.MSOfficeParser('./tests/data/revision_clean.docx')
+        self.assertTrue(p.remove_all())
+
+        with zipfile.ZipFile('./tests/data/revision_clean.cleaned.docx') as zipin:
+            with zipin.open('word/document.xml') as c:
+                self.assertNotIn(marker, c.read())
+
+        os.remove('./tests/data/revision_clean.docx')
+        os.remove('./tests/data/revision_clean.cleaned.docx')
+
 
 class TestDeepCleaning(unittest.TestCase):
     def __check_deep_meta(self, p):
-- 
cgit v1.3