From f2c898c92d0422ddc76fa977d60f7345b06a5ad6 Mon Sep 17 00:00:00 2001
From: Alex Marchant
Date: Wed, 3 Apr 2024 15:20:00 -0400
Subject: Strip comment references from document.xml

---
Review note: the added test was revised so that zip member handles are
closed and temporary files are removed even when an assertion fails. The
blob hash on the "index" line below still refers to the original revision.

 tests/test_libmat2.py | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 0435113..491f396 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -900,4 +900,35 @@ class TextDocx(unittest.TestCase):
         self.assertIsNotNone(match)
 
         os.remove('./tests/data/comment_clean.docx')
-        os.remove('./tests/data/comment_clean.cleaned.docx')
\ No newline at end of file
+        os.remove('./tests/data/comment_clean.cleaned.docx')
+
+    def test_comment_references_are_removed(self):
+        """Check that comment anchors are stripped from word/document.xml."""
+        source = './tests/data/comment.docx'
+        working_copy = './tests/data/comment_clean.docx'
+        cleaned = './tests/data/comment_clean.cleaned.docx'
+        markers = (
+            b'w:commentRangeStart',
+            b'w:commentRangeEnd',
+            b'w:commentReference',
+        )
+
+        # ZipFile.read() opens and closes the member itself, so no
+        # ZipExtFile handle is left open.
+        with zipfile.ZipFile(source) as zipin:
+            content = zipin.read('word/document.xml')
+        for marker in markers:
+            self.assertIn(marker, content)
+
+        shutil.copy(source, working_copy)
+        # Register cleanup immediately so a failing assertion below does
+        # not leave the working copy behind.
+        self.addCleanup(os.remove, working_copy)
+        p = office.MSOfficeParser(working_copy)
+        self.assertTrue(p.remove_all())
+        self.addCleanup(os.remove, cleaned)
+
+        with zipfile.ZipFile(cleaned) as zipin:
+            content = zipin.read('word/document.xml')
+        for marker in markers:
+            self.assertNotIn(marker, content)
-- 
cgit v1.3