def test_comment_references_are_removed(self):
    """Check that cleaning a .docx strips comment anchors from document.xml.

    The fixture ``comment.docx`` must contain ``w:commentRangeStart``,
    ``w:commentRangeEnd`` and ``w:commentReference`` in
    ``word/document.xml``; after ``MSOfficeParser.remove_all()`` has run on
    a copy of it, none of those markers may remain in the cleaned file.
    """
    source_path = './tests/data/comment.docx'
    working_path = './tests/data/comment_clean.docx'
    cleaned_path = './tests/data/comment_clean.cleaned.docx'
    comment_markers = (
        b'w:commentRangeStart',
        b'w:commentRangeEnd',
        b'w:commentReference',
    )

    def read_document_xml(path):
        # Close both the archive and the member handle deterministically.
        with zipfile.ZipFile(path) as archive:
            with archive.open('word/document.xml') as member:
                return member.read()

    def remove_if_present(path):
        # Cleanup also runs when the test failed before the file was
        # created, so a missing file is expected here and not an error.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    # Sanity check: the fixture really carries the comment anchors.
    original_xml = read_document_xml(source_path)
    for marker in comment_markers:
        self.assertIn(marker, original_xml)

    # Register cleanup before creating anything, so a failing assertion
    # or an exception in remove_all() cannot leave files in tests/data.
    self.addCleanup(remove_if_present, working_path)
    self.addCleanup(remove_if_present, cleaned_path)

    shutil.copy(source_path, working_path)
    parser = office.MSOfficeParser(working_path)
    self.assertTrue(parser.remove_all())

    # The cleaned output is expected next to the working copy.
    cleaned_xml = read_document_xml(cleaned_path)
    for marker in comment_markers:
        self.assertNotIn(marker, cleaned_xml)