From 156855ab7e79a311c1d19e9c937c41aed12b7506 Mon Sep 17 00:00:00 2001
From: Alex Marchant
Date: Fri, 5 Apr 2024 18:45:58 +0200
Subject: Remove dangling references from document.xml.rels

The file `word/_rels/document.xml.rels` is similar to `[Content_Types].xml`
and has references to other files in the archive. If those references aren't
removed Word refuses to open the document.
---
 tests/test_libmat2.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'tests')

diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 491f396..7855062 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -931,3 +931,26 @@ class TextDocx(unittest.TestCase):
 
         os.remove('./tests/data/comment_clean.docx')
         os.remove('./tests/data/comment_clean.cleaned.docx')
+
+    def test_clean_document_xml_rels(self):
+        """Cleaning must drop the comments.xml relationship.
+
+        The fixture's word/_rels/document.xml.rels targets comments.xml;
+        after remove_all() that target must no longer be referenced.
+        """
+        rels = 'word/_rels/document.xml.rels'
+        dangling = b'Target="comments.xml"'
+
+        # Precondition: the untouched fixture does reference comments.xml.
+        with zipfile.ZipFile('./tests/data/comment.docx') as zipin:
+            self.assertIn(dangling, zipin.read(rels))
+
+        shutil.copy('./tests/data/comment.docx', './tests/data/comment_clean.docx')
+        # Registered cleanups run even when an assertion below fails.
+        self.addCleanup(os.remove, './tests/data/comment_clean.docx')
+        p = office.MSOfficeParser('./tests/data/comment_clean.docx')
+        self.assertTrue(p.remove_all())
+        self.addCleanup(os.remove, './tests/data/comment_clean.cleaned.docx')
+
+        with zipfile.ZipFile('./tests/data/comment_clean.cleaned.docx') as zipin:
+            self.assertNotIn(dangling, zipin.read(rels))
-- 
cgit v1.3