summaryrefslogtreecommitdiff
path: root/tests/test_libmat2.py
diff options
context:
space:
mode:
author: jvoisin, 2024-04-05 18:33:30 +0200
committer: jvoisin, 2024-04-05 18:33:30 +0200
commit: 09672a2dccb2fea0035278c7014f319b85e89c31 (patch)
tree: 2f530cf359d3c99807c5ac6c03fc52b2b93445d6 /tests/test_libmat2.py
parent: 61f39c4bd0b51be6371fb2973c14054a2772352e (diff)
parent: f2c898c92d0422ddc76fa977d60f7345b06a5ad6 (diff)
Merge branch 'alexmarchant-utf-8-encode-all'
Diffstat (limited to 'tests/test_libmat2.py')
-rw-r--r-- tests/test_libmat2.py | 28
1 file changed, 27 insertions, 1 deletion
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index d199f54..491f396 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -876,6 +876,32 @@ class TextDocx(unittest.TestCase):
876 os.remove('./tests/data/comment_clean.docx') 876 os.remove('./tests/data/comment_clean.docx')
877 os.remove('./tests/data/comment_clean.cleaned.docx') 877 os.remove('./tests/data/comment_clean.cleaned.docx')
878 878
def test_xml_is_utf8(self):
    """Check that the docx XML still declares UTF-8 after cleaning.

    Reads ``word/document.xml`` from the ``comment.docx`` fixture and
    asserts that it contains a UTF-8 ``encoding=`` declaration. It then
    copies the fixture, calls ``office.MSOfficeParser.remove_all()`` on
    the copy, and asserts the same declaration is present in
    ``word/document.xml`` of ``comment_clean.cleaned.docx`` (presumably
    the file written by ``remove_all()`` -- confirm against the parser).
    """
    source = './tests/data/comment.docx'
    working = './tests/data/comment_clean.docx'
    cleaned = './tests/data/comment_clean.cleaned.docx'
    # One pattern for both checks: either quote style, case-insensitive.
    utf8_decl = b'encoding=(\'|\")UTF-8(\'|\")'

    def read_document_xml(archive):
        # Close both the archive and the member handle; the bare
        # zipin.open() call previously left the member handle open.
        with zipfile.ZipFile(archive) as zipin:
            with zipin.open('word/document.xml') as member:
                return member.read()

    try:
        # ensure encoding is utf-8
        content = read_document_xml(source)
        self.assertIsNotNone(re.search(utf8_decl, content, re.IGNORECASE))

        shutil.copy(source, working)
        p = office.MSOfficeParser(working)
        self.assertTrue(p.remove_all())

        # ensure encoding is still utf-8
        content = read_document_xml(cleaned)
        self.assertIsNotNone(re.search(utf8_decl, content, re.IGNORECASE))
    finally:
        # Remove the temporary files even when an assertion fails, so a
        # failing run does not leave stale fixtures behind. The existence
        # check keeps cleanup from masking the original failure.
        for path in (working, cleaned):
            if os.path.exists(path):
                os.remove(path)
904
879 def test_comment_references_are_removed(self): 905 def test_comment_references_are_removed(self):
880 with zipfile.ZipFile('./tests/data/comment.docx') as zipin: 906 with zipfile.ZipFile('./tests/data/comment.docx') as zipin:
881 c = zipin.open('word/document.xml') 907 c = zipin.open('word/document.xml')
@@ -904,4 +930,4 @@ class TextDocx(unittest.TestCase):
904 self.assertNotIn(r, content) 930 self.assertNotIn(r, content)
905 931
906 os.remove('./tests/data/comment_clean.docx') 932 os.remove('./tests/data/comment_clean.docx')
907 os.remove('./tests/data/comment_clean.cleaned.docx') \ No newline at end of file 933 os.remove('./tests/data/comment_clean.cleaned.docx')