# Added to tests/test_deep_cleaning.py by commit fc924239 ("Add a test for nsid
# cleaning", jvoisin, 2019-09-01), right after the TestRsidRemoval class.
class TestNsidRemoval(unittest.TestCase):
    """Check that cleaning an Office document strips its ``w:nsid`` markers."""

    @staticmethod
    def _remove_if_exists(path):
        """Delete *path*, tolerating a file that was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate: the cleaned copy does not exist when the test
            # fails before `remove_all()` has produced it.
            pass

    @staticmethod
    def _xml_occurrences(archive_path, needle):
        """Count *needle* in every ``.xml`` member of a zip archive.

        Each member is decoded as UTF-8 and lowercased before counting, so
        *needle* must itself be lowercase.

        Returns a list of ``(member filename, count)`` tuples, in the order
        reported by ``ZipFile.infolist()``.
        """
        with zipfile.ZipFile(archive_path) as zin:
            return [
                (item.filename,
                 zin.read(item).decode('utf-8').lower().count(needle))
                for item in zin.infolist()
                if item.filename.endswith('.xml')
            ]

    def test_office(self):
        """Clean the nsid fixture and verify no ``w:nsid`` is left behind."""
        working_copy = './tests/data/clean.docx'
        cleaned_copy = './tests/data/clean.cleaned.docx'

        shutil.copy('./tests/data/dirty_with_nsid.docx', working_copy)
        # Registered up front so the scratch files are removed even when one
        # of the assertions below fails.
        self.addCleanup(self._remove_if_exists, working_copy)
        self.addCleanup(self._remove_if_exists, cleaned_copy)

        p = office.MSOfficeParser(working_copy)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Sanity check on the dirty fixture before cleaning.
        # NOTE(review): this counts 'w:rsid' although the test is about nsid;
        # kept as-is because the fixture is not visible here -- confirm
        # whether 'w:nsid' (with its own expected count) was intended.
        how_many_rsid = sum(
            count for _, count in self._xml_occurrences(working_copy, 'w:rsid')
        )
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # Every XML part of the cleaned document must be free of nsid markers;
        # the member name is passed as the failure message to ease debugging.
        for filename, num in self._xml_occurrences(cleaned_copy, 'w:nsid'):
            self.assertEqual(num, 0, filename)