summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- README.md | 3
-rw-r--r-- tests/data/dirty_with_nsid.docx | bin 0 -> 45889 bytes
-rw-r--r-- tests/test_deep_cleaning.py | 31
3 files changed, 34 insertions, 0 deletions
diff --git a/README.md b/README.md
index 28ea2fd..821e5e2 100644
--- a/README.md
+++ b/README.md
@@ -140,6 +140,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
140Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> 140Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
141Copyright 2016 Marie Rose for MAT2's logo 141Copyright 2016 Marie Rose for MAT2's logo
142 142
143The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3,
144and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx
145
143# Thanks 146# Thanks
144 147
145MAT2 wouldn't exist without: 148MAT2 wouldn't exist without:
diff --git a/tests/data/dirty_with_nsid.docx b/tests/data/dirty_with_nsid.docx
new file mode 100644
index 0000000..6f4ae99
--- /dev/null
+++ b/tests/data/dirty_with_nsid.docx
Binary files differ
diff --git a/tests/test_deep_cleaning.py b/tests/test_deep_cleaning.py
index ccd4955..aab46c7 100644
--- a/tests/test_deep_cleaning.py
+++ b/tests/test_deep_cleaning.py
@@ -137,3 +137,34 @@ class TestRsidRemoval(unittest.TestCase):
137 137
138 os.remove('./tests/data/clean.docx') 138 os.remove('./tests/data/clean.docx')
139 os.remove('./tests/data/clean.cleaned.docx') 139 os.remove('./tests/data/clean.cleaned.docx')
140
141
class TestNsidRemoval(unittest.TestCase):
    """Check that cleaning a .docx file leaves no `w:nsid` marker in its XML parts."""

    @staticmethod
    def _remove_if_exists(path):
        """Delete *path*, ignoring the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def test_office(self):
        """Clean a copy of the nsid fixture and assert every XML part is nsid-free."""
        fixture = './tests/data/dirty_with_nsid.docx'
        working_copy = './tests/data/clean.docx'
        cleaned_copy = './tests/data/clean.cleaned.docx'

        shutil.copy(fixture, working_copy)
        # Register cleanup up front so a failing assertion below does not
        # leave stray files behind for other tests to trip over.
        self.addCleanup(self._remove_if_exists, working_copy)
        self.addCleanup(self._remove_if_exists, cleaned_copy)

        p = office.MSOfficeParser(working_copy)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Sanity-check the fixture before cleaning.
        # NOTE(review): this counts 'w:rsid' although the test targets
        # 'w:nsid'; 1190 is the value the original test pinned -- confirm
        # that checking rsid here is intentional.
        with zipfile.ZipFile(working_copy) as zin:
            how_many_rsid = sum(
                zin.read(item).decode('utf-8').lower().count('w:rsid')
                for item in zin.infolist()
                if item.filename.endswith('.xml')
            )
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # The file read here is expected to exist once remove_all() succeeded.
        with zipfile.ZipFile(cleaned_copy) as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:nsid')
                self.assertEqual(num, 0)