diff options
| -rw-r--r-- | README.md | 3 | ||||
| -rw-r--r-- | tests/data/dirty_with_nsid.docx | bin | 0 -> 45889 bytes | |||
| -rw-r--r-- | tests/test_deep_cleaning.py | 31 |
3 files changed, 34 insertions, 0 deletions
| @@ -140,6 +140,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. | |||
| 140 | Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> | 140 | Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> |
| 141 | Copyright 2016 Marie Rose for MAT2's logo | 141 | Copyright 2016 Marie Rose for MAT2's logo |
| 142 | 142 | ||
| 143 | The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3, | ||
| 144 | and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx | ||
| 145 | |||
| 143 | # Thanks | 146 | # Thanks |
| 144 | 147 | ||
| 145 | MAT2 wouldn't exist without: | 148 | MAT2 wouldn't exist without: |
diff --git a/tests/data/dirty_with_nsid.docx b/tests/data/dirty_with_nsid.docx new file mode 100644 index 0000000..6f4ae99 --- /dev/null +++ b/tests/data/dirty_with_nsid.docx | |||
| Binary files differ | |||
diff --git a/tests/test_deep_cleaning.py b/tests/test_deep_cleaning.py index ccd4955..aab46c7 100644 --- a/tests/test_deep_cleaning.py +++ b/tests/test_deep_cleaning.py | |||
| @@ -137,3 +137,34 @@ class TestRsidRemoval(unittest.TestCase): | |||
| 137 | 137 | ||
| 138 | os.remove('./tests/data/clean.docx') | 138 | os.remove('./tests/data/clean.docx') |
| 139 | os.remove('./tests/data/clean.cleaned.docx') | 139 | os.remove('./tests/data/clean.cleaned.docx') |
| 140 | |||
| 141 | |||
class TestNsidRemoval(unittest.TestCase):
    """Check that cleaning a .docx file leaves no `w:nsid` markers behind."""

    def test_office(self):
        """Clean a copy of the sample document and inspect the result.

        The sample is copied to a scratch path, parsed with
        `office.MSOfficeParser`, and its XML members are scanned before and
        after `remove_all()`. Both scratch files are deleted at the end of a
        successful run.
        """
        shutil.copy('./tests/data/dirty_with_nsid.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Fixture sanity check: total the `w:rsid` occurrences over every XML
        # member of the untouched copy. The counter starts at the integer 0
        # (not a bool) because it is used purely as a running sum.
        how_many_rsid = 0
        with zipfile.ZipFile('./tests/data/clean.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                how_many_rsid += num
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # Every XML member of the cleaned archive must be free of `w:nsid`;
        # the member name is attached so a failure points at the culprit.
        with zipfile.ZipFile('./tests/data/clean.cleaned.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:nsid')
                self.assertEqual(num, 0, msg=item.filename)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')
