summaryrefslogtreecommitdiff
path: root/tests/test_libmat2.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_libmat2.py')
-rw-r--r--tests/test_libmat2.py64
1 files changed, 0 insertions, 64 deletions
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 74dad2e..1af5b62 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -182,70 +182,6 @@ class TestRevisionsCleaning(unittest.TestCase):
182 os.remove('./tests/data/revision_clean.docx') 182 os.remove('./tests/data/revision_clean.docx')
183 os.remove('./tests/data/revision_clean.cleaned.docx') 183 os.remove('./tests/data/revision_clean.cleaned.docx')
184 184
185
class TestDeepCleaning(unittest.TestCase):
    """Check that cleaned office documents carry no metadata at any level.

    Each test cleans a copy of a dirty fixture, then inspects both the
    per-member ZIP metadata of the cleaned archive and the metadata of
    every file stored inside it.
    """

    def __check_deep_meta(self, p):
        """Assert that every parseable file inside the archive is clean.

        The archive at ``p.filename`` is extracted into a temporary
        directory. Each extracted file is handed to
        ``parser_factory.get_parser``; files for which no parser is
        returned are skipped, all others must report empty metadata.
        """
        # TemporaryDirectory removes the extracted tree even when an
        # assertion fails part-way through the walk.
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})

    def __check_zip_meta(self, p):
        """Assert that each member of the archive has neutral ZIP metadata.

        For every entry of the archive at ``p.filename`` the comment must
        be empty, the timestamp must be (1980, 1, 1, 0, 0, 0) and the
        creating system must be 3.
        """
        # The context manager closes the archive even if an assertion fails.
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def __remove_if_present(self, path):
        """Delete ``path``, ignoring the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate: an earlier failure may have prevented the file
            # from being written, and cleanup must not mask that failure.
            pass

    def __check_deep_cleaning(self, parser_class, extension):
        """Clean a copy of the dirty fixture and verify the cleaned output.

        ``parser_class`` is instantiated first on the working copy
        ``./tests/data/clean.<extension>`` and then on the cleaned file
        expected at ``./tests/data/clean.cleaned.<extension>``.
        """
        dirty = './tests/data/dirty.' + extension
        working = './tests/data/clean.' + extension
        cleaned = './tests/data/clean.cleaned.' + extension

        # Register cleanup up front so that a failing assertion does not
        # leave generated files behind in the fixture directory.
        self.addCleanup(self.__remove_if_present, working)
        self.addCleanup(self.__remove_if_present, cleaned)
        shutil.copy(dirty, working)

        p = parser_class(working)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        p = parser_class(cleaned)
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

    def test_office(self):
        """Deep-clean a .docx document with ``office.MSOfficeParser``."""
        self.__check_deep_cleaning(office.MSOfficeParser, 'docx')

    def test_libreoffice(self):
        """Deep-clean an .odt document with ``office.LibreOfficeParser``."""
        self.__check_deep_cleaning(office.LibreOfficeParser, 'odt')
248
249class TestLightWeightCleaning(unittest.TestCase): 185class TestLightWeightCleaning(unittest.TestCase):
250 def test_pdf(self): 186 def test_pdf(self):
251 shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') 187 shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')