summary refs log tree commit diff
path: root/tests/test_deep_cleaning.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_deep_cleaning.py')
-rw-r--r-- tests/test_deep_cleaning.py 72
1 files changed, 72 insertions, 0 deletions
diff --git a/tests/test_deep_cleaning.py b/tests/test_deep_cleaning.py
new file mode 100644
index 0000000..8c6f257
--- /dev/null
+++ b/tests/test_deep_cleaning.py
@@ -0,0 +1,72 @@
1#!/usr/bin/env python3
2
3import unittest
4import shutil
5import os
6import zipfile
7import tempfile
8
9from libmat2 import office, parser_factory
10
class TestZipMetadata(unittest.TestCase):
    """Verify that cleaned office documents are free of metadata.

    Each test copies a dirty fixture, cleans the copy with the matching
    libmat2 parser, and then inspects the result at three levels: the
    metadata reported for the document itself, the per-member fields of
    its zip container, and every file embedded in that container.
    """

    @staticmethod
    def __remove_if_present(path):
        """Delete `path`, doing nothing if it does not exist.

        Used as a cleanup callback, where the file may legitimately be
        missing because the test failed before creating it.
        """
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def __check_deep_meta(self, p):
        """Assert that no file embedded in the archive reports metadata.

        The archive at `p.filename` is extracted into a temporary
        directory; every extracted file for which
        `parser_factory.get_parser` returns a parser must report an empty
        metadata dict. Files without a parser are skipped.
        """
        # The context managers guarantee that the archive handle is closed
        # and the extracted tree is removed even when an assertion fails.
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})

    def __check_zip_meta(self, p):
        """Assert that every member of the archive has neutral zip fields.

        For each entry of the archive at `p.filename`, the comment must be
        empty, the timestamp must be 1980-01-01 00:00:00 and the creating
        system must be 3.
        """
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def __check_cleaning(self, parser_class, dirty, working, cleaned):
        """Clean a copy of `dirty` and check the result for metadata.

        `dirty` is copied to `working`, which is then opened with
        `parser_class` and cleaned via `remove_all()`. The test expects
        the cleaned document to be found at `cleaned`; it is reopened with
        the same parser class and checked with the shallow, zip-level and
        deep metadata checks.
        """
        # Register the cleanups up front so the generated files are
        # removed even if an assertion below fails. Cleanups run in
        # reverse order of registration: `working` first, then `cleaned`.
        self.addCleanup(self.__remove_if_present, cleaned)
        self.addCleanup(self.__remove_if_present, working)

        shutil.copy(dirty, working)
        p = parser_class(working)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        p = parser_class(cleaned)
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

    def test_office(self):
        """A cleaned .docx has no metadata at any level."""
        self.__check_cleaning(
            office.MSOfficeParser,
            './tests/data/dirty.docx',
            './tests/data/clean.docx',
            './tests/data/clean.cleaned.docx',
        )

    def test_libreoffice(self):
        """A cleaned .odt has no metadata at any level."""
        self.__check_cleaning(
            office.LibreOfficeParser,
            './tests/data/dirty.odt',
            './tests/data/clean.odt',
            './tests/data/clean.cleaned.odt',
        )