summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
author: jvoisin, 2018-03-31 23:09:54 +0200
committer: jvoisin, 2018-03-31 23:09:54 +0200
commit: 6d506b87575ded3a59c9fc4f7b28d4160d9e9c43 (patch)
tree: 8cbb2edad30c4ae630eb371cf7a114208cf51dbb /tests
parent: fb5956bd6b33161a462da36d6ebe62631e6da275 (diff)
Add a deep check for office/libreoffice files
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_libmat2.py 54
1 files changed, 53 insertions, 1 deletions
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 743a845..c065237 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -3,8 +3,10 @@
3import unittest 3import unittest
4import shutil 4import shutil
5import os 5import os
6import zipfile
7import tempfile
6 8
7from src import pdf, png, jpg, audio, office, libreoffice 9from src import pdf, png, jpg, audio, office, libreoffice, parser_factory
8 10
9class TestGetMeta(unittest.TestCase): 11class TestGetMeta(unittest.TestCase):
10 def test_pdf(self): 12 def test_pdf(self):
@@ -54,6 +56,56 @@ class TestGetMeta(unittest.TestCase):
54 self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202') 56 self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
55 57
56 58
class TestDeepCleaning(unittest.TestCase):
    """Check that cleaned office/libreoffice files are also clean inside.

    Each test cleans a copy of a dirty fixture, checks the metadata reported
    for the cleaned file, then extracts that file as a zip archive and checks
    every member for which a parser is available.
    """

    def __check_zip_clean(self, p):
        """Assert that the parsable members of the zip `p.filename` have no metadata.

        The archive is extracted to a temporary directory. Each extracted
        file is handed to `parser_factory.get_parser`; files for which it
        returns None are skipped, the others must report an empty metadata
        dict.

        :param p: parser object whose `filename` attribute is the path of
                  the zip archive to inspect.
        """
        tempdir = tempfile.mkdtemp()
        try:
            # Close the archive as soon as its content is on disk.
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})
                    # Only announce the file as clean once it was checked.
                    print('[+] %s is clean inside %s' %(complete_path, p.filename))
        finally:
            # Remove the extracted files even when an assertion failed.
            shutil.rmtree(tempdir)

    def __assert_deep_clean(self, parser_class, dirty_path, clean_path):
        """Clean a copy of `dirty_path` and check the result inside and out.

        :param parser_class: parser class to instantiate on the files.
        :param dirty_path: fixture file that contains metadata.
        :param clean_path: working copy that gets cleaned; the cleaned
                           output is read from `clean_path + '.cleaned'`.
        """
        shutil.copy(dirty_path, clean_path)
        # Registered right away so the working copy is removed even if an
        # assertion below fails.
        # NOTE(review): the '.cleaned' output is left on disk, as before --
        # confirm whether that is intended.
        self.addCleanup(os.remove, clean_path)

        p = parser_class(clean_path)
        self.assertIsNotNone(p.get_meta())
        self.assertTrue(p.remove_all())

        p = parser_class(clean_path + '.cleaned')
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_clean(p)

    def test_office(self):
        """Deep-check a cleaned .docx file."""
        self.__assert_deep_clean(office.OfficeParser,
                                 './tests/data/dirty.docx',
                                 './tests/data/clean.docx')

    def test_libreoffice(self):
        """Deep-check a cleaned .odt file."""
        self.__assert_deep_clean(libreoffice.LibreOfficeParser,
                                 './tests/data/dirty.odt',
                                 './tests/data/clean.odt')
57 109
58class TestCleaning(unittest.TestCase): 110class TestCleaning(unittest.TestCase):
59 def test_pdf(self): 111 def test_pdf(self):