diff options
| author | jvoisin | 2018-03-31 23:09:54 +0200 |
|---|---|---|
| committer | jvoisin | 2018-03-31 23:09:54 +0200 |
| commit | 6d506b87575ded3a59c9fc4f7b28d4160d9e9c43 (patch) | |
| tree | 8cbb2edad30c4ae630eb371cf7a114208cf51dbb /tests | |
| parent | fb5956bd6b33161a462da36d6ebe62631e6da275 (diff) | |
Add a deep check for office/libreoffice files
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/test_libmat2.py | 54 |
1 files changed, 53 insertions, 1 deletions
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 743a845..c065237 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py | |||
| @@ -3,8 +3,10 @@ | |||
| 3 | import unittest | 3 | import unittest |
| 4 | import shutil | 4 | import shutil |
| 5 | import os | 5 | import os |
| 6 | import zipfile | ||
| 7 | import tempfile | ||
| 6 | 8 | ||
| 7 | from src import pdf, png, jpg, audio, office, libreoffice | 9 | from src import pdf, png, jpg, audio, office, libreoffice, parser_factory |
| 8 | 10 | ||
| 9 | class TestGetMeta(unittest.TestCase): | 11 | class TestGetMeta(unittest.TestCase): |
| 10 | def test_pdf(self): | 12 | def test_pdf(self): |
| @@ -54,6 +56,56 @@ class TestGetMeta(unittest.TestCase): | |||
| 54 | self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202') | 56 | self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202') |
| 55 | 57 | ||
| 56 | 58 | ||
class TestDeepCleaning(unittest.TestCase):
    """Check that cleaned office/libreoffice archives hold no metadata.

    Each test cleans a copy of a "dirty" document, then extracts the
    resulting zip archive and asserts that every member a parser is
    available for reports empty metadata.
    """

    def __check_zip_clean(self, p):
        """Assert that every parsable member of the archive is metadata-free.

        :param p: parser object whose ``filename`` attribute is the path of
            the zip archive to inspect.
        """
        tempdir = tempfile.mkdtemp()
        try:
            # Close the archive as soon as its content has been extracted.
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        # No parser was returned for this member: nothing
                        # to check, move on to the next file.
                        continue
                    self.assertEqual(inside_p.get_meta(), {})
                    # Report only once the assertion above has held.
                    print('[+] %s is clean inside %s' %(complete_path, p.filename))
        finally:
            # Remove the extracted tree even when an assertion failed.
            shutil.rmtree(tempdir)

    def _clean_and_check(self, parser_class, dirty_path, clean_path):
        """Clean a copy of ``dirty_path`` and deep-check the cleaned output.

        :param parser_class: parser class to instantiate on the working copy
            and on the cleaned file.
        :param dirty_path: path of the reference document; it is only read.
        :param clean_path: path of the working copy; the cleaned output is
            read from ``clean_path + '.cleaned'``.
        """
        shutil.copy(dirty_path, clean_path)
        try:
            p = parser_class(clean_path)

            meta = p.get_meta()
            self.assertIsNotNone(meta)

            ret = p.remove_all()
            self.assertTrue(ret)

            p = parser_class(clean_path + '.cleaned')
            self.assertEqual(p.get_meta(), {})

            self.__check_zip_clean(p)
        finally:
            # Always drop the working copy so a failing run does not leave
            # it behind. The '.cleaned' output is not removed here.
            os.remove(clean_path)

    def test_office(self):
        """Deep-check a cleaned .docx document."""
        self._clean_and_check(office.OfficeParser,
                              './tests/data/dirty.docx',
                              './tests/data/clean.docx')

    def test_libreoffice(self):
        """Deep-check a cleaned .odt document."""
        self._clean_and_check(libreoffice.LibreOfficeParser,
                              './tests/data/dirty.odt',
                              './tests/data/clean.odt')
| 57 | 109 | ||
| 58 | class TestCleaning(unittest.TestCase): | 110 | class TestCleaning(unittest.TestCase): |
| 59 | def test_pdf(self): | 111 | def test_pdf(self): |
