#!/usr/bin/env python3
# Origin: commit 3d2842802c4cd4afb749856d30c58f64626a51ce ("Split the tests"),
# jvoisin, 2018-09-20 -- adds tests/test_deep_cleaning.py.

import unittest
import shutil
import os
import zipfile
import tempfile

from libmat2 import office, parser_factory


class TestZipMetadata(unittest.TestCase):
    """Check that cleaned office documents carry no metadata.

    Each test copies a dirty fixture, cleans the copy with a libmat2 office
    parser, and then inspects the cleaned archive both at the zip-entry
    level and file by file after extraction.
    """

    @staticmethod
    def __remove_if_present(path):
        """Delete *path*, ignoring the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # A cleanup must not mask the real test failure (e.g. when
            # remove_all() failed and the cleaned file does not exist).
            pass

    def __check_deep_meta(self, p):
        """Assert that every recognised file inside the archive is clean.

        The archive at ``p.filename`` is extracted to a temporary directory;
        each extracted file for which ``parser_factory.get_parser`` returns a
        parser must report an empty ``get_meta()``. Files without a parser
        are skipped.
        """
        # TemporaryDirectory removes the tree even when an assertion fails.
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {},
                                     msg=complete_path)

    def __check_zip_meta(self, p):
        """Assert that every zip entry of ``p.filename`` has neutral headers.

        Each entry must have an empty comment, the fixed timestamp
        1980-01-01 00:00:00 and a UNIX ``create_system``.
        """
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def __check_cleaning(self, parser_class, dirty, clean, cleaned):
        """Clean a copy of *dirty* and verify the result in depth.

        parser_class -- libmat2 parser class used to open the documents
        dirty        -- path of the fixture containing metadata
        clean        -- path the fixture is copied to before cleaning
        cleaned      -- path where ``remove_all()`` is expected to write
                        its output

        Both working files are removed when the test ends, whether it
        passes or fails.
        """
        shutil.copy(dirty, clean)
        self.addCleanup(self.__remove_if_present, clean)
        # Registered before remove_all() so the output is removed even if a
        # later assertion fails.
        self.addCleanup(self.__remove_if_present, cleaned)

        p = parser_class(clean)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        p = parser_class(cleaned)
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

    def test_office(self):
        """A cleaned .docx has no document, zip-entry or nested metadata."""
        self.__check_cleaning(office.MSOfficeParser,
                              './tests/data/dirty.docx',
                              './tests/data/clean.docx',
                              './tests/data/clean.cleaned.docx')

    def test_libreoffice(self):
        """A cleaned .odt has no document, zip-entry or nested metadata."""
        self.__check_cleaning(office.LibreOfficeParser,
                              './tests/data/dirty.odt',
                              './tests/data/clean.odt',
                              './tests/data/clean.cleaned.odt')