From 865ad181ae075ebc168e5dab5d00b7c99a0b7c9b Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sat, 31 Mar 2018 15:47:06 +0200
Subject: Add support for docx

---
 tests/data/dirty.docx | Bin 0 -> 598120 bytes
 tests/test_libmat2.py |  20 ++++++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 tests/data/dirty.docx

(limited to 'tests')

diff --git a/tests/data/dirty.docx b/tests/data/dirty.docx
new file mode 100644
index 0000000..97e2c21
Binary files /dev/null and b/tests/data/dirty.docx differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index c21185e..02579b0 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -39,6 +39,11 @@ class TestGetMeta(unittest.TestCase):
         meta = p.get_meta()
         self.assertEqual(meta['TITLE'], ['I am so'])
 
+    def test_docx(self):
+        p = office.OfficeParser('./tests/data/dirty.docx')
+        meta = p.get_meta()
+        print(meta)
+
 
 class TestCleaning(unittest.TestCase):
     def test_pdf(self):
@@ -131,3 +136,18 @@ class TestCleaning(unittest.TestCase):
         self.assertEqual(p.get_meta(), {})
 
         os.remove('./tests/data/clean.flac')
+
+    def test_office(self):
+        shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
+        p = office.OfficeParser('./tests/data/clean.docx')
+
+        meta = p.get_meta()
+        self.assertIsNotNone(meta)
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = office.OfficeParser('./tests/data/clean.docx.cleaned')
+        self.assertEqual(p.get_meta(), {})
+
+        os.remove('./tests/data/clean.docx')
-- 
cgit v1.3