summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: jvoisin, 2018-03-31 15:47:06 +0200
committer: jvoisin, 2018-03-31 15:47:06 +0200
commit: 865ad181ae075ebc168e5dab5d00b7c99a0b7c9b (patch)
tree: c019e0b0d72587371fd56d6c05e705ec79d34628 /tests
parent: 302a5ea002478cac86ff7be03d2add46c81a96de (diff)
Add support for docx
Diffstat (limited to 'tests')
-rw-r--r-- tests/data/dirty.docx bin 0 -> 598120 bytes
-rw-r--r-- tests/test_libmat2.py 20
2 files changed, 20 insertions, 0 deletions
diff --git a/tests/data/dirty.docx b/tests/data/dirty.docx
new file mode 100644
index 0000000..97e2c21
--- /dev/null
+++ b/tests/data/dirty.docx
Binary files differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index c21185e..02579b0 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -39,6 +39,11 @@ class TestGetMeta(unittest.TestCase):
39 meta = p.get_meta() 39 meta = p.get_meta()
40 self.assertEqual(meta['TITLE'], ['I am so']) 40 self.assertEqual(meta['TITLE'], ['I am so'])
41 41
42 def test_docx(self):
43 p = office.OfficeParser('./tests/data/dirty.docx')
44 meta = p.get_meta()
45 print(meta)
46
42 47
43 class TestCleaning(unittest.TestCase): 48 class TestCleaning(unittest.TestCase):
44 def test_pdf(self): 49 def test_pdf(self):
@@ -131,3 +136,18 @@ class TestCleaning(unittest.TestCase):
131 self.assertEqual(p.get_meta(), {}) 136 self.assertEqual(p.get_meta(), {})
132 137
133 os.remove('./tests/data/clean.flac') 138 os.remove('./tests/data/clean.flac')
139
140 def test_office(self):
141 shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
142 p = office.OfficeParser('./tests/data/clean.docx')
143
144 meta = p.get_meta()
145 self.assertIsNotNone(meta)
146
147 ret = p.remove_all()
148 self.assertTrue(ret)
149
150 p = office.OfficeParser('./tests/data/clean.docx.cleaned')
151 self.assertEqual(p.get_meta(), {})
152
153 os.remove('./tests/data/clean.docx')