diff options
| author | jvoisin | 2018-03-31 15:47:06 +0200 |
|---|---|---|
| committer | jvoisin | 2018-03-31 15:47:06 +0200 |
| commit | 865ad181ae075ebc168e5dab5d00b7c99a0b7c9b (patch) | |
| tree | c019e0b0d72587371fd56d6c05e705ec79d34628 /tests | |
| parent | 302a5ea002478cac86ff7be03d2add46c81a96de (diff) | |
Add support for docx
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/data/dirty.docx | bin | 0 -> 598120 bytes | |||
| -rw-r--r-- | tests/test_libmat2.py | 20 |
2 files changed, 20 insertions, 0 deletions
diff --git a/tests/data/dirty.docx b/tests/data/dirty.docx new file mode 100644 index 0000000..97e2c21 --- /dev/null +++ b/tests/data/dirty.docx | |||
| Binary files differ | |||
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index c21185e..02579b0 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py | |||
| @@ -39,6 +39,11 @@ class TestGetMeta(unittest.TestCase): | |||
| 39 | meta = p.get_meta() | 39 | meta = p.get_meta() |
| 40 | self.assertEqual(meta['TITLE'], ['I am so']) | 40 | self.assertEqual(meta['TITLE'], ['I am so']) |
| 41 | 41 | ||
| 42 | def test_docx(self): | ||
| 43 | p = office.OfficeParser('./tests/data/dirty.docx') | ||
| 44 | meta = p.get_meta() | ||
| 45 | print(meta) | ||
| 46 | |||
| 42 | 47 | ||
| 43 | class TestCleaning(unittest.TestCase): | 48 | class TestCleaning(unittest.TestCase): |
| 44 | def test_pdf(self): | 49 | def test_pdf(self): |
| @@ -131,3 +136,18 @@ class TestCleaning(unittest.TestCase): | |||
| 131 | self.assertEqual(p.get_meta(), {}) | 136 | self.assertEqual(p.get_meta(), {}) |
| 132 | 137 | ||
| 133 | os.remove('./tests/data/clean.flac') | 138 | os.remove('./tests/data/clean.flac') |
| 139 | |||
| 140 | def test_office(self): | ||
| 141 | shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx') | ||
| 142 | p = office.OfficeParser('./tests/data/clean.docx') | ||
| 143 | |||
| 144 | meta = p.get_meta() | ||
| 145 | self.assertIsNotNone(meta) | ||
| 146 | |||
| 147 | ret = p.remove_all() | ||
| 148 | self.assertTrue(ret) | ||
| 149 | |||
| 150 | p = office.OfficeParser('./tests/data/clean.docx.cleaned') | ||
| 151 | self.assertEqual(p.get_meta(), {}) | ||
| 152 | |||
| 153 | os.remove('./tests/data/clean.docx') | ||
