diff options
| author | jvoisin | 2019-02-20 16:28:11 -0800 |
|---|---|---|
| committer | jvoisin | 2019-02-20 16:28:11 -0800 |
| commit | 02ff21b158c76fcd355a74ddb940e1c54fc2d7ed (patch) | |
| tree | 701c6f5e316265e5a95a162356965ecf2fb8d6b2 /tests/test_corrupted_files.py | |
| parent | 6b45064c784d03bb21ffaf7e50c9ba684e6985a9 (diff) | |
Implement epub support
Diffstat (limited to '')
| -rw-r--r-- | tests/test_corrupted_files.py | 41 |
1 files changed, 31 insertions, 10 deletions
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py index 8728cb2..53c856a 100644 --- a/tests/test_corrupted_files.py +++ b/tests/test_corrupted_files.py | |||
| @@ -7,7 +7,7 @@ import logging | |||
| 7 | import zipfile | 7 | import zipfile |
| 8 | 8 | ||
| 9 | from libmat2 import pdf, images, audio, office, parser_factory, torrent | 9 | from libmat2 import pdf, images, audio, office, parser_factory, torrent |
| 10 | from libmat2 import harmless, video, html | 10 | from libmat2 import harmless, video, web |
| 11 | 11 | ||
| 12 | # No need to logging messages, should something go wrong, | 12 | # No need to logging messages, should something go wrong, |
| 13 | # the testsuite _will_ fail. | 13 | # the testsuite _will_ fail. |
| @@ -220,34 +220,34 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 220 | os.remove('./tests/data/--output.avi') | 220 | os.remove('./tests/data/--output.avi') |
| 221 | 221 | ||
| 222 | def test_zip(self): | 222 | def test_zip(self): |
| 223 | with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: | 223 | with zipfile.ZipFile('./tests/data/clean.zip', 'w') as zout: |
| 224 | zout.write('./tests/data/dirty.flac') | 224 | zout.write('./tests/data/dirty.flac') |
| 225 | zout.write('./tests/data/dirty.docx') | 225 | zout.write('./tests/data/dirty.docx') |
| 226 | zout.write('./tests/data/dirty.jpg') | 226 | zout.write('./tests/data/dirty.jpg') |
| 227 | zout.write('./tests/data/embedded_corrupted.docx') | 227 | zout.write('./tests/data/embedded_corrupted.docx') |
| 228 | p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip') | 228 | p, mimetype = parser_factory.get_parser('./tests/data/clean.zip') |
| 229 | self.assertEqual(mimetype, 'application/zip') | 229 | self.assertEqual(mimetype, 'application/zip') |
| 230 | meta = p.get_meta() | 230 | meta = p.get_meta() |
| 231 | self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') | 231 | self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') |
| 232 | self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | 232 | self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') |
| 233 | self.assertFalse(p.remove_all()) | 233 | self.assertFalse(p.remove_all()) |
| 234 | os.remove('./tests/data/dirty.zip') | 234 | os.remove('./tests/data/clean.zip') |
| 235 | 235 | ||
| 236 | def test_html(self): | 236 | def test_html(self): |
| 237 | shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') | 237 | shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') |
| 238 | with open('./tests/data/clean.html', 'a') as f: | 238 | with open('./tests/data/clean.html', 'a') as f: |
| 239 | f.write('<open>but not</closed>') | 239 | f.write('<open>but not</closed>') |
| 240 | with self.assertRaises(ValueError): | 240 | with self.assertRaises(ValueError): |
| 241 | html.HTMLParser('./tests/data/clean.html') | 241 | web.HTMLParser('./tests/data/clean.html') |
| 242 | os.remove('./tests/data/clean.html') | 242 | os.remove('./tests/data/clean.html') |
| 243 | 243 | ||
| 244 | # Yes, we're able to deal with malformed html :/ | 244 | # Yes, we're able to deal with malformed html :/ |
| 245 | shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') | 245 | shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') |
| 246 | with open('./tests/data/clean.html', 'a') as f: | 246 | with open('./tests/data/clean.html', 'a') as f: |
| 247 | f.write('<meta name=\'this" is="weird"/>') | 247 | f.write('<meta name=\'this" is="weird"/>') |
| 248 | p = html.HTMLParser('./tests/data/clean.html') | 248 | p = web.HTMLParser('./tests/data/clean.html') |
| 249 | self.assertTrue(p.remove_all()) | 249 | self.assertTrue(p.remove_all()) |
| 250 | p = html.HTMLParser('./tests/data/clean.cleaned.html') | 250 | p = web.HTMLParser('./tests/data/clean.cleaned.html') |
| 251 | self.assertEqual(p.get_meta(), {}) | 251 | self.assertEqual(p.get_meta(), {}) |
| 252 | os.remove('./tests/data/clean.html') | 252 | os.remove('./tests/data/clean.html') |
| 253 | os.remove('./tests/data/clean.cleaned.html') | 253 | os.remove('./tests/data/clean.cleaned.html') |
| @@ -255,17 +255,38 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 255 | with open('./tests/data/clean.html', 'w') as f: | 255 | with open('./tests/data/clean.html', 'w') as f: |
| 256 | f.write('</close>') | 256 | f.write('</close>') |
| 257 | with self.assertRaises(ValueError): | 257 | with self.assertRaises(ValueError): |
| 258 | html.HTMLParser('./tests/data/clean.html') | 258 | web.HTMLParser('./tests/data/clean.html') |
| 259 | os.remove('./tests/data/clean.html') | 259 | os.remove('./tests/data/clean.html') |
| 260 | 260 | ||
| 261 | with open('./tests/data/clean.html', 'w') as f: | 261 | with open('./tests/data/clean.html', 'w') as f: |
| 262 | f.write('<notclosed>') | 262 | f.write('<notclosed>') |
| 263 | p = html.HTMLParser('./tests/data/clean.html') | 263 | p = web.HTMLParser('./tests/data/clean.html') |
| 264 | with self.assertRaises(ValueError): | 264 | with self.assertRaises(ValueError): |
| 265 | p.get_meta() | 265 | p.get_meta() |
| 266 | p = html.HTMLParser('./tests/data/clean.html') | 266 | p = web.HTMLParser('./tests/data/clean.html') |
| 267 | with self.assertRaises(ValueError): | 267 | with self.assertRaises(ValueError): |
| 268 | p.remove_all() | 268 | p.remove_all() |
| 269 | os.remove('./tests/data/clean.html') | 269 | os.remove('./tests/data/clean.html') |
| 270 | 270 | ||
| 271 | with open('./tests/data/clean.html', 'w') as f: | ||
| 272 | f.write('<doctitle><br/></doctitle><br/><notclosed>') | ||
| 273 | p = web.HTMLParser('./tests/data/clean.html') | ||
| 274 | with self.assertRaises(ValueError): | ||
| 275 | p.get_meta() | ||
| 276 | p = web.HTMLParser('./tests/data/clean.html') | ||
| 277 | with self.assertRaises(ValueError): | ||
| 278 | p.remove_all() | ||
| 279 | os.remove('./tests/data/clean.html') | ||
| 280 | |||
| 281 | def test_epub(self): | ||
| 282 | with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout: | ||
| 283 | zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf') | ||
| 284 | p, mimetype = parser_factory.get_parser('./tests/data/clean.epub') | ||
| 285 | self.assertEqual(mimetype, 'application/epub+zip') | ||
| 286 | meta = p.get_meta() | ||
| 287 | self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'], | ||
| 288 | 'harmful content') | ||
| 289 | |||
| 290 | self.assertFalse(p.remove_all()) | ||
| 291 | os.remove('./tests/data/clean.epub') | ||
| 271 | 292 | ||
