summaryrefslogtreecommitdiff
path: root/tests/test_corrupted_files.py
diff options
context:
space:
mode:
author: jvoisin, 2019-02-08 00:26:47 +0100
committer: jvoisin, 2019-02-08 23:05:18 +0100
commit: 6cc034e81bd0cea98dffe4d7311f3bd16178b63e (patch)
tree: 319ec5a697a1a1c49089084c09b0a30cbd4983f7 /tests/test_corrupted_files.py
parent: e1dd439fc86ba15816e2331e8bed67dd7147e368 (diff)
Add support for html files
Diffstat (limited to '')
-rw-r--r-- tests/test_corrupted_files.py 39
1 files changed, 38 insertions, 1 deletions
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index b2e7798..8728cb2 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -7,7 +7,7 @@ import logging
7import zipfile 7import zipfile
8 8
9from libmat2 import pdf, images, audio, office, parser_factory, torrent 9from libmat2 import pdf, images, audio, office, parser_factory, torrent
10from libmat2 import harmless, video 10from libmat2 import harmless, video, html
11 11
12# No need for logging messages; should something go wrong, 12# No need for logging messages; should something go wrong,
13# the testsuite _will_ fail. 13# the testsuite _will_ fail.
@@ -232,3 +232,40 @@ class TestCorruptedFiles(unittest.TestCase):
232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
233 self.assertFalse(p.remove_all()) 233 self.assertFalse(p.remove_all())
234 os.remove('./tests/data/dirty.zip') 234 os.remove('./tests/data/dirty.zip')
235
def test_html(self):
    """Check how html.HTMLParser reacts to malformed HTML input.

    Four scenarios are exercised, each on a scratch file
    ``./tests/data/clean.html``:

    1. Mismatched tags appended to a copy of ``dirty.html``: constructing
       the parser must raise ValueError.
    2. A ``<meta>`` tag with oddly quoted attributes appended to a copy of
       ``dirty.html``: ``remove_all()`` must succeed and the resulting
       ``clean.cleaned.html`` must expose no metadata.
    3. A file holding only a closing tag: constructing the parser must
       raise ValueError.
    4. A file holding only an unclosed tag: the parser can be constructed,
       but both ``get_meta()`` and ``remove_all()`` must raise ValueError.

    Every scenario removes its scratch files in a ``finally`` clause so
    that a failing assertion does not leave stray files in ``tests/data``.
    """
    dirty_path = './tests/data/dirty.html'
    scratch_path = './tests/data/clean.html'
    cleaned_path = './tests/data/clean.cleaned.html'

    def discard(*paths):
        # Best-effort removal: a scenario may fail before creating a file,
        # and a missing file must not mask the original test failure.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

    # Scenario 1: an opening tag closed by a different tag name.
    try:
        shutil.copy(dirty_path, scratch_path)
        with open(scratch_path, 'a') as f:
            f.write('<open>but not</closed>')
        with self.assertRaises(ValueError):
            html.HTMLParser(scratch_path)
    finally:
        discard(scratch_path)

    # Scenario 2: yes, we're able to deal with malformed html :/
    try:
        shutil.copy(dirty_path, scratch_path)
        with open(scratch_path, 'a') as f:
            f.write('<meta name=\'this" is="weird"/>')
        p = html.HTMLParser(scratch_path)
        self.assertTrue(p.remove_all())
        # The cleaned file is presumably written by remove_all() above;
        # re-parse it to check that no metadata is left.
        p = html.HTMLParser(cleaned_path)
        self.assertEqual(p.get_meta(), {})
    finally:
        discard(scratch_path, cleaned_path)

    # Scenario 3: a closing tag without any matching opening tag.
    try:
        with open(scratch_path, 'w') as f:
            f.write('</close>')
        with self.assertRaises(ValueError):
            html.HTMLParser(scratch_path)
    finally:
        discard(scratch_path)

    # Scenario 4: an opening tag that is never closed. Construction works,
    # the error only shows up when the parser is actually used.
    try:
        with open(scratch_path, 'w') as f:
            f.write('<notclosed>')
        p = html.HTMLParser(scratch_path)
        with self.assertRaises(ValueError):
            p.get_meta()
        # A fresh parser is used so remove_all() is checked independently
        # of the get_meta() call above.
        p = html.HTMLParser(scratch_path)
        with self.assertRaises(ValueError):
            p.remove_all()
    finally:
        discard(scratch_path)
270
271