diff options
| author | jvoisin | 2019-02-08 00:26:47 +0100 |
|---|---|---|
| committer | jvoisin | 2019-02-08 23:05:18 +0100 |
| commit | 6cc034e81bd0cea98dffe4d7311f3bd16178b63e (patch) | |
| tree | 319ec5a697a1a1c49089084c09b0a30cbd4983f7 /tests/test_corrupted_files.py | |
| parent | e1dd439fc86ba15816e2331e8bed67dd7147e368 (diff) | |
Add support for html files
Diffstat (limited to 'tests/test_corrupted_files.py')
| -rw-r--r-- | tests/test_corrupted_files.py | 39 |
1 files changed, 38 insertions, 1 deletions
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index b2e7798..8728cb2 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
| @@ -7,7 +7,7 @@ import logging | |||
| 7 | import zipfile | 7 | import zipfile |
| 8 | 8 | ||
| 9 | from libmat2 import pdf, images, audio, office, parser_factory, torrent | 9 | from libmat2 import pdf, images, audio, office, parser_factory, torrent |
| 10 | from libmat2 import harmless, video | 10 | from libmat2 import harmless, video, html |
| 11 | 11 | ||
| 12 | # No need to logging messages, should something go wrong, | 12 | # No need to logging messages, should something go wrong, |
| 13 | # the testsuite _will_ fail. | 13 | # the testsuite _will_ fail. |
| @@ -232,3 +232,40 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 232 | self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | 232 | self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') |
| 233 | self.assertFalse(p.remove_all()) | 233 | self.assertFalse(p.remove_all()) |
| 234 | os.remove('./tests/data/dirty.zip') | 234 | os.remove('./tests/data/dirty.zip') |
| 235 | |||
| 236 | def test_html(self): | ||
| 237 | shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') | ||
| 238 | with open('./tests/data/clean.html', 'a') as f: | ||
| 239 | f.write('<open>but not</closed>') | ||
| 240 | with self.assertRaises(ValueError): | ||
| 241 | html.HTMLParser('./tests/data/clean.html') | ||
| 242 | os.remove('./tests/data/clean.html') | ||
| 243 | |||
| 244 | # Yes, we're able to deal with malformed html :/ | ||
| 245 | shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') | ||
| 246 | with open('./tests/data/clean.html', 'a') as f: | ||
| 247 | f.write('<meta name=\'this" is="weird"/>') | ||
| 248 | p = html.HTMLParser('./tests/data/clean.html') | ||
| 249 | self.assertTrue(p.remove_all()) | ||
| 250 | p = html.HTMLParser('./tests/data/clean.cleaned.html') | ||
| 251 | self.assertEqual(p.get_meta(), {}) | ||
| 252 | os.remove('./tests/data/clean.html') | ||
| 253 | os.remove('./tests/data/clean.cleaned.html') | ||
| 254 | |||
| 255 | with open('./tests/data/clean.html', 'w') as f: | ||
| 256 | f.write('</close>') | ||
| 257 | with self.assertRaises(ValueError): | ||
| 258 | html.HTMLParser('./tests/data/clean.html') | ||
| 259 | os.remove('./tests/data/clean.html') | ||
| 260 | |||
| 261 | with open('./tests/data/clean.html', 'w') as f: | ||
| 262 | f.write('<notclosed>') | ||
| 263 | p = html.HTMLParser('./tests/data/clean.html') | ||
| 264 | with self.assertRaises(ValueError): | ||
| 265 | p.get_meta() | ||
| 266 | p = html.HTMLParser('./tests/data/clean.html') | ||
| 267 | with self.assertRaises(ValueError): | ||
| 268 | p.remove_all() | ||
| 269 | os.remove('./tests/data/clean.html') | ||
| 270 | |||
| 271 | |||
