summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r-- tests/data/dirty.html 14
-rw-r--r-- tests/test_corrupted_files.py 39
-rw-r--r-- tests/test_libmat2.py 20
3 files changed, 71 insertions, 2 deletions
diff --git a/tests/data/dirty.html b/tests/data/dirty.html
new file mode 100644
index 0000000..1aa1723
--- /dev/null
+++ b/tests/data/dirty.html
@@ -0,0 +1,14 @@
1<html>
2 <head>
3 <meta content="vim" name="generator"/>
4 <meta content="jvoisin" name="author"/>
5</head>
6<body>
7 <p>
8 <h1>Hello</h1>
9 I am a web page.
10 Please <b>love</b> me.
11 Here, have a pretty picture: <img src='dirty.jpg' alt='a pretty picture'/>
12 </p>
13</body>
14</html>
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index b2e7798..8728cb2 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -7,7 +7,7 @@ import logging
7import zipfile 7import zipfile
8 8
9from libmat2 import pdf, images, audio, office, parser_factory, torrent 9from libmat2 import pdf, images, audio, office, parser_factory, torrent
10from libmat2 import harmless, video 10from libmat2 import harmless, video, html
11 11
12# No need to logging messages, should something go wrong, 12# No need to logging messages, should something go wrong,
13# the testsuite _will_ fail. 13# the testsuite _will_ fail.
@@ -232,3 +232,40 @@ class TestCorruptedFiles(unittest.TestCase):
232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
233 self.assertFalse(p.remove_all()) 233 self.assertFalse(p.remove_all())
234 os.remove('./tests/data/dirty.zip') 234 os.remove('./tests/data/dirty.zip')
235
def test_html(self):
    """Exercise ``html.HTMLParser`` against malformed HTML documents.

    Four scenarios are checked, each on a scratch file in ``tests/data``:

    1. mismatched tags appended to a copy of ``dirty.html``: the
       constructor is expected to raise ``ValueError``;
    2. a ``<meta>`` tag with broken quoting appended to a copy of
       ``dirty.html``: ``remove_all()`` is expected to succeed and the
       cleaned file is expected to expose no metadata;
    3. a file holding only a closing tag: the constructor is expected to
       raise ``ValueError``;
    4. a file holding only a never-closed opening tag: the constructor is
       expected to succeed, while ``get_meta()`` and ``remove_all()`` are
       each expected to raise ``ValueError``.

    The scratch files are additionally registered with ``addCleanup`` so
    that a failing assertion does not leave them behind in ``tests/data``.
    """
    dirty_path = './tests/data/dirty.html'
    clean_path = './tests/data/clean.html'
    cleaned_path = './tests/data/clean.cleaned.html'

    def _discard(path):
        # On the success path the file is already gone; only remove
        # what a failed assertion may have left behind.
        if os.path.exists(path):
            os.remove(path)

    self.addCleanup(_discard, clean_path)
    self.addCleanup(_discard, cleaned_path)

    # Scenario 1: opening and closing tags that do not match.
    shutil.copy(dirty_path, clean_path)
    with open(clean_path, 'a') as f:
        f.write('<open>but not</closed>')
    with self.assertRaises(ValueError):
        html.HTMLParser(clean_path)
    os.remove(clean_path)

    # Yes, we're able to deal with malformed html :/
    shutil.copy(dirty_path, clean_path)
    with open(clean_path, 'a') as f:
        f.write('<meta name=\'this" is="weird"/>')
    p = html.HTMLParser(clean_path)
    self.assertTrue(p.remove_all())
    p = html.HTMLParser(cleaned_path)
    self.assertEqual(p.get_meta(), {})
    os.remove(clean_path)
    os.remove(cleaned_path)

    # Scenario 3: a closing tag without any matching opening tag.
    with open(clean_path, 'w') as f:
        f.write('</close>')
    with self.assertRaises(ValueError):
        html.HTMLParser(clean_path)
    os.remove(clean_path)

    # Scenario 4: an opening tag that is never closed. A fresh parser is
    # built for each call, exactly as the two checks are independent.
    with open(clean_path, 'w') as f:
        f.write('<notclosed>')
    p = html.HTMLParser(clean_path)
    with self.assertRaises(ValueError):
        p.get_meta()
    p = html.HTMLParser(clean_path)
    with self.assertRaises(ValueError):
        p.remove_all()
    os.remove(clean_path)
270
271
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 548b076..8753e09 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
6import zipfile 6import zipfile
7 7
8from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless 8from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
9from libmat2 import check_dependencies, video, archive 9from libmat2 import check_dependencies, video, archive, html
10 10
11 11
12class TestCheckDependencies(unittest.TestCase): 12class TestCheckDependencies(unittest.TestCase):
@@ -596,3 +596,21 @@ class TestCleaning(unittest.TestCase):
596 os.remove('./tests/data/clean.gif') 596 os.remove('./tests/data/clean.gif')
597 os.remove('./tests/data/clean.cleaned.gif') 597 os.remove('./tests/data/clean.cleaned.gif')
598 os.remove('./tests/data/clean.cleaned.cleaned.gif') 598 os.remove('./tests/data/clean.cleaned.cleaned.gif')
599
def test_html(self):
    """Clean a copy of ``dirty.html`` and check the metadata is gone.

    The test copies ``tests/data/dirty.html`` to ``clean.html``, checks
    that the ``author`` metadata reads ``'jvoisin'``, and runs
    ``remove_all()``. It then reopens the resulting
    ``clean.cleaned.html`` and expects ``get_meta()`` to return an empty
    dict and a second ``remove_all()`` to succeed.

    The final ``os.remove`` calls are kept: they also fail the test if
    one of the expected output files was never created. The same paths
    are additionally registered with ``addCleanup`` so that a failing
    assertion does not leave scratch files behind in ``tests/data``.
    """
    clean_path = './tests/data/clean.html'
    cleaned_path = './tests/data/clean.cleaned.html'
    recleaned_path = './tests/data/clean.cleaned.cleaned.html'

    def _discard(path):
        # On the success path the file is already gone; only remove
        # what a failed assertion may have left behind.
        if os.path.exists(path):
            os.remove(path)

    for scratch in (clean_path, cleaned_path, recleaned_path):
        self.addCleanup(_discard, scratch)

    shutil.copy('./tests/data/dirty.html', clean_path)
    p = html.HTMLParser(clean_path)

    meta = p.get_meta()
    self.assertEqual(meta['author'], 'jvoisin')

    ret = p.remove_all()
    self.assertTrue(ret)

    # The cleaned output must carry no metadata and be cleanable again.
    p = html.HTMLParser(cleaned_path)
    self.assertEqual(p.get_meta(), {})
    self.assertTrue(p.remove_all())

    os.remove(clean_path)
    os.remove(cleaned_path)
    os.remove(recleaned_path)