summaryrefslogtreecommitdiff
path: root/tests/test_corrupted_files.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_corrupted_files.py')
-rw-r--r--tests/test_corrupted_files.py41
1 files changed, 31 insertions, 10 deletions
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 8728cb2..53c856a 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -7,7 +7,7 @@ import logging
7import zipfile 7import zipfile
8 8
9from libmat2 import pdf, images, audio, office, parser_factory, torrent 9from libmat2 import pdf, images, audio, office, parser_factory, torrent
10from libmat2 import harmless, video, html 10from libmat2 import harmless, video, web
11 11
12# No need to logging messages, should something go wrong, 12# No need to logging messages, should something go wrong,
13# the testsuite _will_ fail. 13# the testsuite _will_ fail.
@@ -220,34 +220,34 @@ class TestCorruptedFiles(unittest.TestCase):
220 os.remove('./tests/data/--output.avi') 220 os.remove('./tests/data/--output.avi')
221 221
222 def test_zip(self): 222 def test_zip(self):
223 with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: 223 with zipfile.ZipFile('./tests/data/clean.zip', 'w') as zout:
224 zout.write('./tests/data/dirty.flac') 224 zout.write('./tests/data/dirty.flac')
225 zout.write('./tests/data/dirty.docx') 225 zout.write('./tests/data/dirty.docx')
226 zout.write('./tests/data/dirty.jpg') 226 zout.write('./tests/data/dirty.jpg')
227 zout.write('./tests/data/embedded_corrupted.docx') 227 zout.write('./tests/data/embedded_corrupted.docx')
228 p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip') 228 p, mimetype = parser_factory.get_parser('./tests/data/clean.zip')
229 self.assertEqual(mimetype, 'application/zip') 229 self.assertEqual(mimetype, 'application/zip')
230 meta = p.get_meta() 230 meta = p.get_meta()
231 self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') 231 self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
233 self.assertFalse(p.remove_all()) 233 self.assertFalse(p.remove_all())
234 os.remove('./tests/data/dirty.zip') 234 os.remove('./tests/data/clean.zip')
235 235
236 def test_html(self): 236 def test_html(self):
237 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 237 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
238 with open('./tests/data/clean.html', 'a') as f: 238 with open('./tests/data/clean.html', 'a') as f:
239 f.write('<open>but not</closed>') 239 f.write('<open>but not</closed>')
240 with self.assertRaises(ValueError): 240 with self.assertRaises(ValueError):
241 html.HTMLParser('./tests/data/clean.html') 241 web.HTMLParser('./tests/data/clean.html')
242 os.remove('./tests/data/clean.html') 242 os.remove('./tests/data/clean.html')
243 243
244 # Yes, we're able to deal with malformed html :/ 244 # Yes, we're able to deal with malformed html :/
245 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 245 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
246 with open('./tests/data/clean.html', 'a') as f: 246 with open('./tests/data/clean.html', 'a') as f:
247 f.write('<meta name=\'this" is="weird"/>') 247 f.write('<meta name=\'this" is="weird"/>')
248 p = html.HTMLParser('./tests/data/clean.html') 248 p = web.HTMLParser('./tests/data/clean.html')
249 self.assertTrue(p.remove_all()) 249 self.assertTrue(p.remove_all())
250 p = html.HTMLParser('./tests/data/clean.cleaned.html') 250 p = web.HTMLParser('./tests/data/clean.cleaned.html')
251 self.assertEqual(p.get_meta(), {}) 251 self.assertEqual(p.get_meta(), {})
252 os.remove('./tests/data/clean.html') 252 os.remove('./tests/data/clean.html')
253 os.remove('./tests/data/clean.cleaned.html') 253 os.remove('./tests/data/clean.cleaned.html')
@@ -255,17 +255,38 @@ class TestCorruptedFiles(unittest.TestCase):
255 with open('./tests/data/clean.html', 'w') as f: 255 with open('./tests/data/clean.html', 'w') as f:
256 f.write('</close>') 256 f.write('</close>')
257 with self.assertRaises(ValueError): 257 with self.assertRaises(ValueError):
258 html.HTMLParser('./tests/data/clean.html') 258 web.HTMLParser('./tests/data/clean.html')
259 os.remove('./tests/data/clean.html') 259 os.remove('./tests/data/clean.html')
260 260
261 with open('./tests/data/clean.html', 'w') as f: 261 with open('./tests/data/clean.html', 'w') as f:
262 f.write('<notclosed>') 262 f.write('<notclosed>')
263 p = html.HTMLParser('./tests/data/clean.html') 263 p = web.HTMLParser('./tests/data/clean.html')
264 with self.assertRaises(ValueError): 264 with self.assertRaises(ValueError):
265 p.get_meta() 265 p.get_meta()
266 p = html.HTMLParser('./tests/data/clean.html') 266 p = web.HTMLParser('./tests/data/clean.html')
267 with self.assertRaises(ValueError): 267 with self.assertRaises(ValueError):
268 p.remove_all() 268 p.remove_all()
269 os.remove('./tests/data/clean.html') 269 os.remove('./tests/data/clean.html')
270 270
271 with open('./tests/data/clean.html', 'w') as f:
272 f.write('<doctitle><br/></doctitle><br/><notclosed>')
273 p = web.HTMLParser('./tests/data/clean.html')
274 with self.assertRaises(ValueError):
275 p.get_meta()
276 p = web.HTMLParser('./tests/data/clean.html')
277 with self.assertRaises(ValueError):
278 p.remove_all()
279 os.remove('./tests/data/clean.html')
280
281 def test_epub(self):
282 with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:
283 zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
284 p, mimetype = parser_factory.get_parser('./tests/data/clean.epub')
285 self.assertEqual(mimetype, 'application/epub+zip')
286 meta = p.get_meta()
287 self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
288 'harmful content')
289
290 self.assertFalse(p.remove_all())
291 os.remove('./tests/data/clean.epub')
271 292