summaryrefslogtreecommitdiff
path: root/tests/test_libmat2.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_libmat2.py')
-rw-r--r--tests/test_libmat2.py631
1 files changed, 218 insertions, 413 deletions
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index d596ff2..13d861d 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -120,7 +120,6 @@ class TestGetMeta(unittest.TestCase):
120 self.assertEqual(meta['4'], '# And an other one') 120 self.assertEqual(meta['4'], '# And an other one')
121 self.assertEqual(meta['6'], '# and a final one here') 121 self.assertEqual(meta['6'], '# and a final one here')
122 122
123
124 def test_tiff(self): 123 def test_tiff(self):
125 p = images.TiffParser('./tests/data/dirty.tiff') 124 p = images.TiffParser('./tests/data/dirty.tiff')
126 meta = p.get_meta() 125 meta = p.get_meta()
@@ -222,6 +221,10 @@ class TestGetMeta(unittest.TestCase):
222 self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 221 self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
223 os.remove('./tests/data/dirty.tar') 222 os.remove('./tests/data/dirty.tar')
224 223
224 def test_svg(self):
225 p = images.SVGParser('./tests/data/weird.svg')
226 self.assertEqual(p.get_meta()['Xmlns'], 'http://www.w3.org/1337/svg')
227
225 228
226class TestRemovingThumbnails(unittest.TestCase): 229class TestRemovingThumbnails(unittest.TestCase):
227 def test_odt(self): 230 def test_odt(self):
@@ -281,367 +284,215 @@ class TestRevisionsCleaning(unittest.TestCase):
281 os.remove('./tests/data/revision_clean.docx') 284 os.remove('./tests/data/revision_clean.docx')
282 os.remove('./tests/data/revision_clean.cleaned.docx') 285 os.remove('./tests/data/revision_clean.cleaned.docx')
283 286
284class TestCleaning(unittest.TestCase):
285 def test_pdf(self):
286 shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
287 p = pdf.PDFParser('./tests/data/clean.pdf')
288
289 meta = p.get_meta()
290 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
291
292 ret = p.remove_all()
293 self.assertTrue(ret)
294
295 p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
296 expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
297 self.assertEqual(p.get_meta(), expected_meta)
298 self.assertTrue(p.remove_all())
299
300 os.remove('./tests/data/clean.pdf')
301 os.remove('./tests/data/clean.cleaned.pdf')
302 os.remove('./tests/data/clean.cleaned.cleaned.pdf')
303
304 def test_png(self):
305 shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
306 p = images.PNGParser('./tests/data/clean.png')
307
308 meta = p.get_meta()
309 self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
310
311 ret = p.remove_all()
312 self.assertTrue(ret)
313
314 p = images.PNGParser('./tests/data/clean.cleaned.png')
315 self.assertEqual(p.get_meta(), {})
316 self.assertTrue(p.remove_all())
317
318 os.remove('./tests/data/clean.png')
319 os.remove('./tests/data/clean.cleaned.png')
320 os.remove('./tests/data/clean.cleaned.cleaned.png')
321
322 def test_jpg(self):
323 shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
324 p = images.JPGParser('./tests/data/clean.jpg')
325
326 meta = p.get_meta()
327 self.assertEqual(meta['Comment'], 'Created with GIMP')
328
329 ret = p.remove_all()
330 self.assertTrue(ret)
331
332 p = images.JPGParser('./tests/data/clean.cleaned.jpg')
333 self.assertEqual(p.get_meta(), {})
334 self.assertTrue(p.remove_all())
335
336 os.remove('./tests/data/clean.jpg')
337 os.remove('./tests/data/clean.cleaned.jpg')
338 os.remove('./tests/data/clean.cleaned.cleaned.jpg')
339
340 def test_mp3(self):
341 shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.mp3')
342 p = audio.MP3Parser('./tests/data/clean.mp3')
343
344 meta = p.get_meta()
345 self.assertEqual(meta['TXXX:I am a'], 'various comment')
346
347 ret = p.remove_all()
348 self.assertTrue(ret)
349
350 p = audio.MP3Parser('./tests/data/clean.cleaned.mp3')
351 self.assertEqual(p.get_meta(), {})
352 self.assertTrue(p.remove_all())
353
354 os.remove('./tests/data/clean.mp3')
355 os.remove('./tests/data/clean.cleaned.mp3')
356 os.remove('./tests/data/clean.cleaned.cleaned.mp3')
357
358 def test_ogg(self):
359 shutil.copy('./tests/data/dirty.ogg', './tests/data/clean.ogg')
360 p = audio.OGGParser('./tests/data/clean.ogg')
361
362 meta = p.get_meta()
363 self.assertEqual(meta['title'], 'I am so')
364
365 ret = p.remove_all()
366 self.assertTrue(ret)
367
368 p = audio.OGGParser('./tests/data/clean.cleaned.ogg')
369 self.assertEqual(p.get_meta(), {})
370 self.assertTrue(p.remove_all())
371
372 os.remove('./tests/data/clean.ogg')
373 os.remove('./tests/data/clean.cleaned.ogg')
374 os.remove('./tests/data/clean.cleaned.cleaned.ogg')
375
376 def test_flac(self):
377 shutil.copy('./tests/data/dirty.flac', './tests/data/clean.flac')
378 p = audio.FLACParser('./tests/data/clean.flac')
379
380 meta = p.get_meta()
381 self.assertEqual(meta['title'], 'I am so')
382
383 ret = p.remove_all()
384 self.assertTrue(ret)
385
386 p = audio.FLACParser('./tests/data/clean.cleaned.flac')
387 self.assertEqual(p.get_meta(), {})
388 self.assertTrue(p.remove_all())
389
390 os.remove('./tests/data/clean.flac')
391 os.remove('./tests/data/clean.cleaned.flac')
392 os.remove('./tests/data/clean.cleaned.cleaned.flac')
393
394 def test_office(self):
395 shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
396 p = office.MSOfficeParser('./tests/data/clean.docx')
397
398 meta = p.get_meta()
399 self.assertIsNotNone(meta)
400
401 ret = p.remove_all()
402 self.assertTrue(ret)
403
404 p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
405 self.assertEqual(p.get_meta(), {})
406 self.assertTrue(p.remove_all())
407
408 os.remove('./tests/data/clean.docx')
409 os.remove('./tests/data/clean.cleaned.docx')
410 os.remove('./tests/data/clean.cleaned.cleaned.docx')
411
412 def test_libreoffice(self):
413 shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
414 p = office.LibreOfficeParser('./tests/data/clean.odt')
415
416 meta = p.get_meta()
417 self.assertIsNotNone(meta)
418
419 ret = p.remove_all()
420 self.assertTrue(ret)
421
422 p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
423 self.assertEqual(p.get_meta(), {})
424 self.assertTrue(p.remove_all())
425
426 os.remove('./tests/data/clean.odt')
427 os.remove('./tests/data/clean.cleaned.odt')
428 os.remove('./tests/data/clean.cleaned.cleaned.odt')
429
430 def test_tiff(self):
431 shutil.copy('./tests/data/dirty.tiff', './tests/data/clean.tiff')
432 p = images.TiffParser('./tests/data/clean.tiff')
433
434 meta = p.get_meta()
435 self.assertEqual(meta['Model'], 'C7070WZ')
436
437 ret = p.remove_all()
438 self.assertTrue(ret)
439
440 p = images.TiffParser('./tests/data/clean.cleaned.tiff')
441 self.assertEqual(p.get_meta(), {})
442 self.assertTrue(p.remove_all())
443
444 os.remove('./tests/data/clean.tiff')
445 os.remove('./tests/data/clean.cleaned.tiff')
446 os.remove('./tests/data/clean.cleaned.cleaned.tiff')
447
448 def test_bmp(self):
449 shutil.copy('./tests/data/dirty.bmp', './tests/data/clean.bmp')
450 p = harmless.HarmlessParser('./tests/data/clean.bmp')
451
452 meta = p.get_meta()
453 self.assertEqual(meta, {}) # bmp has no meta :)
454
455 ret = p.remove_all()
456 self.assertTrue(ret)
457
458 p = harmless.HarmlessParser('./tests/data/clean.cleaned.bmp')
459 self.assertEqual(p.get_meta(), {})
460 self.assertTrue(p.remove_all())
461
462 os.remove('./tests/data/clean.bmp')
463 os.remove('./tests/data/clean.cleaned.bmp')
464 os.remove('./tests/data/clean.cleaned.cleaned.bmp')
465
466 def test_torrent(self):
467 shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.torrent')
468 p = torrent.TorrentParser('./tests/data/clean.torrent')
469
470 meta = p.get_meta()
471 self.assertEqual(meta, {'created by': b'mktorrent 1.0', 'creation date': 1522397702})
472
473 ret = p.remove_all()
474 self.assertTrue(ret)
475
476 p = torrent.TorrentParser('./tests/data/clean.cleaned.torrent')
477 self.assertEqual(p.get_meta(), {})
478 self.assertTrue(p.remove_all())
479
480 os.remove('./tests/data/clean.torrent')
481 os.remove('./tests/data/clean.cleaned.torrent')
482 os.remove('./tests/data/clean.cleaned.cleaned.torrent')
483
484 def test_odf(self):
485 shutil.copy('./tests/data/dirty.odf', './tests/data/clean.odf')
486 p = office.LibreOfficeParser('./tests/data/clean.odf')
487
488 meta = p.get_meta()
489 self.assertEqual(meta['meta.xml']['meta:creation-date'], '2018-04-23T00:18:59.438231281')
490
491 ret = p.remove_all()
492 self.assertTrue(ret)
493 287
494 p = office.LibreOfficeParser('./tests/data/clean.cleaned.odf') 288class TestCleaning(unittest.TestCase):
495 self.assertEqual(p.get_meta(), {}) 289 data = [{
496 self.assertTrue(p.remove_all()) 290 'name': 'pdf',
497 291 'parser': pdf.PDFParser,
498 os.remove('./tests/data/clean.odf') 292 'meta': {'producer': 'pdfTeX-1.40.14'},
499 os.remove('./tests/data/clean.cleaned.odf') 293 'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
500 os.remove('./tests/data/clean.cleaned.cleaned.odf') 294 }, {
501 295 'name': 'png',
502 def test_odg(self): 296 'parser': images.PNGParser,
503 shutil.copy('./tests/data/dirty.odg', './tests/data/clean.odg') 297 'meta': {'Comment': 'This is a comment, be careful!'},
504 p = office.LibreOfficeParser('./tests/data/clean.odg') 298 'expected_meta': {},
505 299 }, {
506 meta = p.get_meta() 300 'name': 'jpg',
507 self.assertEqual(meta['meta.xml']['dc:date'], '2018-04-23T00:26:59.385838550') 301 'parser': images.JPGParser,
508 302 'meta': {'Comment': 'Created with GIMP'},
509 ret = p.remove_all() 303 'expected_meta': {},
510 self.assertTrue(ret) 304 }, {
511 305 'name': 'mp3',
512 p = office.LibreOfficeParser('./tests/data/clean.cleaned.odg') 306 'parser': audio.MP3Parser,
513 self.assertEqual(p.get_meta(), {}) 307 'meta': {'TXXX:I am a': 'various comment'},
514 self.assertTrue(p.remove_all()) 308 'expected_meta': {},
515 309 }, {
516 os.remove('./tests/data/clean.odg') 310 'name': 'ogg',
517 os.remove('./tests/data/clean.cleaned.odg') 311 'parser': audio.OGGParser,
518 os.remove('./tests/data/clean.cleaned.cleaned.odg') 312 'meta': {'title': 'I am so'},
519 313 'expected_meta': {},
520 def test_txt(self): 314 }, {
521 shutil.copy('./tests/data/dirty.txt', './tests/data/clean.txt') 315 'name': 'flac',
522 p = harmless.HarmlessParser('./tests/data/clean.txt') 316 'parser': audio.FLACParser,
523 317 'meta': {'title': 'I am so'},
524 meta = p.get_meta() 318 'expected_meta': {},
525 self.assertEqual(meta, {}) 319 }, {
526 320 'name': 'docx',
527 ret = p.remove_all() 321 'parser': office.MSOfficeParser,
528 self.assertTrue(ret) 322 'meta': {'word/media/image1.png' :
529 323 {'Comment': 'This is a comment, be careful!',
530 p = harmless.HarmlessParser('./tests/data/clean.cleaned.txt') 324 'ModifyDate': '2018:03:20 21:59:25',
531 self.assertEqual(p.get_meta(), {}) 325 'PixelUnits': 'meters',
532 self.assertTrue(p.remove_all()) 326 'PixelsPerUnitX': 2835,
533 327 'PixelsPerUnitY': 2835,
534 os.remove('./tests/data/clean.txt') 328 'create_system': 'Weird',
535 os.remove('./tests/data/clean.cleaned.txt') 329 'date_time': '2018-03-31 13:15:38'} ,
536 os.remove('./tests/data/clean.cleaned.cleaned.txt') 330 },
537 331 'expected_meta': {},
538 def test_avi(self): 332 }, {
539 try: 333 'name': 'odt',
540 video._get_ffmpeg_path() 334 'parser': office.LibreOfficeParser,
541 except RuntimeError: 335 'meta': {
542 raise unittest.SkipTest 336 'Pictures/1000000000000032000000311EC5314D.png': {
543 337 'create_system': 'Weird',
544 shutil.copy('./tests/data/dirty.avi', './tests/data/clean.avi') 338 'date_time': '2011-07-26 02:40:16',
545 p = video.AVIParser('./tests/data/clean.avi') 339 'PixelsPerUnitX': 4847,
546 340 'PixelsPerUnitY': 4760,
547 meta = p.get_meta() 341 'PixelUnits': 'meters',
548 self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1') 342 },
549 343 },
550 ret = p.remove_all() 344 'expected_meta': {},
551 self.assertTrue(ret) 345 },{
552 346 'name': 'tiff',
553 p = video.AVIParser('./tests/data/clean.cleaned.avi') 347 'parser': images.TiffParser,
554 self.assertEqual(p.get_meta(), {}) 348 'meta': {'Model': 'C7070WZ'},
555 self.assertTrue(p.remove_all()) 349 'expected_meta':
556 350 {'Orientation': 'Horizontal (normal)',
557 os.remove('./tests/data/clean.avi') 351 'ResolutionUnit': 'inches',
558 os.remove('./tests/data/clean.cleaned.avi') 352 'XResolution': 72,
559 os.remove('./tests/data/clean.cleaned.cleaned.avi') 353 'YResolution': 72}
560 354 },{
561 def test_zip(self): 355 'name': 'bmp',
562 with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: 356 'parser': harmless.HarmlessParser,
563 zout.write('./tests/data/dirty.flac') 357 'meta': {},
564 zout.write('./tests/data/dirty.docx') 358 'expected_meta': {},
565 zout.write('./tests/data/dirty.jpg') 359 },{
566 p = archive.ZipParser('./tests/data/dirty.zip') 360 'name': 'torrent',
567 meta = p.get_meta() 361 'parser': torrent.TorrentParser,
568 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 362 'meta': {'created by': b'mktorrent 1.0', 'creation date': 1522397702},
569 363 'expected_meta': {},
570 ret = p.remove_all() 364 }, {
571 self.assertTrue(ret) 365 'name': 'odf',
572 366 'parser': office.LibreOfficeParser,
573 p = archive.ZipParser('./tests/data/dirty.cleaned.zip') 367 'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
574 self.assertEqual(p.get_meta(), {}) 368 '2018-04-22 22:20:24', 'meta:initial-creator': 'Julien Voisin',
575 self.assertTrue(p.remove_all()) 369 'meta:creation-date': '2018-04-23T00:18:59.438231281',
576 370 'dc:date': '2018-04-23T00:20:23.978564933', 'dc:creator':
577 os.remove('./tests/data/dirty.zip') 371 'Julien Voisin', 'meta:editing-duration': 'PT1M24S',
578 os.remove('./tests/data/dirty.cleaned.zip') 372 'meta:editing-cycles': '1', 'meta:generator':
579 os.remove('./tests/data/dirty.cleaned.cleaned.zip') 373 'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
580 374 'expected_meta': {},
581 375 }, {
582 def test_mp4(self): 376 'name': 'odg',
583 try: 377 'parser': office.LibreOfficeParser,
584 video._get_ffmpeg_path() 378 'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
585 except RuntimeError: 379 '2018-04-22 22:26:58', 'meta:initial-creator': 'Julien Voisin',
586 raise unittest.SkipTest 380 'meta:creation-date': '2018-04-23T00:25:59.953271949',
587 381 'dc:date': '2018-04-23T00:26:59.385838550', 'dc:creator':
588 shutil.copy('./tests/data/dirty.mp4', './tests/data/clean.mp4') 382 'Julien Voisin', 'meta:editing-duration': 'PT59S',
589 p = video.MP4Parser('./tests/data/clean.mp4') 383 'meta:editing-cycles': '1', 'meta:generator':
590 384 'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
591 meta = p.get_meta() 385 'expected_meta': {},
592 self.assertEqual(meta['Encoder'], 'HandBrake 0.9.4 2009112300') 386 }, {
593 387 'name': 'txt',
594 ret = p.remove_all() 388 'parser': harmless.HarmlessParser,
595 self.assertTrue(ret) 389 'meta': {},
596 390 'expected_meta': {},
597 p = video.MP4Parser('./tests/data/clean.cleaned.mp4') 391 },{
598 self.assertNotIn('Encoder', p.get_meta()) 392 'name': 'gif',
599 self.assertTrue(p.remove_all()) 393 'parser': images.GIFParser,
600 394 'meta': {'Comment': 'this is a test comment'},
601 os.remove('./tests/data/clean.mp4') 395 'expected_meta': {},
602 os.remove('./tests/data/clean.cleaned.mp4') 396 },{
603 os.remove('./tests/data/clean.cleaned.cleaned.mp4') 397 'name': 'css',
604 398 'parser': web.CSSParser,
605 def test_wmv(self): 399 'meta': {
606 try: 400 'harmful data': 'underline is cool',
607 video._get_ffmpeg_path() 401 'version': '1.0',
608 except RuntimeError: 402 'author': 'jvoisin'
609 raise unittest.SkipTest 403 },
610 404 'expected_meta': {},
611 shutil.copy('./tests/data/dirty.wmv', './tests/data/clean.wmv') 405 },{
612 p = video.WMVParser('./tests/data/clean.wmv') 406 'name': 'svg',
613 407 'parser': images.SVGParser,
614 meta = p.get_meta() 408 'meta': {
615 self.assertEqual(meta['EncodingSettings'], 'Lavf52.103.0') 409 'WorkDescription': "This is a test svg image for mat2's testsuite",
616 410 },
617 ret = p.remove_all() 411 'expected_meta': {},
618 self.assertTrue(ret) 412 } ,{
413 'name': 'ppm',
414 'parser': images.PPMParser,
415 'meta': {
416 '1': '# A metadata',
417 },
418 'expected_meta': {},
419 } ,{
420 'name': 'avi',
421 'ffmpeg': 1,
422 'parser': video.AVIParser,
423 'meta': {
424 'Software': 'MEncoder SVN-r33148-4.0.1',
425 },
426 'expected_meta': {},
427 } ,{
428 'name': 'mp4',
429 'ffmpeg': 1,
430 'parser': video.MP4Parser,
431 'meta': {
432 'Encoder': 'HandBrake 0.9.4 2009112300',
433 },
434 'expected_meta':
435 {'CompatibleBrands': ['isom', 'iso2', 'avc1', 'mp41'],
436 'CompressorID': 'avc1',
437 'GraphicsMode': 'srcCopy',
438 'HandlerDescription': 'SoundHandler',
439 'HandlerType': 'Metadata',
440 'HandlerVendorID': 'Apple',
441 'MajorBrand': 'MP4 Base Media v1 [IS0 14496-12:2003]',
442 'MediaHeaderVersion': 0,
443 'MinorVersion': '0.2.0',
444 'MovieDataOffset': 48,
445 'MovieHeaderVersion': 0,
446 'NextTrackID': 3,
447 'PreferredRate': 1,
448 'Rotation': 0,
449 'TimeScale': 1000,
450 'TrackHeaderVersion': 0,
451 'TrackID': 1,
452 'TrackLayer': 0},
453 },{
454 'name': 'wmv',
455 'ffmpeg': 1,
456 'parser': video.WMVParser,
457 'meta': {
458 'EncodingSettings': 'Lavf52.103.0',
459 },
460 'expected_meta': {},
461 }
462 ]
619 463
620 p = video.WMVParser('./tests/data/clean.cleaned.wmv') 464 def test_all_parametred(self):
621 self.assertNotIn('EncodingSettings', p.get_meta()) 465 for case in self.data:
622 self.assertTrue(p.remove_all()) 466 if 'ffmpeg' in case:
467 try:
468 video._get_ffmpeg_path()
469 except RuntimeError:
470 raise unittest.SkipTest
623 471
624 os.remove('./tests/data/clean.wmv') 472 print('[+] Testing %s' % case['name'])
625 os.remove('./tests/data/clean.cleaned.wmv') 473 target = './tests/data/clean.' + case['name']
626 os.remove('./tests/data/clean.cleaned.cleaned.wmv') 474 shutil.copy('./tests/data/dirty.' + case['name'], target)
475 p1 = case['parser'](target)
627 476
628 def test_gif(self): 477 meta = p1.get_meta()
629 shutil.copy('./tests/data/dirty.gif', './tests/data/clean.gif') 478 for k, v in case['meta'].items():
630 p = images.GIFParser('./tests/data/clean.gif') 479 if isinstance(v, dict):
480 for _k, _v in v.items():
481 self.assertEqual(meta[k][_k], _v)
482 else:
483 self.assertEqual(meta[k], v)
631 484
632 meta = p.get_meta() 485 p1.lightweight_cleaning = True
633 self.assertEqual(meta['Comment'], 'this is a test comment') 486 self.assertTrue(p1.remove_all())
634 487
635 ret = p.remove_all() 488 p2 = case['parser'](p1.output_filename)
636 self.assertTrue(ret) 489 self.assertEqual(p2.get_meta(), case['expected_meta'])
490 self.assertTrue(p2.remove_all())
637 491
638 p = images.GIFParser('./tests/data/clean.cleaned.gif') 492 os.remove(target)
639 self.assertNotIn('EncodingSettings', p.get_meta()) 493 os.remove(p1.output_filename)
640 self.assertTrue(p.remove_all()) 494 os.remove(p2.output_filename)
641 495
642 os.remove('./tests/data/clean.gif')
643 os.remove('./tests/data/clean.cleaned.gif')
644 os.remove('./tests/data/clean.cleaned.cleaned.gif')
645 496
646 def test_html(self): 497 def test_html(self):
647 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 498 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
@@ -688,7 +539,6 @@ class TestCleaning(unittest.TestCase):
688 os.remove('./tests/data/clean.html') 539 os.remove('./tests/data/clean.html')
689 os.remove('./tests/data/clean.cleaned.html') 540 os.remove('./tests/data/clean.cleaned.html')
690 541
691
692 def test_epub(self): 542 def test_epub(self):
693 shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub') 543 shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub')
694 p = epub.EPUBParser('./tests/data/clean.epub') 544 p = epub.EPUBParser('./tests/data/clean.epub')
@@ -711,25 +561,26 @@ class TestCleaning(unittest.TestCase):
711 os.remove('./tests/data/clean.cleaned.cleaned.epub') 561 os.remove('./tests/data/clean.cleaned.cleaned.epub')
712 562
713 563
714 def test_css(self): 564class TestCleaningArchives(unittest.TestCase):
715 shutil.copy('./tests/data/dirty.css', './tests/data/clean.css') 565 def test_zip(self):
716 p = web.CSSParser('./tests/data/clean.css') 566 with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
717 567 zout.write('./tests/data/dirty.flac')
718 self.assertEqual(p.get_meta(), { 568 zout.write('./tests/data/dirty.docx')
719 'harmful data': 'underline is cool', 569 zout.write('./tests/data/dirty.jpg')
720 'version': '1.0', 570 p = archive.ZipParser('./tests/data/dirty.zip')
721 'author': 'jvoisin'}) 571 meta = p.get_meta()
572 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
722 573
723 ret = p.remove_all() 574 ret = p.remove_all()
724 self.assertTrue(ret) 575 self.assertTrue(ret)
725 576
726 p = web.CSSParser('./tests/data/clean.cleaned.css') 577 p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
727 self.assertEqual(p.get_meta(), {}) 578 self.assertEqual(p.get_meta(), {})
728 self.assertTrue(p.remove_all()) 579 self.assertTrue(p.remove_all())
729 580
730 os.remove('./tests/data/clean.css') 581 os.remove('./tests/data/dirty.zip')
731 os.remove('./tests/data/clean.cleaned.css') 582 os.remove('./tests/data/dirty.cleaned.zip')
732 os.remove('./tests/data/clean.cleaned.cleaned.css') 583 os.remove('./tests/data/dirty.cleaned.cleaned.zip')
733 584
734 def test_tar(self): 585 def test_tar(self):
735 with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout: 586 with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout:
@@ -870,49 +721,3 @@ class TestCleaning(unittest.TestCase):
870 os.remove('./tests/data/dirty.tar.xz') 721 os.remove('./tests/data/dirty.tar.xz')
871 os.remove('./tests/data/dirty.cleaned.tar.xz') 722 os.remove('./tests/data/dirty.cleaned.tar.xz')
872 os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz') 723 os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz')
873
874 def test_svg(self):
875 shutil.copy('./tests/data/dirty.svg', './tests/data/clean.svg')
876 p = images.SVGParser('./tests/data/clean.svg')
877
878 meta = p.get_meta()
879 self.assertEqual(meta['WorkCreatorAgentTitle'], 'GNOME Design Team')
880 self.assertEqual(meta['WorkSubject'], ['mat2', 'logo', 'metadata'])
881 self.assertEqual(meta['ID'], 'svg11300')
882 self.assertEqual(meta['Output_extension'],
883 'org.inkscape.output.svg.inkscape')
884
885 ret = p.remove_all()
886 self.assertTrue(ret)
887
888 p = images.SVGParser('./tests/data/clean.cleaned.svg')
889 self.assertEqual(p.get_meta(), {})
890 self.assertTrue(p.remove_all())
891
892 os.remove('./tests/data/clean.svg')
893 os.remove('./tests/data/clean.cleaned.svg')
894 os.remove('./tests/data/clean.cleaned.cleaned.svg')
895
896 p = images.SVGParser('./tests/data/weird.svg')
897 self.assertEqual(p.get_meta()['Xmlns'], 'http://www.w3.org/1337/svg')
898
899 def test_ppm(self):
900 shutil.copy('./tests/data/dirty.ppm', './tests/data/clean.ppm')
901 p = images.PPMParser('./tests/data/clean.ppm')
902
903 meta = p.get_meta()
904 print(meta)
905 self.assertEqual(meta['1'], '# A metadata')
906
907 ret = p.remove_all()
908 self.assertTrue(ret)
909
910 p = images.PPMParser('./tests/data/clean.cleaned.ppm')
911 self.assertEqual(p.get_meta(), {})
912 self.assertTrue(p.remove_all())
913
914 os.remove('./tests/data/clean.ppm')
915 os.remove('./tests/data/clean.cleaned.ppm')
916 os.remove('./tests/data/clean.cleaned.cleaned.ppm')
917
918