summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
author: jvoisin, 2019-10-12 13:32:04 -0700
committer: jvoisin, 2019-10-12 13:32:04 -0700
commit: 3cef7fe7fc81c1495a461a8594b1df69467536ea (patch)
tree: 2ddf7b8e181f7e606227dabdfc020fab4d4b4250 /tests
parent: 6d19a209355a7d1ef3bbee32ec7c9be93ed666ad (diff)
Refactor tests
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_libmat2.py 631
-rw-r--r-- tests/test_lightweigh_cleaning.py 106
-rw-r--r-- tests/test_lightweight_cleaning.py 76
-rw-r--r-- tests/test_policy.py 23
4 files changed, 306 insertions, 530 deletions
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index d596ff2..13d861d 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -120,7 +120,6 @@ class TestGetMeta(unittest.TestCase):
120 self.assertEqual(meta['4'], '# And an other one') 120 self.assertEqual(meta['4'], '# And an other one')
121 self.assertEqual(meta['6'], '# and a final one here') 121 self.assertEqual(meta['6'], '# and a final one here')
122 122
123
124 def test_tiff(self): 123 def test_tiff(self):
125 p = images.TiffParser('./tests/data/dirty.tiff') 124 p = images.TiffParser('./tests/data/dirty.tiff')
126 meta = p.get_meta() 125 meta = p.get_meta()
@@ -222,6 +221,10 @@ class TestGetMeta(unittest.TestCase):
222 self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 221 self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
223 os.remove('./tests/data/dirty.tar') 222 os.remove('./tests/data/dirty.tar')
224 223
224 def test_svg(self):
225 p = images.SVGParser('./tests/data/weird.svg')
226 self.assertEqual(p.get_meta()['Xmlns'], 'http://www.w3.org/1337/svg')
227
225 228
226class TestRemovingThumbnails(unittest.TestCase): 229class TestRemovingThumbnails(unittest.TestCase):
227 def test_odt(self): 230 def test_odt(self):
@@ -281,367 +284,215 @@ class TestRevisionsCleaning(unittest.TestCase):
281 os.remove('./tests/data/revision_clean.docx') 284 os.remove('./tests/data/revision_clean.docx')
282 os.remove('./tests/data/revision_clean.cleaned.docx') 285 os.remove('./tests/data/revision_clean.cleaned.docx')
283 286
284class TestCleaning(unittest.TestCase):
285 def test_pdf(self):
286 shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
287 p = pdf.PDFParser('./tests/data/clean.pdf')
288
289 meta = p.get_meta()
290 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
291
292 ret = p.remove_all()
293 self.assertTrue(ret)
294
295 p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
296 expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
297 self.assertEqual(p.get_meta(), expected_meta)
298 self.assertTrue(p.remove_all())
299
300 os.remove('./tests/data/clean.pdf')
301 os.remove('./tests/data/clean.cleaned.pdf')
302 os.remove('./tests/data/clean.cleaned.cleaned.pdf')
303
304 def test_png(self):
305 shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
306 p = images.PNGParser('./tests/data/clean.png')
307
308 meta = p.get_meta()
309 self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
310
311 ret = p.remove_all()
312 self.assertTrue(ret)
313
314 p = images.PNGParser('./tests/data/clean.cleaned.png')
315 self.assertEqual(p.get_meta(), {})
316 self.assertTrue(p.remove_all())
317
318 os.remove('./tests/data/clean.png')
319 os.remove('./tests/data/clean.cleaned.png')
320 os.remove('./tests/data/clean.cleaned.cleaned.png')
321
322 def test_jpg(self):
323 shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
324 p = images.JPGParser('./tests/data/clean.jpg')
325
326 meta = p.get_meta()
327 self.assertEqual(meta['Comment'], 'Created with GIMP')
328
329 ret = p.remove_all()
330 self.assertTrue(ret)
331
332 p = images.JPGParser('./tests/data/clean.cleaned.jpg')
333 self.assertEqual(p.get_meta(), {})
334 self.assertTrue(p.remove_all())
335
336 os.remove('./tests/data/clean.jpg')
337 os.remove('./tests/data/clean.cleaned.jpg')
338 os.remove('./tests/data/clean.cleaned.cleaned.jpg')
339
340 def test_mp3(self):
341 shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.mp3')
342 p = audio.MP3Parser('./tests/data/clean.mp3')
343
344 meta = p.get_meta()
345 self.assertEqual(meta['TXXX:I am a'], 'various comment')
346
347 ret = p.remove_all()
348 self.assertTrue(ret)
349
350 p = audio.MP3Parser('./tests/data/clean.cleaned.mp3')
351 self.assertEqual(p.get_meta(), {})
352 self.assertTrue(p.remove_all())
353
354 os.remove('./tests/data/clean.mp3')
355 os.remove('./tests/data/clean.cleaned.mp3')
356 os.remove('./tests/data/clean.cleaned.cleaned.mp3')
357
358 def test_ogg(self):
359 shutil.copy('./tests/data/dirty.ogg', './tests/data/clean.ogg')
360 p = audio.OGGParser('./tests/data/clean.ogg')
361
362 meta = p.get_meta()
363 self.assertEqual(meta['title'], 'I am so')
364
365 ret = p.remove_all()
366 self.assertTrue(ret)
367
368 p = audio.OGGParser('./tests/data/clean.cleaned.ogg')
369 self.assertEqual(p.get_meta(), {})
370 self.assertTrue(p.remove_all())
371
372 os.remove('./tests/data/clean.ogg')
373 os.remove('./tests/data/clean.cleaned.ogg')
374 os.remove('./tests/data/clean.cleaned.cleaned.ogg')
375
376 def test_flac(self):
377 shutil.copy('./tests/data/dirty.flac', './tests/data/clean.flac')
378 p = audio.FLACParser('./tests/data/clean.flac')
379
380 meta = p.get_meta()
381 self.assertEqual(meta['title'], 'I am so')
382
383 ret = p.remove_all()
384 self.assertTrue(ret)
385
386 p = audio.FLACParser('./tests/data/clean.cleaned.flac')
387 self.assertEqual(p.get_meta(), {})
388 self.assertTrue(p.remove_all())
389
390 os.remove('./tests/data/clean.flac')
391 os.remove('./tests/data/clean.cleaned.flac')
392 os.remove('./tests/data/clean.cleaned.cleaned.flac')
393
394 def test_office(self):
395 shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
396 p = office.MSOfficeParser('./tests/data/clean.docx')
397
398 meta = p.get_meta()
399 self.assertIsNotNone(meta)
400
401 ret = p.remove_all()
402 self.assertTrue(ret)
403
404 p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
405 self.assertEqual(p.get_meta(), {})
406 self.assertTrue(p.remove_all())
407
408 os.remove('./tests/data/clean.docx')
409 os.remove('./tests/data/clean.cleaned.docx')
410 os.remove('./tests/data/clean.cleaned.cleaned.docx')
411
412 def test_libreoffice(self):
413 shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
414 p = office.LibreOfficeParser('./tests/data/clean.odt')
415
416 meta = p.get_meta()
417 self.assertIsNotNone(meta)
418
419 ret = p.remove_all()
420 self.assertTrue(ret)
421
422 p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
423 self.assertEqual(p.get_meta(), {})
424 self.assertTrue(p.remove_all())
425
426 os.remove('./tests/data/clean.odt')
427 os.remove('./tests/data/clean.cleaned.odt')
428 os.remove('./tests/data/clean.cleaned.cleaned.odt')
429
430 def test_tiff(self):
431 shutil.copy('./tests/data/dirty.tiff', './tests/data/clean.tiff')
432 p = images.TiffParser('./tests/data/clean.tiff')
433
434 meta = p.get_meta()
435 self.assertEqual(meta['Model'], 'C7070WZ')
436
437 ret = p.remove_all()
438 self.assertTrue(ret)
439
440 p = images.TiffParser('./tests/data/clean.cleaned.tiff')
441 self.assertEqual(p.get_meta(), {})
442 self.assertTrue(p.remove_all())
443
444 os.remove('./tests/data/clean.tiff')
445 os.remove('./tests/data/clean.cleaned.tiff')
446 os.remove('./tests/data/clean.cleaned.cleaned.tiff')
447
448 def test_bmp(self):
449 shutil.copy('./tests/data/dirty.bmp', './tests/data/clean.bmp')
450 p = harmless.HarmlessParser('./tests/data/clean.bmp')
451
452 meta = p.get_meta()
453 self.assertEqual(meta, {}) # bmp has no meta :)
454
455 ret = p.remove_all()
456 self.assertTrue(ret)
457
458 p = harmless.HarmlessParser('./tests/data/clean.cleaned.bmp')
459 self.assertEqual(p.get_meta(), {})
460 self.assertTrue(p.remove_all())
461
462 os.remove('./tests/data/clean.bmp')
463 os.remove('./tests/data/clean.cleaned.bmp')
464 os.remove('./tests/data/clean.cleaned.cleaned.bmp')
465
466 def test_torrent(self):
467 shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.torrent')
468 p = torrent.TorrentParser('./tests/data/clean.torrent')
469
470 meta = p.get_meta()
471 self.assertEqual(meta, {'created by': b'mktorrent 1.0', 'creation date': 1522397702})
472
473 ret = p.remove_all()
474 self.assertTrue(ret)
475
476 p = torrent.TorrentParser('./tests/data/clean.cleaned.torrent')
477 self.assertEqual(p.get_meta(), {})
478 self.assertTrue(p.remove_all())
479
480 os.remove('./tests/data/clean.torrent')
481 os.remove('./tests/data/clean.cleaned.torrent')
482 os.remove('./tests/data/clean.cleaned.cleaned.torrent')
483
484 def test_odf(self):
485 shutil.copy('./tests/data/dirty.odf', './tests/data/clean.odf')
486 p = office.LibreOfficeParser('./tests/data/clean.odf')
487
488 meta = p.get_meta()
489 self.assertEqual(meta['meta.xml']['meta:creation-date'], '2018-04-23T00:18:59.438231281')
490
491 ret = p.remove_all()
492 self.assertTrue(ret)
493 287
494 p = office.LibreOfficeParser('./tests/data/clean.cleaned.odf') 288class TestCleaning(unittest.TestCase):
495 self.assertEqual(p.get_meta(), {}) 289 data = [{
496 self.assertTrue(p.remove_all()) 290 'name': 'pdf',
497 291 'parser': pdf.PDFParser,
498 os.remove('./tests/data/clean.odf') 292 'meta': {'producer': 'pdfTeX-1.40.14'},
499 os.remove('./tests/data/clean.cleaned.odf') 293 'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
500 os.remove('./tests/data/clean.cleaned.cleaned.odf') 294 }, {
501 295 'name': 'png',
502 def test_odg(self): 296 'parser': images.PNGParser,
503 shutil.copy('./tests/data/dirty.odg', './tests/data/clean.odg') 297 'meta': {'Comment': 'This is a comment, be careful!'},
504 p = office.LibreOfficeParser('./tests/data/clean.odg') 298 'expected_meta': {},
505 299 }, {
506 meta = p.get_meta() 300 'name': 'jpg',
507 self.assertEqual(meta['meta.xml']['dc:date'], '2018-04-23T00:26:59.385838550') 301 'parser': images.JPGParser,
508 302 'meta': {'Comment': 'Created with GIMP'},
509 ret = p.remove_all() 303 'expected_meta': {},
510 self.assertTrue(ret) 304 }, {
511 305 'name': 'mp3',
512 p = office.LibreOfficeParser('./tests/data/clean.cleaned.odg') 306 'parser': audio.MP3Parser,
513 self.assertEqual(p.get_meta(), {}) 307 'meta': {'TXXX:I am a': 'various comment'},
514 self.assertTrue(p.remove_all()) 308 'expected_meta': {},
515 309 }, {
516 os.remove('./tests/data/clean.odg') 310 'name': 'ogg',
517 os.remove('./tests/data/clean.cleaned.odg') 311 'parser': audio.OGGParser,
518 os.remove('./tests/data/clean.cleaned.cleaned.odg') 312 'meta': {'title': 'I am so'},
519 313 'expected_meta': {},
520 def test_txt(self): 314 }, {
521 shutil.copy('./tests/data/dirty.txt', './tests/data/clean.txt') 315 'name': 'flac',
522 p = harmless.HarmlessParser('./tests/data/clean.txt') 316 'parser': audio.FLACParser,
523 317 'meta': {'title': 'I am so'},
524 meta = p.get_meta() 318 'expected_meta': {},
525 self.assertEqual(meta, {}) 319 }, {
526 320 'name': 'docx',
527 ret = p.remove_all() 321 'parser': office.MSOfficeParser,
528 self.assertTrue(ret) 322 'meta': {'word/media/image1.png' :
529 323 {'Comment': 'This is a comment, be careful!',
530 p = harmless.HarmlessParser('./tests/data/clean.cleaned.txt') 324 'ModifyDate': '2018:03:20 21:59:25',
531 self.assertEqual(p.get_meta(), {}) 325 'PixelUnits': 'meters',
532 self.assertTrue(p.remove_all()) 326 'PixelsPerUnitX': 2835,
533 327 'PixelsPerUnitY': 2835,
534 os.remove('./tests/data/clean.txt') 328 'create_system': 'Weird',
535 os.remove('./tests/data/clean.cleaned.txt') 329 'date_time': '2018-03-31 13:15:38'} ,
536 os.remove('./tests/data/clean.cleaned.cleaned.txt') 330 },
537 331 'expected_meta': {},
538 def test_avi(self): 332 }, {
539 try: 333 'name': 'odt',
540 video._get_ffmpeg_path() 334 'parser': office.LibreOfficeParser,
541 except RuntimeError: 335 'meta': {
542 raise unittest.SkipTest 336 'Pictures/1000000000000032000000311EC5314D.png': {
543 337 'create_system': 'Weird',
544 shutil.copy('./tests/data/dirty.avi', './tests/data/clean.avi') 338 'date_time': '2011-07-26 02:40:16',
545 p = video.AVIParser('./tests/data/clean.avi') 339 'PixelsPerUnitX': 4847,
546 340 'PixelsPerUnitY': 4760,
547 meta = p.get_meta() 341 'PixelUnits': 'meters',
548 self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1') 342 },
549 343 },
550 ret = p.remove_all() 344 'expected_meta': {},
551 self.assertTrue(ret) 345 },{
552 346 'name': 'tiff',
553 p = video.AVIParser('./tests/data/clean.cleaned.avi') 347 'parser': images.TiffParser,
554 self.assertEqual(p.get_meta(), {}) 348 'meta': {'Model': 'C7070WZ'},
555 self.assertTrue(p.remove_all()) 349 'expected_meta':
556 350 {'Orientation': 'Horizontal (normal)',
557 os.remove('./tests/data/clean.avi') 351 'ResolutionUnit': 'inches',
558 os.remove('./tests/data/clean.cleaned.avi') 352 'XResolution': 72,
559 os.remove('./tests/data/clean.cleaned.cleaned.avi') 353 'YResolution': 72}
560 354 },{
561 def test_zip(self): 355 'name': 'bmp',
562 with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: 356 'parser': harmless.HarmlessParser,
563 zout.write('./tests/data/dirty.flac') 357 'meta': {},
564 zout.write('./tests/data/dirty.docx') 358 'expected_meta': {},
565 zout.write('./tests/data/dirty.jpg') 359 },{
566 p = archive.ZipParser('./tests/data/dirty.zip') 360 'name': 'torrent',
567 meta = p.get_meta() 361 'parser': torrent.TorrentParser,
568 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 362 'meta': {'created by': b'mktorrent 1.0', 'creation date': 1522397702},
569 363 'expected_meta': {},
570 ret = p.remove_all() 364 }, {
571 self.assertTrue(ret) 365 'name': 'odf',
572 366 'parser': office.LibreOfficeParser,
573 p = archive.ZipParser('./tests/data/dirty.cleaned.zip') 367 'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
574 self.assertEqual(p.get_meta(), {}) 368 '2018-04-22 22:20:24', 'meta:initial-creator': 'Julien Voisin',
575 self.assertTrue(p.remove_all()) 369 'meta:creation-date': '2018-04-23T00:18:59.438231281',
576 370 'dc:date': '2018-04-23T00:20:23.978564933', 'dc:creator':
577 os.remove('./tests/data/dirty.zip') 371 'Julien Voisin', 'meta:editing-duration': 'PT1M24S',
578 os.remove('./tests/data/dirty.cleaned.zip') 372 'meta:editing-cycles': '1', 'meta:generator':
579 os.remove('./tests/data/dirty.cleaned.cleaned.zip') 373 'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
580 374 'expected_meta': {},
581 375 }, {
582 def test_mp4(self): 376 'name': 'odg',
583 try: 377 'parser': office.LibreOfficeParser,
584 video._get_ffmpeg_path() 378 'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
585 except RuntimeError: 379 '2018-04-22 22:26:58', 'meta:initial-creator': 'Julien Voisin',
586 raise unittest.SkipTest 380 'meta:creation-date': '2018-04-23T00:25:59.953271949',
587 381 'dc:date': '2018-04-23T00:26:59.385838550', 'dc:creator':
588 shutil.copy('./tests/data/dirty.mp4', './tests/data/clean.mp4') 382 'Julien Voisin', 'meta:editing-duration': 'PT59S',
589 p = video.MP4Parser('./tests/data/clean.mp4') 383 'meta:editing-cycles': '1', 'meta:generator':
590 384 'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
591 meta = p.get_meta() 385 'expected_meta': {},
592 self.assertEqual(meta['Encoder'], 'HandBrake 0.9.4 2009112300') 386 }, {
593 387 'name': 'txt',
594 ret = p.remove_all() 388 'parser': harmless.HarmlessParser,
595 self.assertTrue(ret) 389 'meta': {},
596 390 'expected_meta': {},
597 p = video.MP4Parser('./tests/data/clean.cleaned.mp4') 391 },{
598 self.assertNotIn('Encoder', p.get_meta()) 392 'name': 'gif',
599 self.assertTrue(p.remove_all()) 393 'parser': images.GIFParser,
600 394 'meta': {'Comment': 'this is a test comment'},
601 os.remove('./tests/data/clean.mp4') 395 'expected_meta': {},
602 os.remove('./tests/data/clean.cleaned.mp4') 396 },{
603 os.remove('./tests/data/clean.cleaned.cleaned.mp4') 397 'name': 'css',
604 398 'parser': web.CSSParser,
605 def test_wmv(self): 399 'meta': {
606 try: 400 'harmful data': 'underline is cool',
607 video._get_ffmpeg_path() 401 'version': '1.0',
608 except RuntimeError: 402 'author': 'jvoisin'
609 raise unittest.SkipTest 403 },
610 404 'expected_meta': {},
611 shutil.copy('./tests/data/dirty.wmv', './tests/data/clean.wmv') 405 },{
612 p = video.WMVParser('./tests/data/clean.wmv') 406 'name': 'svg',
613 407 'parser': images.SVGParser,
614 meta = p.get_meta() 408 'meta': {
615 self.assertEqual(meta['EncodingSettings'], 'Lavf52.103.0') 409 'WorkDescription': "This is a test svg image for mat2's testsuite",
616 410 },
617 ret = p.remove_all() 411 'expected_meta': {},
618 self.assertTrue(ret) 412 } ,{
413 'name': 'ppm',
414 'parser': images.PPMParser,
415 'meta': {
416 '1': '# A metadata',
417 },
418 'expected_meta': {},
419 } ,{
420 'name': 'avi',
421 'ffmpeg': 1,
422 'parser': video.AVIParser,
423 'meta': {
424 'Software': 'MEncoder SVN-r33148-4.0.1',
425 },
426 'expected_meta': {},
427 } ,{
428 'name': 'mp4',
429 'ffmpeg': 1,
430 'parser': video.MP4Parser,
431 'meta': {
432 'Encoder': 'HandBrake 0.9.4 2009112300',
433 },
434 'expected_meta':
435 {'CompatibleBrands': ['isom', 'iso2', 'avc1', 'mp41'],
436 'CompressorID': 'avc1',
437 'GraphicsMode': 'srcCopy',
438 'HandlerDescription': 'SoundHandler',
439 'HandlerType': 'Metadata',
440 'HandlerVendorID': 'Apple',
441 'MajorBrand': 'MP4 Base Media v1 [IS0 14496-12:2003]',
442 'MediaHeaderVersion': 0,
443 'MinorVersion': '0.2.0',
444 'MovieDataOffset': 48,
445 'MovieHeaderVersion': 0,
446 'NextTrackID': 3,
447 'PreferredRate': 1,
448 'Rotation': 0,
449 'TimeScale': 1000,
450 'TrackHeaderVersion': 0,
451 'TrackID': 1,
452 'TrackLayer': 0},
453 },{
454 'name': 'wmv',
455 'ffmpeg': 1,
456 'parser': video.WMVParser,
457 'meta': {
458 'EncodingSettings': 'Lavf52.103.0',
459 },
460 'expected_meta': {},
461 }
462 ]
619 463
620 p = video.WMVParser('./tests/data/clean.cleaned.wmv') 464 def test_all_parametred(self):
621 self.assertNotIn('EncodingSettings', p.get_meta()) 465 for case in self.data:
622 self.assertTrue(p.remove_all()) 466 if 'ffmpeg' in case:
467 try:
468 video._get_ffmpeg_path()
469 except RuntimeError:
470 raise unittest.SkipTest
623 471
624 os.remove('./tests/data/clean.wmv') 472 print('[+] Testing %s' % case['name'])
625 os.remove('./tests/data/clean.cleaned.wmv') 473 target = './tests/data/clean.' + case['name']
626 os.remove('./tests/data/clean.cleaned.cleaned.wmv') 474 shutil.copy('./tests/data/dirty.' + case['name'], target)
475 p1 = case['parser'](target)
627 476
628 def test_gif(self): 477 meta = p1.get_meta()
629 shutil.copy('./tests/data/dirty.gif', './tests/data/clean.gif') 478 for k, v in case['meta'].items():
630 p = images.GIFParser('./tests/data/clean.gif') 479 if isinstance(v, dict):
480 for _k, _v in v.items():
481 self.assertEqual(meta[k][_k], _v)
482 else:
483 self.assertEqual(meta[k], v)
631 484
632 meta = p.get_meta() 485 p1.lightweight_cleaning = True
633 self.assertEqual(meta['Comment'], 'this is a test comment') 486 self.assertTrue(p1.remove_all())
634 487
635 ret = p.remove_all() 488 p2 = case['parser'](p1.output_filename)
636 self.assertTrue(ret) 489 self.assertEqual(p2.get_meta(), case['expected_meta'])
490 self.assertTrue(p2.remove_all())
637 491
638 p = images.GIFParser('./tests/data/clean.cleaned.gif') 492 os.remove(target)
639 self.assertNotIn('EncodingSettings', p.get_meta()) 493 os.remove(p1.output_filename)
640 self.assertTrue(p.remove_all()) 494 os.remove(p2.output_filename)
641 495
642 os.remove('./tests/data/clean.gif')
643 os.remove('./tests/data/clean.cleaned.gif')
644 os.remove('./tests/data/clean.cleaned.cleaned.gif')
645 496
646 def test_html(self): 497 def test_html(self):
647 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 498 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
@@ -688,7 +539,6 @@ class TestCleaning(unittest.TestCase):
688 os.remove('./tests/data/clean.html') 539 os.remove('./tests/data/clean.html')
689 os.remove('./tests/data/clean.cleaned.html') 540 os.remove('./tests/data/clean.cleaned.html')
690 541
691
692 def test_epub(self): 542 def test_epub(self):
693 shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub') 543 shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub')
694 p = epub.EPUBParser('./tests/data/clean.epub') 544 p = epub.EPUBParser('./tests/data/clean.epub')
@@ -711,25 +561,26 @@ class TestCleaning(unittest.TestCase):
711 os.remove('./tests/data/clean.cleaned.cleaned.epub') 561 os.remove('./tests/data/clean.cleaned.cleaned.epub')
712 562
713 563
714 def test_css(self): 564class TestCleaningArchives(unittest.TestCase):
715 shutil.copy('./tests/data/dirty.css', './tests/data/clean.css') 565 def test_zip(self):
716 p = web.CSSParser('./tests/data/clean.css') 566 with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
717 567 zout.write('./tests/data/dirty.flac')
718 self.assertEqual(p.get_meta(), { 568 zout.write('./tests/data/dirty.docx')
719 'harmful data': 'underline is cool', 569 zout.write('./tests/data/dirty.jpg')
720 'version': '1.0', 570 p = archive.ZipParser('./tests/data/dirty.zip')
721 'author': 'jvoisin'}) 571 meta = p.get_meta()
572 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
722 573
723 ret = p.remove_all() 574 ret = p.remove_all()
724 self.assertTrue(ret) 575 self.assertTrue(ret)
725 576
726 p = web.CSSParser('./tests/data/clean.cleaned.css') 577 p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
727 self.assertEqual(p.get_meta(), {}) 578 self.assertEqual(p.get_meta(), {})
728 self.assertTrue(p.remove_all()) 579 self.assertTrue(p.remove_all())
729 580
730 os.remove('./tests/data/clean.css') 581 os.remove('./tests/data/dirty.zip')
731 os.remove('./tests/data/clean.cleaned.css') 582 os.remove('./tests/data/dirty.cleaned.zip')
732 os.remove('./tests/data/clean.cleaned.cleaned.css') 583 os.remove('./tests/data/dirty.cleaned.cleaned.zip')
733 584
734 def test_tar(self): 585 def test_tar(self):
735 with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout: 586 with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout:
@@ -870,49 +721,3 @@ class TestCleaning(unittest.TestCase):
870 os.remove('./tests/data/dirty.tar.xz') 721 os.remove('./tests/data/dirty.tar.xz')
871 os.remove('./tests/data/dirty.cleaned.tar.xz') 722 os.remove('./tests/data/dirty.cleaned.tar.xz')
872 os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz') 723 os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz')
873
874 def test_svg(self):
875 shutil.copy('./tests/data/dirty.svg', './tests/data/clean.svg')
876 p = images.SVGParser('./tests/data/clean.svg')
877
878 meta = p.get_meta()
879 self.assertEqual(meta['WorkCreatorAgentTitle'], 'GNOME Design Team')
880 self.assertEqual(meta['WorkSubject'], ['mat2', 'logo', 'metadata'])
881 self.assertEqual(meta['ID'], 'svg11300')
882 self.assertEqual(meta['Output_extension'],
883 'org.inkscape.output.svg.inkscape')
884
885 ret = p.remove_all()
886 self.assertTrue(ret)
887
888 p = images.SVGParser('./tests/data/clean.cleaned.svg')
889 self.assertEqual(p.get_meta(), {})
890 self.assertTrue(p.remove_all())
891
892 os.remove('./tests/data/clean.svg')
893 os.remove('./tests/data/clean.cleaned.svg')
894 os.remove('./tests/data/clean.cleaned.cleaned.svg')
895
896 p = images.SVGParser('./tests/data/weird.svg')
897 self.assertEqual(p.get_meta()['Xmlns'], 'http://www.w3.org/1337/svg')
898
899 def test_ppm(self):
900 shutil.copy('./tests/data/dirty.ppm', './tests/data/clean.ppm')
901 p = images.PPMParser('./tests/data/clean.ppm')
902
903 meta = p.get_meta()
904 print(meta)
905 self.assertEqual(meta['1'], '# A metadata')
906
907 ret = p.remove_all()
908 self.assertTrue(ret)
909
910 p = images.PPMParser('./tests/data/clean.cleaned.ppm')
911 self.assertEqual(p.get_meta(), {})
912 self.assertTrue(p.remove_all())
913
914 os.remove('./tests/data/clean.ppm')
915 os.remove('./tests/data/clean.cleaned.ppm')
916 os.remove('./tests/data/clean.cleaned.cleaned.ppm')
917
918
diff --git a/tests/test_lightweigh_cleaning.py b/tests/test_lightweigh_cleaning.py
deleted file mode 100644
index a115f05..0000000
--- a/tests/test_lightweigh_cleaning.py
+++ /dev/null
@@ -1,106 +0,0 @@
1#!/usr/bin/env python3
2
3import unittest
4import shutil
5import os
6
7from libmat2 import pdf, images, torrent
8
9class TestLightWeightCleaning(unittest.TestCase):
10 def test_pdf(self):
11 shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
12 p = pdf.PDFParser('./tests/data/clean.pdf')
13
14 meta = p.get_meta()
15 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
16
17 p.lightweight_cleaning = True
18 ret = p.remove_all()
19 self.assertTrue(ret)
20
21 p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
22 expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
23 self.assertEqual(p.get_meta(), expected_meta)
24
25 os.remove('./tests/data/clean.pdf')
26 os.remove('./tests/data/clean.cleaned.pdf')
27
28 def test_png(self):
29 shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
30 p = images.PNGParser('./tests/data/clean.png')
31
32 meta = p.get_meta()
33 self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
34
35 p.lightweight_cleaning = True
36 ret = p.remove_all()
37 self.assertTrue(ret)
38
39 p = images.PNGParser('./tests/data/clean.cleaned.png')
40 self.assertEqual(p.get_meta(), {})
41
42 p = images.PNGParser('./tests/data/clean.png')
43 p.lightweight_cleaning = True
44 ret = p.remove_all()
45 self.assertTrue(ret)
46
47 os.remove('./tests/data/clean.png')
48 os.remove('./tests/data/clean.cleaned.png')
49
50 def test_jpg(self):
51 shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
52 p = images.JPGParser('./tests/data/clean.jpg')
53
54 meta = p.get_meta()
55 self.assertEqual(meta['Comment'], 'Created with GIMP')
56
57 p.lightweight_cleaning = True
58 ret = p.remove_all()
59 self.assertTrue(ret)
60
61 p = images.JPGParser('./tests/data/clean.cleaned.jpg')
62 self.assertEqual(p.get_meta(), {})
63
64 os.remove('./tests/data/clean.jpg')
65 os.remove('./tests/data/clean.cleaned.jpg')
66
67 def test_torrent(self):
68 shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.torrent')
69 p = torrent.TorrentParser('./tests/data/clean.torrent')
70
71 meta = p.get_meta()
72 self.assertEqual(meta['created by'], b'mktorrent 1.0')
73
74 p.lightweight_cleaning = True
75 ret = p.remove_all()
76 self.assertTrue(ret)
77
78 p = torrent.TorrentParser('./tests/data/clean.cleaned.torrent')
79 self.assertEqual(p.get_meta(), {})
80
81 os.remove('./tests/data/clean.torrent')
82 os.remove('./tests/data/clean.cleaned.torrent')
83
84 def test_tiff(self):
85 shutil.copy('./tests/data/dirty.tiff', './tests/data/clean.tiff')
86 p = images.TiffParser('./tests/data/clean.tiff')
87
88 meta = p.get_meta()
89 self.assertEqual(meta['ImageDescription'], 'OLYMPUS DIGITAL CAMERA ')
90
91 p.lightweight_cleaning = True
92 ret = p.remove_all()
93 self.assertTrue(ret)
94
95 p = images.TiffParser('./tests/data/clean.cleaned.tiff')
96 self.assertEqual(p.get_meta(),
97 {
98 'Orientation': 'Horizontal (normal)',
99 'ResolutionUnit': 'inches',
100 'XResolution': 72,
101 'YResolution': 72
102 }
103 )
104
105 os.remove('./tests/data/clean.tiff')
106 os.remove('./tests/data/clean.cleaned.tiff')
diff --git a/tests/test_lightweight_cleaning.py b/tests/test_lightweight_cleaning.py
new file mode 100644
index 0000000..38e06dc
--- /dev/null
+++ b/tests/test_lightweight_cleaning.py
@@ -0,0 +1,76 @@
1#!/usr/bin/env python3
2
3import unittest
4import shutil
5import os
6
7from libmat2 import pdf, images, torrent
8
9
10class TestLightWeightCleaning(unittest.TestCase):
11 data = [{
12 'name': 'pdf',
13 'parser': pdf.PDFParser,
14 'meta': {'producer': 'pdfTeX-1.40.14'},
15 'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
16 }, {
17 'name': 'png',
18 'parser': images.PNGParser,
19 'meta': {'Comment': 'This is a comment, be careful!'},
20 'expected_meta': {},
21 }, {
22 'name': 'jpg',
23 'parser': images.JPGParser,
24 'meta': {'Comment': 'Created with GIMP'},
25 'expected_meta': {},
26 }, {
27 'name': 'torrent',
28 'parser': torrent.TorrentParser,
29 'meta': {'created by': b'mktorrent 1.0'},
30 'expected_meta': {},
31 },{
32 'name': 'tiff',
33 'parser': images.TiffParser,
34 'meta': {'ImageDescription': 'OLYMPUS DIGITAL CAMERA '},
35 'expected_meta': {
36 'Orientation': 'Horizontal (normal)',
37 'ResolutionUnit': 'inches',
38 'XResolution': 72,
39 'YResolution': 72
40 }
41 },
42 ]
43
44 def test_all(self):
45 for case in self.data:
46 target = './tests/data/clean.' + case['name']
47 shutil.copy('./tests/data/dirty.' + case['name'], target)
48 p1 = case['parser'](target)
49
50 meta = p1.get_meta()
51 for k, v in case['meta'].items():
52 self.assertEqual(meta[k], v)
53
54 p1.lightweight_cleaning = True
55 self.assertTrue(p1.remove_all())
56
57 p2 = case['parser'](p1.output_filename)
58 self.assertEqual(p2.get_meta(), case['expected_meta'])
59
60 os.remove(target)
61 os.remove(p1.output_filename)
62
63 def test_exiftool_overwrite(self):
64 target = './tests/data/clean.png'
65 shutil.copy('./tests/data/dirty.png', target)
66
67 p1 = images.PNGParser(target)
68 p1.lightweight_cleaning = True
69 shutil.copy('./tests/data/dirty.png', p1.output_filename)
70 self.assertTrue(p1.remove_all())
71
72 p2 = images.PNGParser(p1.output_filename)
73 self.assertEqual(p2.get_meta(), {})
74
75 os.remove(target)
76 os.remove(p1.output_filename)
diff --git a/tests/test_policy.py b/tests/test_policy.py
index ef55644..206c92d 100644
--- a/tests/test_policy.py
+++ b/tests/test_policy.py
@@ -7,25 +7,26 @@ import os
7from libmat2 import office, UnknownMemberPolicy 7from libmat2 import office, UnknownMemberPolicy
8 8
9class TestPolicy(unittest.TestCase): 9class TestPolicy(unittest.TestCase):
10 target = './tests/data/clean.docx'
11
10 def test_policy_omit(self): 12 def test_policy_omit(self):
11 shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') 13 shutil.copy('./tests/data/embedded.docx', self.target)
12 p = office.MSOfficeParser('./tests/data/clean.docx') 14 p = office.MSOfficeParser(self.target)
13 p.unknown_member_policy = UnknownMemberPolicy.OMIT 15 p.unknown_member_policy = UnknownMemberPolicy.OMIT
14 self.assertTrue(p.remove_all()) 16 self.assertTrue(p.remove_all())
15 os.remove('./tests/data/clean.docx') 17 os.remove(p.filename)
16 os.remove('./tests/data/clean.cleaned.docx')
17 18
18 def test_policy_keep(self): 19 def test_policy_keep(self):
19 shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') 20 shutil.copy('./tests/data/embedded.docx', self.target)
20 p = office.MSOfficeParser('./tests/data/clean.docx') 21 p = office.MSOfficeParser(self.target)
21 p.unknown_member_policy = UnknownMemberPolicy.KEEP 22 p.unknown_member_policy = UnknownMemberPolicy.KEEP
22 self.assertTrue(p.remove_all()) 23 self.assertTrue(p.remove_all())
23 os.remove('./tests/data/clean.docx') 24 os.remove(p.filename)
24 os.remove('./tests/data/clean.cleaned.docx') 25 os.remove(p.output_filename)
25 26
26 def test_policy_unknown(self): 27 def test_policy_unknown(self):
27 shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') 28 shutil.copy('./tests/data/embedded.docx', self.target)
28 p = office.MSOfficeParser('./tests/data/clean.docx') 29 p = office.MSOfficeParser(self.target)
29 with self.assertRaises(ValueError): 30 with self.assertRaises(ValueError):
30 p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid') 31 p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid')
31 os.remove('./tests/data/clean.docx') 32 os.remove(p.filename)