From f1a071d460507fd1bb1721deafd2a8d9f88f5b05 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Tue, 23 Oct 2018 16:14:21 +0200
Subject: Implement lightweight cleaning for png and tiff

---
 tests/test_corrupted_files.py     |  7 +++++
 tests/test_libmat2.py             | 36 ----------------------
 tests/test_lightweigh_cleaning.py | 65 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 36 deletions(-)
 create mode 100644 tests/test_lightweigh_cleaning.py

(limited to 'tests')

diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 82c6c3b..181d4d2 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -194,6 +194,13 @@ class TestCorruptedFiles(unittest.TestCase):
             images.JPGParser('./tests/data/clean.jpg')
         os.remove('./tests/data/clean.jpg')
 
+    def test_png_lightweight(self):
+        return
+        shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.png')
+        p = images.PNGParser('./tests/data/clean.png')
+        self.assertTrue(p.remove_all())
+        os.remove('./tests/data/clean.png')
+
     def test_avi(self):
         try:
             video._get_ffmpeg_path()
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index f5fc9e8..46d6aaa 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -212,42 +212,6 @@ class TestRevisionsCleaning(unittest.TestCase):
         os.remove('./tests/data/revision_clean.docx')
         os.remove('./tests/data/revision_clean.cleaned.docx')
 
-class TestLightWeightCleaning(unittest.TestCase):
-    def test_pdf(self):
-        shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
-        p = pdf.PDFParser('./tests/data/clean.pdf')
-
-        meta = p.get_meta()
-        self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
-
-        p.lightweight_cleaning = True
-        ret = p.remove_all()
-        self.assertTrue(ret)
-
-        p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
-        expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
-        self.assertEqual(p.get_meta(), expected_meta)
-
-        os.remove('./tests/data/clean.pdf')
-        os.remove('./tests/data/clean.cleaned.pdf')
-
-    def test_png(self):
-        shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
-        p = images.PNGParser('./tests/data/clean.png')
-
-        meta = p.get_meta()
-        self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
-
-        p.lightweight_cleaning = True
-        ret = p.remove_all()
-        self.assertTrue(ret)
-
-        p = images.PNGParser('./tests/data/clean.cleaned.png')
-        self.assertEqual(p.get_meta(), {})
-
-        os.remove('./tests/data/clean.png')
-        os.remove('./tests/data/clean.cleaned.png')
-
 class TestCleaning(unittest.TestCase):
     def test_pdf(self):
         shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
diff --git a/tests/test_lightweigh_cleaning.py b/tests/test_lightweigh_cleaning.py
new file mode 100644
index 0000000..7af31ad
--- /dev/null
+++ b/tests/test_lightweigh_cleaning.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+import unittest
+import shutil
+import os
+
+from libmat2 import pdf, images
+
+class TestLightWeightCleaning(unittest.TestCase):
+    def test_pdf(self):
+        shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
+        p = pdf.PDFParser('./tests/data/clean.pdf')
+
+        meta = p.get_meta()
+        self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
+
+        p.lightweight_cleaning = True
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
+        expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
+        self.assertEqual(p.get_meta(), expected_meta)
+
+        os.remove('./tests/data/clean.pdf')
+        os.remove('./tests/data/clean.cleaned.pdf')
+
+    def test_png(self):
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
+        p = images.PNGParser('./tests/data/clean.png')
+
+        meta = p.get_meta()
+        self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
+
+        p.lightweight_cleaning = True
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = images.PNGParser('./tests/data/clean.cleaned.png')
+        self.assertEqual(p.get_meta(), {})
+
+        p = images.PNGParser('./tests/data/clean.png')
+        p.lightweight_cleaning = True
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        os.remove('./tests/data/clean.png')
+        os.remove('./tests/data/clean.cleaned.png')
+
+    def test_jpg(self):
+        shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
+        p = images.JPGParser('./tests/data/clean.jpg')
+
+        meta = p.get_meta()
+        self.assertEqual(meta['Comment'], 'Created with GIMP')
+
+        p.lightweight_cleaning = True
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = images.JPGParser('./tests/data/clean.cleaned.jpg')
+        self.assertEqual(p.get_meta(), {})
+
+        os.remove('./tests/data/clean.jpg')
+        os.remove('./tests/data/clean.cleaned.jpg')
-- 
cgit v1.3