From 82cc822a1dc7090f7a6af977ed6d4b7b945d038a Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sat, 27 Apr 2019 04:05:36 -0700
Subject: Add tar archive support

---
 tests/test_corrupted_files.py | 29 +++++++++++++++++++++++--
 tests/test_libmat2.py         | 50 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 2 deletions(-)

(limited to 'tests')

diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 4a16d51..1331f1c 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 
 import unittest
+import time
 import shutil
 import os
 import logging
 import zipfile
+import tarfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent
-from libmat2 import harmless, video, web
+from libmat2 import harmless, video, web, archive
 
 # No need to logging messages, should something go wrong,
 # the testsuite _will_ fail.
@@ -278,7 +280,6 @@ class TestCorruptedFiles(unittest.TestCase):
         p.remove_all()
         os.remove('./tests/data/clean.html')
 
-
     def test_epub(self):
         with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:
             zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
@@ -291,3 +292,27 @@ class TestCorruptedFiles(unittest.TestCase):
         self.assertFalse(p.remove_all())
         os.remove('./tests/data/clean.epub')
 
+    def test_tar(self):
+        with tarfile.TarFile('./tests/data/clean.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.flac')
+            zout.add('./tests/data/dirty.docx')
+            zout.add('./tests/data/dirty.jpg')
+            zout.add('./tests/data/embedded_corrupted.docx')
+            tarinfo = tarfile.TarInfo(name='./tests/data/dirty.png')
+            tarinfo.mtime = time.time()
+            tarinfo.uid = 1337
+            tarinfo.gid = 1338
+            with open('./tests/data/dirty.png', 'rb') as f:
+                zout.addfile(tarinfo, f)
+        p, mimetype = parser_factory.get_parser('./tests/data/clean.tar')
+        self.assertEqual(mimetype, 'application/x-tar')
+        meta = p.get_meta()
+        self.assertEqual(meta['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
+        self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+        self.assertFalse(p.remove_all())
+        os.remove('./tests/data/clean.tar')
+
+        shutil.copy('./tests/data/dirty.png', './tests/data/clean.tar')
+        with self.assertRaises(ValueError):
+            archive.TarParser('./tests/data/clean.tar')
+        os.remove('./tests/data/clean.tar')
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 397855e..1d2a22a 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -4,6 +4,8 @@ import unittest
 import shutil
 import os
 import re
+import tarfile
+import tempfile
 import zipfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
@@ -195,6 +197,19 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['version'], '1.0')
         self.assertEqual(meta['harmful data'], 'underline is cool')
 
+    def test_tar(self):
+        with tarfile.TarFile('./tests/data/dirty.tar', 'w') as tout:
+            tout.add('./tests/data/dirty.flac')
+            tout.add('./tests/data/dirty.docx')
+            tout.add('./tests/data/dirty.jpg')
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.tar')
+        self.assertEqual(mimetype, 'application/x-tar')
+        meta = p.get_meta()
+        self.assertEqual(meta['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
+        self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+        os.remove('./tests/data/dirty.tar')
+
+
 class TestRemovingThumbnails(unittest.TestCase):
     def test_odt(self):
         shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt')
@@ -702,3 +717,38 @@ class TestCleaning(unittest.TestCase):
         os.remove('./tests/data/clean.css')
         os.remove('./tests/data/clean.cleaned.css')
         os.remove('./tests/data/clean.cleaned.cleaned.css')
+
+    def test_tar(self):
+        with tarfile.TarFile('./tests/data/dirty.tar', 'w') as zout:
+            zout.add('./tests/data/dirty.flac')
+            zout.add('./tests/data/dirty.docx')
+            zout.add('./tests/data/dirty.jpg')
+        p = archive.TarParser('./tests/data/dirty.tar')
+        meta = p.get_meta()
+        self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = archive.TarParser('./tests/data/dirty.cleaned.tar')
+        self.assertEqual(p.get_meta(), {})
+        self.assertTrue(p.remove_all())
+
+        tmp_dir = tempfile.mkdtemp()
+        with tarfile.open('./tests/data/dirty.cleaned.tar') as zout:
+            zout.extractall(path=tmp_dir)
+            zout.close()
+
+        number_of_files = 0
+        for root, _, fnames in os.walk(tmp_dir):
+            for f in fnames:
+                complete_path = os.path.join(root, f)
+                p, _ = parser_factory.get_parser(complete_path)
+                self.assertIsNotNone(p)
+                self.assertEqual(p.get_meta(), {})
+                number_of_files += 1
+        self.assertEqual(number_of_files, 3)
+
+        os.remove('./tests/data/dirty.tar')
+        os.remove('./tests/data/dirty.cleaned.tar')
+        os.remove('./tests/data/dirty.cleaned.cleaned.tar')
-- 
cgit v1.3