diff options
| author | jvoisin | 2019-04-27 04:05:36 -0700 |
|---|---|---|
| committer | jvoisin | 2019-04-27 04:05:36 -0700 |
| commit | 82cc822a1dc7090f7a6af977ed6d4b7b945d038a (patch) | |
| tree | 8ab4dd83c074395f18e4b53730fd4a62edbffa02 /tests | |
| parent | 20ed5eb7d665ac9cb8b33929b4898c0a837fdb66 (diff) | |
Add tar archive support
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/test_corrupted_files.py | 29 | ||||
| -rw-r--r-- | tests/test_libmat2.py | 50 |
2 files changed, 77 insertions, 2 deletions
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 4a16d51..1331f1c 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
| @@ -1,13 +1,15 @@ | |||
| 1 | #!/usr/bin/env python3 | 1 | #!/usr/bin/env python3 |
| 2 | 2 | ||
| 3 | import unittest | 3 | import unittest |
| 4 | import time | ||
| 4 | import shutil | 5 | import shutil |
| 5 | import os | 6 | import os |
| 6 | import logging | 7 | import logging |
| 7 | import zipfile | 8 | import zipfile |
| 9 | import tarfile | ||
| 8 | 10 | ||
| 9 | from libmat2 import pdf, images, audio, office, parser_factory, torrent | 11 | from libmat2 import pdf, images, audio, office, parser_factory, torrent |
| 10 | from libmat2 import harmless, video, web | 12 | from libmat2 import harmless, video, web, archive |
| 11 | 13 | ||
| 12 | # No need to logging messages, should something go wrong, | 14 | # No need to logging messages, should something go wrong, |
| 13 | # the testsuite _will_ fail. | 15 | # the testsuite _will_ fail. |
| @@ -278,7 +280,6 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 278 | p.remove_all() | 280 | p.remove_all() |
| 279 | os.remove('./tests/data/clean.html') | 281 | os.remove('./tests/data/clean.html') |
| 280 | 282 | ||
| 281 | |||
| 282 | def test_epub(self): | 283 | def test_epub(self): |
| 283 | with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout: | 284 | with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout: |
| 284 | zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf') | 285 | zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf') |
| @@ -291,3 +292,27 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 291 | self.assertFalse(p.remove_all()) | 292 | self.assertFalse(p.remove_all()) |
| 292 | os.remove('./tests/data/clean.epub') | 293 | os.remove('./tests/data/clean.epub') |
| 293 | 294 | ||
| 295 | def test_tar(self): | ||
| 296 | with tarfile.TarFile('./tests/data/clean.tar', 'w') as zout: | ||
| 297 | zout.add('./tests/data/dirty.flac') | ||
| 298 | zout.add('./tests/data/dirty.docx') | ||
| 299 | zout.add('./tests/data/dirty.jpg') | ||
| 300 | zout.add('./tests/data/embedded_corrupted.docx') | ||
| 301 | tarinfo = tarfile.TarInfo(name='./tests/data/dirty.png') | ||
| 302 | tarinfo.mtime = time.time() | ||
| 303 | tarinfo.uid = 1337 | ||
| 304 | tarinfo.gid = 1338 | ||
| 305 | with open('./tests/data/dirty.png', 'rb') as f: | ||
| 306 | zout.addfile(tarinfo, f) | ||
| 307 | p, mimetype = parser_factory.get_parser('./tests/data/clean.tar') | ||
| 308 | self.assertEqual(mimetype, 'application/x-tar') | ||
| 309 | meta = p.get_meta() | ||
| 310 | self.assertEqual(meta['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') | ||
| 311 | self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | ||
| 312 | self.assertFalse(p.remove_all()) | ||
| 313 | os.remove('./tests/data/clean.tar') | ||
| 314 | |||
| 315 | shutil.copy('./tests/data/dirty.png', './tests/data/clean.tar') | ||
| 316 | with self.assertRaises(ValueError): | ||
| 317 | archive.TarParser('./tests/data/clean.tar') | ||
| 318 | os.remove('./tests/data/clean.tar') | ||
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 397855e..1d2a22a 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
| @@ -4,6 +4,8 @@ import unittest | |||
| 4 | import shutil | 4 | import shutil |
| 5 | import os | 5 | import os |
| 6 | import re | 6 | import re |
| 7 | import tarfile | ||
| 8 | import tempfile | ||
| 7 | import zipfile | 9 | import zipfile |
| 8 | 10 | ||
| 9 | from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless | 11 | from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless |
| @@ -195,6 +197,19 @@ class TestGetMeta(unittest.TestCase): | |||
| 195 | self.assertEqual(meta['version'], '1.0') | 197 | self.assertEqual(meta['version'], '1.0') |
| 196 | self.assertEqual(meta['harmful data'], 'underline is cool') | 198 | self.assertEqual(meta['harmful data'], 'underline is cool') |
| 197 | 199 | ||
| 200 | def test_tar(self): | ||
| 201 | with tarfile.TarFile('./tests/data/dirty.tar', 'w') as tout: | ||
| 202 | tout.add('./tests/data/dirty.flac') | ||
| 203 | tout.add('./tests/data/dirty.docx') | ||
| 204 | tout.add('./tests/data/dirty.jpg') | ||
| 205 | p, mimetype = parser_factory.get_parser('./tests/data/dirty.tar') | ||
| 206 | self.assertEqual(mimetype, 'application/x-tar') | ||
| 207 | meta = p.get_meta() | ||
| 208 | self.assertEqual(meta['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') | ||
| 209 | self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | ||
| 210 | os.remove('./tests/data/dirty.tar') | ||
| 211 | |||
| 212 | |||
| 198 | class TestRemovingThumbnails(unittest.TestCase): | 213 | class TestRemovingThumbnails(unittest.TestCase): |
| 199 | def test_odt(self): | 214 | def test_odt(self): |
| 200 | shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt') | 215 | shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt') |
| @@ -702,3 +717,38 @@ class TestCleaning(unittest.TestCase): | |||
| 702 | os.remove('./tests/data/clean.css') | 717 | os.remove('./tests/data/clean.css') |
| 703 | os.remove('./tests/data/clean.cleaned.css') | 718 | os.remove('./tests/data/clean.cleaned.css') |
| 704 | os.remove('./tests/data/clean.cleaned.cleaned.css') | 719 | os.remove('./tests/data/clean.cleaned.cleaned.css') |
| 720 | |||
| 721 | def test_tar(self): | ||
| 722 | with tarfile.TarFile('./tests/data/dirty.tar', 'w') as zout: | ||
| 723 | zout.add('./tests/data/dirty.flac') | ||
| 724 | zout.add('./tests/data/dirty.docx') | ||
| 725 | zout.add('./tests/data/dirty.jpg') | ||
| 726 | p = archive.TarParser('./tests/data/dirty.tar') | ||
| 727 | meta = p.get_meta() | ||
| 728 | self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | ||
| 729 | |||
| 730 | ret = p.remove_all() | ||
| 731 | self.assertTrue(ret) | ||
| 732 | |||
| 733 | p = archive.TarParser('./tests/data/dirty.cleaned.tar') | ||
| 734 | self.assertEqual(p.get_meta(), {}) | ||
| 735 | self.assertTrue(p.remove_all()) | ||
| 736 | |||
| 737 | tmp_dir = tempfile.mkdtemp() | ||
| 738 | with tarfile.open('./tests/data/dirty.cleaned.tar') as zout: | ||
| 739 | zout.extractall(path=tmp_dir) | ||
| 740 | zout.close() | ||
| 741 | |||
| 742 | number_of_files = 0 | ||
| 743 | for root, _, fnames in os.walk(tmp_dir): | ||
| 744 | for f in fnames: | ||
| 745 | complete_path = os.path.join(root, f) | ||
| 746 | p, _ = parser_factory.get_parser(complete_path) | ||
| 747 | self.assertIsNotNone(p) | ||
| 748 | self.assertEqual(p.get_meta(), {}) | ||
| 749 | number_of_files += 1 | ||
| 750 | self.assertEqual(number_of_files, 3) | ||
| 751 | |||
| 752 | os.remove('./tests/data/dirty.tar') | ||
| 753 | os.remove('./tests/data/dirty.cleaned.tar') | ||
| 754 | os.remove('./tests/data/dirty.cleaned.cleaned.tar') | ||
