diff options
| -rw-r--r-- | libmat2/parser_factory.py | 13 | ||||
| -rwxr-xr-x | mat2 | 12 | ||||
| -rw-r--r-- | tests/test_corrupted_files.py | 26 |
3 files changed, 30 insertions, 21 deletions
diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py index 23529db..842f0d7 100644 --- a/libmat2/parser_factory.py +++ b/libmat2/parser_factory.py | |||
| @@ -40,7 +40,10 @@ def _get_parsers() -> List[T]: | |||
| 40 | 40 | ||
| 41 | 41 | ||
| 42 | def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]: | 42 | def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]: |
| 43 | """ Return the appropriate parser for a given filename. """ | 43 | """ Return the appropriate parser for a given filename. |
| 44 | |||
| 45 | :raises ValueError: Raised if the instantiation of the parser went wrong. | ||
| 46 | """ | ||
| 44 | mtype, _ = mimetypes.guess_type(filename) | 47 | mtype, _ = mimetypes.guess_type(filename) |
| 45 | 48 | ||
| 46 | _, extension = os.path.splitext(filename) | 49 | _, extension = os.path.splitext(filename) |
| @@ -53,10 +56,6 @@ def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]: | |||
| 53 | 56 | ||
| 54 | for parser_class in _get_parsers(): # type: ignore | 57 | for parser_class in _get_parsers(): # type: ignore |
| 55 | if mtype in parser_class.mimetypes: | 58 | if mtype in parser_class.mimetypes: |
| 56 | try: | 59 | # This instantiation might raise a ValueError on malformed files |
| 57 | return parser_class(filename), mtype | 60 | return parser_class(filename), mtype |
| 58 | except ValueError as e: | ||
| 59 | logging.info("Got an exception when trying to instantiate " | ||
| 60 | "%s for %s: %s", parser_class, filename, e) | ||
| 61 | return None, mtype | ||
| 62 | return None, mtype | 61 | return None, mtype |
diff --git a/mat2 b/mat2 --- a/mat2 +++ b/mat2 | |||
| @@ -85,7 +85,11 @@ def show_meta(filename: str, sandbox: bool): | |||
| 85 | if not __check_file(filename): | 85 | if not __check_file(filename): |
| 86 | return | 86 | return |
| 87 | 87 | ||
| 88 | p, mtype = parser_factory.get_parser(filename) # type: ignore | 88 | try: |
| 89 | p, mtype = parser_factory.get_parser(filename) # type: ignore | ||
| 90 | except ValueError as e: | ||
| 91 | print("[-] something went wrong when processing %s: %s" % (filename, e)) | ||
| 92 | return | ||
| 89 | if p is None: | 93 | if p is None: |
| 90 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) | 94 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) |
| 91 | return | 95 | return |
| @@ -126,7 +130,11 @@ def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool | |||
| 126 | if not __check_file(filename, mode): | 130 | if not __check_file(filename, mode): |
| 127 | return False | 131 | return False |
| 128 | 132 | ||
| 129 | p, mtype = parser_factory.get_parser(filename) # type: ignore | 133 | try: |
| 134 | p, mtype = parser_factory.get_parser(filename) # type: ignore | ||
| 135 | except ValueError as e: | ||
| 136 | print("[-] something went wrong when cleaning %s: %s" % (filename, e)) | ||
| 137 | return False | ||
| 130 | if p is None: | 138 | if p is None: |
| 131 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) | 139 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) |
| 132 | return False | 140 | return False |
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py index 8a8cffe..2adf42e 100644 --- a/tests/test_corrupted_files.py +++ b/tests/test_corrupted_files.py | |||
| @@ -65,8 +65,10 @@ class TestCorruptedEmbedded(unittest.TestCase): | |||
| 65 | def test_docx(self): | 65 | def test_docx(self): |
| 66 | shutil.copy('./tests/data/embedded_corrupted.docx', './tests/data/clean.docx') | 66 | shutil.copy('./tests/data/embedded_corrupted.docx', './tests/data/clean.docx') |
| 67 | parser, _ = parser_factory.get_parser('./tests/data/clean.docx') | 67 | parser, _ = parser_factory.get_parser('./tests/data/clean.docx') |
| 68 | self.assertFalse(parser.remove_all()) | 68 | with self.assertRaises(ValueError): |
| 69 | self.assertIsNotNone(parser.get_meta()) | 69 | parser.remove_all() |
| 70 | with self.assertRaises(ValueError): | ||
| 71 | self.assertIsNotNone(parser.get_meta()) | ||
| 70 | os.remove('./tests/data/clean.docx') | 72 | os.remove('./tests/data/clean.docx') |
| 71 | 73 | ||
| 72 | def test_odt(self): | 74 | def test_odt(self): |
| @@ -120,8 +122,8 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 120 | 122 | ||
| 121 | def test_png2(self): | 123 | def test_png2(self): |
| 122 | shutil.copy('./tests/test_libmat2.py', './tests/clean.png') | 124 | shutil.copy('./tests/test_libmat2.py', './tests/clean.png') |
| 123 | parser, _ = parser_factory.get_parser('./tests/clean.png') | 125 | with self.assertRaises(ValueError): |
| 124 | self.assertIsNone(parser) | 126 | parser_factory.get_parser('./tests/clean.png') |
| 125 | os.remove('./tests/clean.png') | 127 | os.remove('./tests/clean.png') |
| 126 | 128 | ||
| 127 | def test_torrent(self): | 129 | def test_torrent(self): |
| @@ -237,10 +239,10 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 237 | zout.write('./tests/data/embedded_corrupted.docx') | 239 | zout.write('./tests/data/embedded_corrupted.docx') |
| 238 | p, mimetype = parser_factory.get_parser('./tests/data/clean.zip') | 240 | p, mimetype = parser_factory.get_parser('./tests/data/clean.zip') |
| 239 | self.assertEqual(mimetype, 'application/zip') | 241 | self.assertEqual(mimetype, 'application/zip') |
| 240 | meta = p.get_meta() | 242 | with self.assertRaises(ValueError): |
| 241 | self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') | 243 | p.get_meta() |
| 242 | self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | 244 | with self.assertRaises(ValueError): |
| 243 | self.assertFalse(p.remove_all()) | 245 | self.assertFalse(p.remove_all()) |
| 244 | os.remove('./tests/data/clean.zip') | 246 | os.remove('./tests/data/clean.zip') |
| 245 | 247 | ||
| 246 | def test_html(self): | 248 | def test_html(self): |
| @@ -315,10 +317,10 @@ class TestCorruptedFiles(unittest.TestCase): | |||
| 315 | zout.addfile(tarinfo, f) | 317 | zout.addfile(tarinfo, f) |
| 316 | p, mimetype = parser_factory.get_parser('./tests/data/clean.tar') | 318 | p, mimetype = parser_factory.get_parser('./tests/data/clean.tar') |
| 317 | self.assertEqual(mimetype, 'application/x-tar') | 319 | self.assertEqual(mimetype, 'application/x-tar') |
| 318 | meta = p.get_meta() | 320 | with self.assertRaises(ValueError): |
| 319 | self.assertEqual(meta['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') | 321 | p.get_meta() |
| 320 | self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') | 322 | with self.assertRaises(ValueError): |
| 321 | self.assertFalse(p.remove_all()) | 323 | self.assertFalse(p.remove_all()) |
| 322 | os.remove('./tests/data/clean.tar') | 324 | os.remove('./tests/data/clean.tar') |
| 323 | 325 | ||
| 324 | shutil.copy('./tests/data/dirty.png', './tests/data/clean.tar') | 326 | shutil.copy('./tests/data/dirty.png', './tests/data/clean.tar') |
