diff options
| -rw-r--r-- | libmat2/abstract.py | 8 | ||||
| -rw-r--r-- | libmat2/pdf.py | 9 | ||||
| -rwxr-xr-x | mat2 | 3 | ||||
| -rw-r--r-- | tests/test_libmat2.py | 6 |
4 files changed, 13 insertions, 13 deletions
diff --git a/libmat2/abstract.py b/libmat2/abstract.py index cd72f2c..5bcaa69 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py | |||
| @@ -19,6 +19,7 @@ class AbstractParser(abc.ABC): | |||
| 19 | self.filename = filename | 19 | self.filename = filename |
| 20 | fname, extension = os.path.splitext(filename) | 20 | fname, extension = os.path.splitext(filename) |
| 21 | self.output_filename = fname + '.cleaned' + extension | 21 | self.output_filename = fname + '.cleaned' + extension |
| 22 | self.lightweight_cleaning = False | ||
| 22 | 23 | ||
| 23 | @abc.abstractmethod | 24 | @abc.abstractmethod |
| 24 | def get_meta(self) -> Dict[str, str]: | 25 | def get_meta(self) -> Dict[str, str]: |
| @@ -27,10 +28,3 @@ class AbstractParser(abc.ABC): | |||
| 27 | @abc.abstractmethod | 28 | @abc.abstractmethod |
| 28 | def remove_all(self) -> bool: | 29 | def remove_all(self) -> bool: |
| 29 | pass # pragma: no cover | 30 | pass # pragma: no cover |
| 30 | |||
| 31 | def remove_all_lightweight(self) -> bool: | ||
| 32 | """ This method removes _SOME_ metadata. | ||
| 33 | It might be useful to implement it for fileformats that do | ||
| 34 | not support non-destructive cleaning. | ||
| 35 | """ | ||
| 36 | return self.remove_all() | ||
diff --git a/libmat2/pdf.py b/libmat2/pdf.py index c8769aa..140b4f4 100644 --- a/libmat2/pdf.py +++ b/libmat2/pdf.py | |||
| @@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser): | |||
| 37 | except GLib.GError: # Invalid PDF | 37 | except GLib.GError: # Invalid PDF |
| 38 | raise ValueError | 38 | raise ValueError |
| 39 | 39 | ||
| 40 | def remove_all_lightweight(self): | 40 | def remove_all(self) -> bool: |
| 41 | if self.lightweight_cleaning is True: | ||
| 42 | return self.__remove_all_lightweight() | ||
| 43 | return self.__remove_all_thorough() | ||
| 44 | |||
| 45 | def __remove_all_lightweight(self) -> bool: | ||
| 41 | """ | 46 | """ |
| 42 | Load the document into Poppler, render pages on a new PDFSurface. | 47 | Load the document into Poppler, render pages on a new PDFSurface. |
| 43 | """ | 48 | """ |
| @@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser): | |||
| 64 | 69 | ||
| 65 | return True | 70 | return True |
| 66 | 71 | ||
| 67 | def remove_all(self): | 72 | def __remove_all_thorough(self) -> bool: |
| 68 | """ | 73 | """ |
| 69 | Load the document into Poppler, render pages on PNG, | 74 | Load the document into Poppler, render pages on PNG, |
| 70 | and shove those PNG into a new PDF. | 75 | and shove those PNG into a new PDF. |
diff --git a/mat2 b/mat2 --- a/mat2 +++ b/mat2 | |||
| @@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) | |||
| 94 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) | 94 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) |
| 95 | return False | 95 | return False |
| 96 | p.unknown_member_policy = policy | 96 | p.unknown_member_policy = policy |
| 97 | if is_lightweight: | 97 | p.lightweight_cleaning = is_lightweight |
| 98 | return p.remove_all_lightweight() | ||
| 99 | return p.remove_all() | 98 | return p.remove_all() |
| 100 | 99 | ||
| 101 | 100 | ||
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 6a2af91..665bab0 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py | |||
| @@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase): | |||
| 190 | meta = p.get_meta() | 190 | meta = p.get_meta() |
| 191 | self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') | 191 | self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') |
| 192 | 192 | ||
| 193 | ret = p.remove_all_lightweight() | 193 | p.lightweight_cleaning = True |
| 194 | ret = p.remove_all() | ||
| 194 | self.assertTrue(ret) | 195 | self.assertTrue(ret) |
| 195 | 196 | ||
| 196 | p = pdf.PDFParser('./tests/data/clean.cleaned.pdf') | 197 | p = pdf.PDFParser('./tests/data/clean.cleaned.pdf') |
| @@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase): | |||
| 207 | meta = p.get_meta() | 208 | meta = p.get_meta() |
| 208 | self.assertEqual(meta['Comment'], 'This is a comment, be careful!') | 209 | self.assertEqual(meta['Comment'], 'This is a comment, be careful!') |
| 209 | 210 | ||
| 210 | ret = p.remove_all_lightweight() | 211 | p.lightweight_cleaning = True |
| 212 | ret = p.remove_all() | ||
| 211 | self.assertTrue(ret) | 213 | self.assertTrue(ret) |
| 212 | 214 | ||
| 213 | p = images.PNGParser('./tests/data/clean.cleaned.png') | 215 | p = images.PNGParser('./tests/data/clean.cleaned.png') |
