summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libmat2/abstract.py 8
-rw-r--r-- libmat2/pdf.py 9
-rwxr-xr-x mat2 3
-rw-r--r-- tests/test_libmat2.py 6
4 files changed, 13 insertions, 13 deletions
diff --git a/libmat2/abstract.py b/libmat2/abstract.py
index cd72f2c..5bcaa69 100644
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
@@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
19 self.filename = filename 19 self.filename = filename
20 fname, extension = os.path.splitext(filename) 20 fname, extension = os.path.splitext(filename)
21 self.output_filename = fname + '.cleaned' + extension 21 self.output_filename = fname + '.cleaned' + extension
22 self.lightweight_cleaning = False
22 23
23 @abc.abstractmethod 24 @abc.abstractmethod
24 def get_meta(self) -> Dict[str, str]: 25 def get_meta(self) -> Dict[str, str]:
@@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
27 @abc.abstractmethod 28 @abc.abstractmethod
28 def remove_all(self) -> bool: 29 def remove_all(self) -> bool:
29 pass # pragma: no cover 30 pass # pragma: no cover
30
31 def remove_all_lightweight(self) -> bool:
32 """ This method removes _SOME_ metadata.
33 It might be useful to implement it for fileformats that do
34 not support non-destructive cleaning.
35 """
36 return self.remove_all()
diff --git a/libmat2/pdf.py b/libmat2/pdf.py
index c8769aa..140b4f4 100644
--- a/libmat2/pdf.py
+++ b/libmat2/pdf.py
@@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
37 except GLib.GError: # Invalid PDF 37 except GLib.GError: # Invalid PDF
38 raise ValueError 38 raise ValueError
39 39
40 def remove_all_lightweight(self): 40 def remove_all(self) -> bool:
41 if self.lightweight_cleaning is True:
42 return self.__remove_all_lightweight()
43 return self.__remove_all_thorough()
44
45 def __remove_all_lightweight(self) -> bool:
41 """ 46 """
42 Load the document into Poppler, render pages on a new PDFSurface. 47 Load the document into Poppler, render pages on a new PDFSurface.
43 """ 48 """
@@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
64 69
65 return True 70 return True
66 71
67 def remove_all(self): 72 def __remove_all_thorough(self) -> bool:
68 """ 73 """
69 Load the document into Poppler, render pages on PNG, 74 Load the document into Poppler, render pages on PNG,
70 and shove those PNG into a new PDF. 75 and shove those PNG into a new PDF.
diff --git a/mat2 b/mat2
index b4a6033..ba1f0ac 100755
--- a/mat2
+++ b/mat2
@@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
94 print("[-] %s's format (%s) is not supported" % (filename, mtype)) 94 print("[-] %s's format (%s) is not supported" % (filename, mtype))
95 return False 95 return False
96 p.unknown_member_policy = policy 96 p.unknown_member_policy = policy
97 if is_lightweight: 97 p.lightweight_cleaning = is_lightweight
98 return p.remove_all_lightweight()
99 return p.remove_all() 98 return p.remove_all()
100 99
101 100
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 6a2af91..665bab0 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
190 meta = p.get_meta() 190 meta = p.get_meta()
191 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') 191 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
192 192
193 ret = p.remove_all_lightweight() 193 p.lightweight_cleaning = True
194 ret = p.remove_all()
194 self.assertTrue(ret) 195 self.assertTrue(ret)
195 196
196 p = pdf.PDFParser('./tests/data/clean.cleaned.pdf') 197 p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
@@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
207 meta = p.get_meta() 208 meta = p.get_meta()
208 self.assertEqual(meta['Comment'], 'This is a comment, be careful!') 209 self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
209 210
210 ret = p.remove_all_lightweight() 211 p.lightweight_cleaning = True
212 ret = p.remove_all()
211 self.assertTrue(ret) 213 self.assertTrue(ret)
212 214
213 p = images.PNGParser('./tests/data/clean.cleaned.png') 215 p = images.PNGParser('./tests/data/clean.cleaned.png')