summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jvoisin, 2019-08-31 10:31:08 -0700
committer: jvoisin, 2019-08-31 10:31:08 -0700
commit: 40669186c937a36fa73c16d5bba0f343005f398d (patch)
tree: 015fbf9b6befa9563f5e01726163fca129a082bf
parent: d76a6cbb1832fb3337171edeb38e38b498ba4ef9 (diff)
Add support for inplace cleaning
-rwxr-xr-x  mat2  17
-rw-r--r--  tests/test_climat2.py  35
2 files changed, 46 insertions, 6 deletions
diff --git a/mat2 b/mat2
index 70712b8..b9f02f2 100755
--- a/mat2
+++ b/mat2
@@ -53,6 +53,8 @@ def create_arg_parser() -> argparse.ArgumentParser:
53 help='how to handle unknown members of archive-style ' 53 help='how to handle unknown members of archive-style '
54 'files (policy should be one of: %s) [Default: abort]' % 54 'files (policy should be one of: %s) [Default: abort]' %
55 ', '.join(p.value for p in UnknownMemberPolicy)) 55 ', '.join(p.value for p in UnknownMemberPolicy))
56 parser.add_argument('--inplace', action='store_true',
57 help='clean in place, without backup')
56 58
57 excl_group = parser.add_mutually_exclusive_group() 59 excl_group = parser.add_mutually_exclusive_group()
58 excl_group.add_argument('files', nargs='*', help='the files to process', 60 excl_group.add_argument('files', nargs='*', help='the files to process',
@@ -114,8 +116,10 @@ def __print_meta(filename: str, metadata: dict, depth: int = 1):
114 print(padding + " %s: harmful content" % k) 116 print(padding + " %s: harmful content" % k)
115 117
116 118
117def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool: 119def clean_meta(filename: str, is_lightweight: bool, inplace: bool,
118 if not __check_file(filename, os.R_OK): 120 policy: UnknownMemberPolicy) -> bool:
121 mode = (os.R_OK | os.W_OK) if inplace else os.R_OK
122 if not __check_file(filename, mode):
119 return False 123 return False
120 124
121 p, mtype = parser_factory.get_parser(filename) # type: ignore 125 p, mtype = parser_factory.get_parser(filename) # type: ignore
@@ -127,7 +131,10 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
127 131
128 try: 132 try:
129 logging.debug('Cleaning %s…', filename) 133 logging.debug('Cleaning %s…', filename)
130 return p.remove_all() 134 ret = p.remove_all()
135 if inplace is True:
136 os.rename(p.output_filename, filename)
137 return ret
131 except RuntimeError as e: 138 except RuntimeError as e:
132 print("[-] %s can't be cleaned: %s" % (filename, e)) 139 print("[-] %s can't be cleaned: %s" % (filename, e))
133 return False 140 return False
@@ -190,6 +197,7 @@ def main() -> int:
190 return 0 197 return 0
191 198
192 else: 199 else:
200 inplace = args.inplace
193 policy = UnknownMemberPolicy(args.unknown_members) 201 policy = UnknownMemberPolicy(args.unknown_members)
194 if policy == UnknownMemberPolicy.KEEP: 202 if policy == UnknownMemberPolicy.KEEP:
195 logging.warning('Keeping unknown member files may leak metadata in the resulting file!') 203 logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
@@ -201,7 +209,8 @@ def main() -> int:
201 with concurrent.futures.ProcessPoolExecutor() as executor: 209 with concurrent.futures.ProcessPoolExecutor() as executor:
202 futures = list() 210 futures = list()
203 for f in files: 211 for f in files:
204 future = executor.submit(clean_meta, f, args.lightweight, policy) 212 future = executor.submit(clean_meta, f, args.lightweight,
213 inplace, policy)
205 futures.append(future) 214 futures.append(future)
206 for future in concurrent.futures.as_completed(futures): 215 for future in concurrent.futures.as_completed(futures):
207 no_failure &= future.result() 216 no_failure &= future.result()
diff --git a/tests/test_climat2.py b/tests/test_climat2.py
index bbb9c06..6cf8a39 100644
--- a/tests/test_climat2.py
+++ b/tests/test_climat2.py
@@ -20,7 +20,7 @@ class TestHelp(unittest.TestCase):
20 def test_help(self): 20 def test_help(self):
21 proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE) 21 proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE)
22 stdout, _ = proc.communicate() 22 stdout, _ = proc.communicate()
23 self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [-v] [-l]', 23 self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [--inplace] [-v] [-l]',
24 stdout) 24 stdout)
25 self.assertIn(b'[--check-dependencies] [-L | -s]', stdout) 25 self.assertIn(b'[--check-dependencies] [-L | -s]', stdout)
26 self.assertIn(b'[files [files ...]]', stdout) 26 self.assertIn(b'[files [files ...]]', stdout)
@@ -28,7 +28,7 @@ class TestHelp(unittest.TestCase):
28 def test_no_arg(self): 28 def test_no_arg(self):
29 proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE) 29 proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE)
30 stdout, _ = proc.communicate() 30 stdout, _ = proc.communicate()
31 self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [-v] [-l]', 31 self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [--inplace] [-v] [-l]',
32 stdout) 32 stdout)
33 self.assertIn(b'[--check-dependencies] [-L | -s]', stdout) 33 self.assertIn(b'[--check-dependencies] [-L | -s]', stdout)
34 self.assertIn(b'[files [files ...]]', stdout) 34 self.assertIn(b'[files [files ...]]', stdout)
@@ -241,3 +241,34 @@ class TestCommandLineParallel(unittest.TestCase):
241 os.remove('./tests/data/dirty_%d.cleaned.jpg' % i) 241 os.remove('./tests/data/dirty_%d.cleaned.jpg' % i)
242 os.remove(path) 242 os.remove(path)
243 os.remove('./tests/data/dirty_%d.docx' % i) 243 os.remove('./tests/data/dirty_%d.docx' % i)
244
class TestInplaceCleaning(unittest.TestCase):
    """End-to-end checks of the ``--inplace`` command-line switch.

    Every test works on scratch copies of the fixtures in ``./tests/data``
    and registers their removal with ``addCleanup`` so that a failing
    assertion cannot leave stray files behind for later test runs.
    """

    @staticmethod
    def _remove_quietly(path):
        """Delete *path*, tolerating a file that is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate best-effort: cleanup must never mask the real
            # test outcome just because the scratch file vanished.
            pass

    def test_cleaning(self):
        """A JPEG cleaned in place must report no metadata afterwards."""
        target = './tests/data/clean.jpg'
        shutil.copy('./tests/data/dirty.jpg', target)
        self.addCleanup(self._remove_quietly, target)

        proc = subprocess.Popen(mat2_binary + ['--inplace', target],
                                stdout=subprocess.PIPE)
        proc.communicate()

        proc = subprocess.Popen(mat2_binary + ['--show', target],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b' No metadata found in ./tests/data/clean.jpg.\n', stdout)

    def test_cleaning_multiple_one_fails(self):
        """One bad input must not prevent the other files from being cleaned.

        Nine genuine JPEGs are processed together with a torrent file
        stored under a ``.jpg`` name, which is the input expected to fail.
        """
        jpegs = ['./tests/data/clean_%d.jpg' % i for i in range(9)]
        bogus = './tests/data/clean_9.jpg'

        for path in jpegs:
            shutil.copy('./tests/data/dirty.jpg', path)
            self.addCleanup(self._remove_quietly, path)
        shutil.copy('./tests/data/dirty.torrent', bogus)
        self.addCleanup(self._remove_quietly, bogus)

        # The bogus file has to be on the command line, otherwise the
        # "one fails" scenario is never actually exercised.
        proc = subprocess.Popen(mat2_binary + ['--inplace'] + jpegs + [bogus],
                                stdout=subprocess.PIPE)
        proc.communicate()

        # Only the genuine JPEGs are expected to end up metadata-free.
        for path in jpegs:
            parser = images.JPGParser(path)
            self.assertEqual(parser.get_meta(), {})
274