summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x  cli.py         21
-rw-r--r--  lib/mat.py     10
-rw-r--r--  lib/misc.py     5
-rw-r--r--  lib/parser.py  28
4 files changed, 56 insertions, 8 deletions
diff --git a/cli.py b/cli.py
index f2bec52..1a8713d 100755
--- a/cli.py
+++ b/cli.py
@@ -18,7 +18,10 @@ def parse():
18 help='Check if a file is free of harmfull metadatas') 18 help='Check if a file is free of harmfull metadatas')
19 common.add_option('--version', action='callback', callback=displayVersion, 19 common.add_option('--version', action='callback', callback=displayVersion,
20 help='Display version and exit') 20 help='Display version and exit')
21 21 common.add_option('--backup', '-b', action='store_true', default=False,
22 help='Keep a backup copy')
23 common.add_option('--ugly', '-u', action='store_true', default=False,
24 help='Remove harmful meta, but with loss')
22 parser.add_option_group(common) 25 parser.add_option_group(common)
23 26
24 values, arguments = parser.parse_args() 27 values, arguments = parser.parse_args()
@@ -60,6 +63,18 @@ def clean_meta(class_file, filename):
60 class_file.remove_all() 63 class_file.remove_all()
61 print('%s cleaned !' % filename) 64 print('%s cleaned !' % filename)
62 65
66def clean_meta_ugly(class_file, filename):
67 '''
68 Clean the file 'filename', ugly way
69 '''
70 print('[+] Cleaning %s' % filename)
71 if class_file.is_clean():
72 print('%s is already clean' % filename)
73 else:
74 class_file.remove_all_ugly()
75 print('%s cleaned !' % filename)
76
77
63def main(): 78def main():
64 args, filenames = parse() 79 args, filenames = parse()
65 80
@@ -68,11 +83,13 @@ def main():
68 func = list_meta 83 func = list_meta
69 elif args.check is True: #only check if the file is clean 84 elif args.check is True: #only check if the file is clean
70 func = is_clean 85 func = is_clean
86 elif args.ugly is True:
87 func = clean_meta_ugly
71 else: #clean the file 88 else: #clean the file
72 func = clean_meta 89 func = clean_meta
73 90
74 for filename in filenames: 91 for filename in filenames:
75 class_file = mat.create_class_file(filename) 92 class_file = mat.create_class_file(filename, args.backup)
76 func(class_file, filename) 93 func(class_file, filename)
77 print('\n') 94 print('\n')
78 95
diff --git a/lib/mat.py b/lib/mat.py
index a9b8e17..6abcd64 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -14,7 +14,7 @@ import hachoir_editor
14import images 14import images
15import audio 15import audio
16import misc 16import misc
17import archive 17#import archive
18 18
19__version__ = "0.1" 19__version__ = "0.1"
20__author__ = "jvoisin" 20__author__ = "jvoisin"
@@ -24,10 +24,10 @@ strippers = {
24 hachoir_parser.image.PngFile: images.PngStripper, 24 hachoir_parser.image.PngFile: images.PngStripper,
25 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, 25 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper,
26 hachoir_parser.misc.PDFDocument: misc.PdfStripper, 26 hachoir_parser.misc.PDFDocument: misc.PdfStripper,
27 hachoir_parser.archive.TarFile: archive.TarStripper, 27 #hachoir_parser.archive.TarFile: archive.TarStripper,
28} 28}
29 29
30def create_class_file(name): 30def create_class_file(name, backup):
31 ''' 31 '''
32 return a $FILETYPEStripper() class, 32 return a $FILETYPEStripper() class,
33 corresponding to the filetype of the given file 33 corresponding to the filetype of the given file
@@ -57,5 +57,5 @@ def create_class_file(name):
57 print("Don't have stripper for file type: %s" % editor.description) 57 print("Don't have stripper for file type: %s" % editor.description)
58 sys.exit(1) 58 sys.exit(1)
59 if editor.input.__class__ == hachoir_parser.misc.PDFDocument: 59 if editor.input.__class__ == hachoir_parser.misc.PDFDocument:
60 return stripper_class(filename) 60 return stripper_class(filename, backup)
61 return stripper_class(realname, filename, parser, editor) 61 return stripper_class(realname, filename, parser, editor, backup)
diff --git a/lib/misc.py b/lib/misc.py
index 56c2274..a8070f1 100644
--- a/lib/misc.py
+++ b/lib/misc.py
@@ -1,5 +1,6 @@
1import parser 1import parser
2import pdfrw 2import pdfrw
3import shutil
3 4
4class PdfStripper(parser.Generic_parser): 5class PdfStripper(parser.Generic_parser):
5 ''' 6 '''
@@ -23,6 +24,10 @@ class PdfStripper(parser.Generic_parser):
23 24
24 self.writer.trailer = self.trailer 25 self.writer.trailer = self.trailer
25 self.writer.write(self.filename + parser.POSTFIX) 26 self.writer.write(self.filename + parser.POSTFIX)
27 if self.backup is False:
28 self.secure_remove() #remove the old file
29 shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new
30
26 31
27 def is_clean(self): 32 def is_clean(self):
28 ''' 33 '''
diff --git a/lib/parser.py b/lib/parser.py
index 1084de5..d629619 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -6,15 +6,27 @@ import hachoir_core.error
6import hachoir_parser 6import hachoir_parser
7import hachoir_editor 7import hachoir_editor
8import sys 8import sys
9import shutil
9 10
10POSTFIX = ".cleaned" 11POSTFIX = ".cleaned"
11 12
12class Generic_parser(): 13class Generic_parser():
13 def __init__(self, realname, filename, parser, editor): 14 def __init__(self, realname, filename, parser, editor, backup):
14 self.filename = filename 15 self.filename = filename
15 self.realname = realname 16 self.realname = realname
16 self.parser = parser 17 self.parser = parser
17 self.editor = editor 18 self.editor = editor
19 self.backup = backup
20
21 def secure_remove(self):
22 '''
23 securely remove the file
24 '''
25 #FIXME : not secure at all !
26 try:
27 shutil.rmtree(self.filename)
28 except:
29 print('Unable to remove %s' % self.filename)
18 30
19 def is_clean(self): 31 def is_clean(self):
20 ''' 32 '''
@@ -33,6 +45,20 @@ class Generic_parser():
33 if self._should_remove(field): 45 if self._should_remove(field):
34 self._remove(field) 46 self._remove(field)
35 hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) 47 hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)
48 if self.backup is False:
49 self.secure_remove() #remove the old file
50 shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new
51
52 def remove_all_ugly(self):
53 '''
54 If the remove_all() is not efficient enough,
55 this method is implemented :
56 It is efficient, but destructive.
57 In a perfect world, with nice fileformat,
58 this method does not exist.
59 '''
60 raise NotImplementedError()
61
36 62
37 def _remove(self, field): 63 def _remove(self, field):
38 ''' 64 '''