diff options
| author | jvoisin | 2011-06-23 19:00:44 +0200 |
|---|---|---|
| committer | jvoisin | 2011-06-23 19:00:44 +0200 |
| commit | e163520a130145496ceeeeeb94d950f17e7fd76e (patch) | |
| tree | 19d9bcfbc997f5884ea798f8edeaaad8da18915d | |
| parent | 9e69adbe1b065707f8be4f146cc3c05660cef711 (diff) | |
Implementation of the --backup option, to keep a backup of the treated file (disabled by default)
| -rwxr-xr-x | cli.py | 21 | ||||
| -rw-r--r-- | lib/mat.py | 10 | ||||
| -rw-r--r-- | lib/misc.py | 5 | ||||
| -rw-r--r-- | lib/parser.py | 28 |
4 files changed, 56 insertions, 8 deletions
| @@ -18,7 +18,10 @@ def parse(): | |||
| 18 | help='Check if a file is free of harmfull metadatas') | 18 | help='Check if a file is free of harmfull metadatas') |
| 19 | common.add_option('--version', action='callback', callback=displayVersion, | 19 | common.add_option('--version', action='callback', callback=displayVersion, |
| 20 | help='Display version and exit') | 20 | help='Display version and exit') |
| 21 | 21 | common.add_option('--backup', '-b', action='store_true', default=False, | |
| 22 | help='Keep a backup copy') | ||
| 23 | common.add_option('--ugly', '-u', action='store_true', default=False, | ||
| 24 | help='Remove harmful meta, but with loss') | ||
| 22 | parser.add_option_group(common) | 25 | parser.add_option_group(common) |
| 23 | 26 | ||
| 24 | values, arguments = parser.parse_args() | 27 | values, arguments = parser.parse_args() |
| @@ -60,6 +63,18 @@ def clean_meta(class_file, filename): | |||
| 60 | class_file.remove_all() | 63 | class_file.remove_all() |
| 61 | print('%s cleaned !' % filename) | 64 | print('%s cleaned !' % filename) |
| 62 | 65 | ||
| 66 | def clean_meta_ugly(class_file, filename): | ||
| 67 | ''' | ||
| 68 | Clean the file 'filename', ugly way | ||
| 69 | ''' | ||
| 70 | print('[+] Cleaning %s' % filename) | ||
| 71 | if class_file.is_clean(): | ||
| 72 | print('%s is already clean' % filename) | ||
| 73 | else: | ||
| 74 | class_file.remove_all_ugly() | ||
| 75 | print('%s cleaned !' % filename) | ||
| 76 | |||
| 77 | |||
| 63 | def main(): | 78 | def main(): |
| 64 | args, filenames = parse() | 79 | args, filenames = parse() |
| 65 | 80 | ||
| @@ -68,11 +83,13 @@ def main(): | |||
| 68 | func = list_meta | 83 | func = list_meta |
| 69 | elif args.check is True: #only check if the file is clean | 84 | elif args.check is True: #only check if the file is clean |
| 70 | func = is_clean | 85 | func = is_clean |
| 86 | elif args.ugly is True: | ||
| 87 | func = clean_meta_ugly | ||
| 71 | else: #clean the file | 88 | else: #clean the file |
| 72 | func = clean_meta | 89 | func = clean_meta |
| 73 | 90 | ||
| 74 | for filename in filenames: | 91 | for filename in filenames: |
| 75 | class_file = mat.create_class_file(filename) | 92 | class_file = mat.create_class_file(filename, args.backup) |
| 76 | func(class_file, filename) | 93 | func(class_file, filename) |
| 77 | print('\n') | 94 | print('\n') |
| 78 | 95 | ||
| @@ -14,7 +14,7 @@ import hachoir_editor | |||
| 14 | import images | 14 | import images |
| 15 | import audio | 15 | import audio |
| 16 | import misc | 16 | import misc |
| 17 | import archive | 17 | #import archive |
| 18 | 18 | ||
| 19 | __version__ = "0.1" | 19 | __version__ = "0.1" |
| 20 | __author__ = "jvoisin" | 20 | __author__ = "jvoisin" |
| @@ -24,10 +24,10 @@ strippers = { | |||
| 24 | hachoir_parser.image.PngFile: images.PngStripper, | 24 | hachoir_parser.image.PngFile: images.PngStripper, |
| 25 | hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, | 25 | hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, |
| 26 | hachoir_parser.misc.PDFDocument: misc.PdfStripper, | 26 | hachoir_parser.misc.PDFDocument: misc.PdfStripper, |
| 27 | hachoir_parser.archive.TarFile: archive.TarStripper, | 27 | #hachoir_parser.archive.TarFile: archive.TarStripper, |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | def create_class_file(name): | 30 | def create_class_file(name, backup): |
| 31 | ''' | 31 | ''' |
| 32 | return a $FILETYPEStripper() class, | 32 | return a $FILETYPEStripper() class, |
| 33 | corresponding to the filetype of the given file | 33 | corresponding to the filetype of the given file |
| @@ -57,5 +57,5 @@ def create_class_file(name): | |||
| 57 | print("Don't have stripper for file type: %s" % editor.description) | 57 | print("Don't have stripper for file type: %s" % editor.description) |
| 58 | sys.exit(1) | 58 | sys.exit(1) |
| 59 | if editor.input.__class__ == hachoir_parser.misc.PDFDocument: | 59 | if editor.input.__class__ == hachoir_parser.misc.PDFDocument: |
| 60 | return stripper_class(filename) | 60 | return stripper_class(filename, backup) |
| 61 | return stripper_class(realname, filename, parser, editor) | 61 | return stripper_class(realname, filename, parser, editor, backup) |
diff --git a/lib/misc.py b/lib/misc.py
index 56c2274..a8070f1 100644
--- a/lib/misc.py
+++ b/lib/misc.py
| @@ -1,5 +1,6 @@ | |||
| 1 | import parser | 1 | import parser |
| 2 | import pdfrw | 2 | import pdfrw |
| 3 | import shutil | ||
| 3 | 4 | ||
| 4 | class PdfStripper(parser.Generic_parser): | 5 | class PdfStripper(parser.Generic_parser): |
| 5 | ''' | 6 | ''' |
| @@ -23,6 +24,10 @@ class PdfStripper(parser.Generic_parser): | |||
| 23 | 24 | ||
| 24 | self.writer.trailer = self.trailer | 25 | self.writer.trailer = self.trailer |
| 25 | self.writer.write(self.filename + parser.POSTFIX) | 26 | self.writer.write(self.filename + parser.POSTFIX) |
| 27 | if self.backup is False: | ||
| 28 | self.secure_remove() #remove the old file | ||
| 29 | shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new | ||
| 30 | |||
| 26 | 31 | ||
| 27 | def is_clean(self): | 32 | def is_clean(self): |
| 28 | ''' | 33 | ''' |
diff --git a/lib/parser.py b/lib/parser.py
index 1084de5..d629619 100644
--- a/lib/parser.py
+++ b/lib/parser.py
| @@ -6,15 +6,27 @@ import hachoir_core.error | |||
| 6 | import hachoir_parser | 6 | import hachoir_parser |
| 7 | import hachoir_editor | 7 | import hachoir_editor |
| 8 | import sys | 8 | import sys |
| 9 | import shutil | ||
| 9 | 10 | ||
| 10 | POSTFIX = ".cleaned" | 11 | POSTFIX = ".cleaned" |
| 11 | 12 | ||
| 12 | class Generic_parser(): | 13 | class Generic_parser(): |
| 13 | def __init__(self, realname, filename, parser, editor): | 14 | def __init__(self, realname, filename, parser, editor, backup): |
| 14 | self.filename = filename | 15 | self.filename = filename |
| 15 | self.realname = realname | 16 | self.realname = realname |
| 16 | self.parser = parser | 17 | self.parser = parser |
| 17 | self.editor = editor | 18 | self.editor = editor |
| 19 | self.backup = backup | ||
| 20 | |||
| 21 | def secure_remove(self): | ||
| 22 | ''' | ||
| 23 | securely remove the file | ||
| 24 | ''' | ||
| 25 | #FIXME : not secure at all ! | ||
| 26 | try: | ||
| 27 | shutil.rmtree(self.filename) | ||
| 28 | except: | ||
| 29 | print('Unable to remove %s' % self.filename) | ||
| 18 | 30 | ||
| 19 | def is_clean(self): | 31 | def is_clean(self): |
| 20 | ''' | 32 | ''' |
| @@ -33,6 +45,20 @@ class Generic_parser(): | |||
| 33 | if self._should_remove(field): | 45 | if self._should_remove(field): |
| 34 | self._remove(field) | 46 | self._remove(field) |
| 35 | hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) | 47 | hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) |
| 48 | if self.backup is False: | ||
| 49 | self.secure_remove() #remove the old file | ||
| 50 | shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new | ||
| 51 | |||
| 52 | def remove_all_ugly(self): | ||
| 53 | ''' | ||
| 54 | If the remove_all() is not efficient enough, | ||
| 55 | this method is implemented : | ||
| 56 | It is efficient, but destructive. | ||
| 57 | In a perfect world, with nice fileformat, | ||
| 58 | this method does not exist. | ||
| 59 | ''' | ||
| 60 | raise NotImplementedError() | ||
| 61 | |||
| 36 | 62 | ||
| 37 | def _remove(self, field): | 63 | def _remove(self, field): |
| 38 | ''' | 64 | ''' |
