From e163520a130145496ceeeeeb94d950f17e7fd76e Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Thu, 23 Jun 2011 19:00:44 +0200
Subject: Implementation of the --backup option, to keep a backup of the treated file (disabled by default)

Add --backup/-b to the command line and pass the flag through
mat.create_class_file() down to the strippers. The cleaned data is
always written to "<file>.cleaned"; when no backup is requested the
original file is removed and the cleaned copy is renamed over it.

Also add an --ugly/-u option, backed by a remove_all_ugly() stub that
raises NotImplementedError, and disable the tar archive stripper.
---
Review notes (ignored by git am):
 - shutil has no rename(); the cleaned copy is now moved with shutil.move().
 - lib/misc.py has no bare POSTFIX name; it must be parser.POSTFIX.
 - secure_remove() used shutil.rmtree(), which only removes directories;
   it now uses os.remove() and catches OSError instead of everything.
 - NOTE(review): PdfStripper.__init__ is not touched by this patch, yet it
   is now called with (filename, backup) and remove_all() reads self.backup;
   confirm that it accepts and stores `backup`.

 cli.py        | 21 +++++++++++++++++++--
 lib/mat.py    | 10 +++++-----
 lib/misc.py   |  5 +++++
 lib/parser.py | 29 ++++++++++++++++++++++++++++-
 4 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/cli.py b/cli.py
index f2bec52..1a8713d 100755
--- a/cli.py
+++ b/cli.py
@@ -18,7 +18,10 @@ def parse():
         help='Check if a file is free of harmfull metadatas')
     common.add_option('--version', action='callback', callback=displayVersion,
         help='Display version and exit')
-
+    common.add_option('--backup', '-b', action='store_true', default=False,
+        help='Keep a backup copy')
+    common.add_option('--ugly', '-u', action='store_true', default=False,
+        help='Remove harmful meta, but with loss')
     parser.add_option_group(common)
 
     values, arguments = parser.parse_args()
@@ -60,6 +63,18 @@ def clean_meta(class_file, filename):
         class_file.remove_all()
         print('%s cleaned !' % filename)
 
+def clean_meta_ugly(class_file, filename):
+    '''
+        Clean the file 'filename', ugly way
+    '''
+    print('[+] Cleaning %s' % filename)
+    if class_file.is_clean():
+        print('%s is already clean' % filename)
+    else:
+        class_file.remove_all_ugly()
+        print('%s cleaned !' % filename)
+
+
 def main():
     args, filenames = parse()
 
@@ -68,11 +83,13 @@ def main():
         func = list_meta
     elif args.check is True: #only check if the file is clean
         func = is_clean
+    elif args.ugly is True:
+        func = clean_meta_ugly
     else: #clean the file
         func = clean_meta
 
     for filename in filenames:
-        class_file = mat.create_class_file(filename)
+        class_file = mat.create_class_file(filename, args.backup)
         func(class_file, filename)
         print('\n')
 
diff --git a/lib/mat.py b/lib/mat.py
index a9b8e17..6abcd64 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -14,7 +14,7 @@ import hachoir_editor
 import images
 import audio
 import misc
-import archive
+#import archive
 
 __version__ = "0.1"
 __author__ = "jvoisin"
@@ -24,10 +24,10 @@ strippers = {
     hachoir_parser.image.PngFile: images.PngStripper,
     hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper,
     hachoir_parser.misc.PDFDocument: misc.PdfStripper,
-    hachoir_parser.archive.TarFile: archive.TarStripper,
+    #hachoir_parser.archive.TarFile: archive.TarStripper,
 }
 
-def create_class_file(name):
+def create_class_file(name, backup):
     '''
         return a $FILETYPEStripper() class,
         corresponding to the filetype of the given file
@@ -57,5 +57,5 @@ def create_class_file(name):
         print("Don't have stripper for file type: %s" % editor.description)
         sys.exit(1)
     if editor.input.__class__ == hachoir_parser.misc.PDFDocument:
-        return stripper_class(filename)
-    return stripper_class(realname, filename, parser, editor)
+        return stripper_class(filename, backup)
+    return stripper_class(realname, filename, parser, editor, backup)
diff --git a/lib/misc.py b/lib/misc.py
index 56c2274..a8070f1 100644
--- a/lib/misc.py
+++ b/lib/misc.py
@@ -1,5 +1,6 @@
 import parser
 import pdfrw
+import shutil
 
 class PdfStripper(parser.Generic_parser):
     '''
@@ -23,6 +24,10 @@ class PdfStripper(parser.Generic_parser):
 
         self.writer.trailer = self.trailer
         self.writer.write(self.filename + parser.POSTFIX)
+        if self.backup is False:
+            self.secure_remove() #remove the old file
+            shutil.move(self.filename + parser.POSTFIX, self.filename) #rename the cleaned copy over it
+
 
     def is_clean(self):
         '''
diff --git a/lib/parser.py b/lib/parser.py
index 1084de5..d629619 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -6,15 +6,28 @@ import hachoir_core.error
 import hachoir_parser
 import hachoir_editor
 import sys
+import os
+import shutil
 
 POSTFIX = ".cleaned"
 
 class Generic_parser():
-    def __init__(self, realname, filename, parser, editor):
+    def __init__(self, realname, filename, parser, editor, backup):
         self.filename = filename
         self.realname = realname
         self.parser = parser
         self.editor = editor
+        self.backup = backup
+
+    def secure_remove(self):
+        '''
+            remove the original file (best effort, only reports failures)
+        '''
+        #FIXME : not secure at all !
+        try:
+            os.remove(self.filename)
+        except OSError:
+            print('Unable to remove %s' % self.filename)
 
     def is_clean(self):
         '''
@@ -33,6 +46,20 @@ class Generic_parser():
             if self._should_remove(field):
                 self._remove(field)
         hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)
+        if self.backup is False:
+            self.secure_remove() #remove the old file
+            shutil.move(self.filename + POSTFIX, self.filename) #rename the cleaned copy over it
+
+    def remove_all_ugly(self):
+        '''
+            If the remove_all() is not efficient enough,
+            this method is implemented :
+            It is efficient, but destructive.
+            In a perfect world, with nice fileformat,
+            this method does not exist.
+        '''
+        raise NotImplementedError()
+
 
     def _remove(self, field):
         '''
-- 
cgit v1.3