From e163520a130145496ceeeeeb94d950f17e7fd76e Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 23 Jun 2011 19:00:44 +0200 Subject: Implementation of the --backup option, to keep a backup of the treated file (disabled by default) --- lib/mat.py | 10 +++++----- lib/misc.py | 5 +++++ lib/parser.py | 28 +++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/mat.py b/lib/mat.py index a9b8e17..6abcd64 100644 --- a/lib/mat.py +++ b/lib/mat.py @@ -14,7 +14,7 @@ import hachoir_editor import images import audio import misc -import archive +#import archive __version__ = "0.1" __author__ = "jvoisin" @@ -24,10 +24,10 @@ strippers = { hachoir_parser.image.PngFile: images.PngStripper, hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, hachoir_parser.misc.PDFDocument: misc.PdfStripper, - hachoir_parser.archive.TarFile: archive.TarStripper, + #hachoir_parser.archive.TarFile: archive.TarStripper, } -def create_class_file(name): +def create_class_file(name, backup): ''' return a $FILETYPEStripper() class, corresponding to the filetype of the given file @@ -57,5 +57,5 @@ def create_class_file(name): print("Don't have stripper for file type: %s" % editor.description) sys.exit(1) if editor.input.__class__ == hachoir_parser.misc.PDFDocument: - return stripper_class(filename) - return stripper_class(realname, filename, parser, editor) + return stripper_class(filename, backup) + return stripper_class(realname, filename, parser, editor, backup) diff --git a/lib/misc.py b/lib/misc.py index 56c2274..a8070f1 100644 --- a/lib/misc.py +++ b/lib/misc.py @@ -1,5 +1,6 @@ import parser import pdfrw +import shutil class PdfStripper(parser.Generic_parser): ''' @@ -23,6 +24,10 @@ class PdfStripper(parser.Generic_parser): self.writer.trailer = self.trailer self.writer.write(self.filename + parser.POSTFIX) + if self.backup is False: + self.secure_remove() #remove the old file + shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new + def is_clean(self): ''' diff --git a/lib/parser.py b/lib/parser.py index 1084de5..d629619 100644 --- a/lib/parser.py +++ b/lib/parser.py @@ -6,15 +6,27 @@ import hachoir_core.error import hachoir_parser import hachoir_editor import sys +import shutil POSTFIX = ".cleaned" class Generic_parser(): - def __init__(self, realname, filename, parser, editor): + def __init__(self, realname, filename, parser, editor, backup): self.filename = filename self.realname = realname self.parser = parser self.editor = editor + self.backup = backup + + def secure_remove(self): + ''' + securely remove the file + ''' + #FIXME : not secure at all ! + try: + shutil.rmtree(self.filename) + except: + print('Unable to remove %s' % self.filename) def is_clean(self): ''' @@ -33,6 +45,20 @@ class Generic_parser(): if self._should_remove(field): self._remove(field) hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) + if self.backup is False: + self.secure_remove() #remove the old file + shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new + + def remove_all_ugly(self): + ''' + If the remove_all() is not efficient enough, + this method is implemented : + It is efficient, but destructive. + In a perfect world, with nice fileformat, + this method does not exist. + ''' + raise NotImplementedError() + def _remove(self, field): ''' -- cgit v1.3