From 0546f5ce28718c3ecf8e379633341234029bf794 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 3 Jul 2011 13:47:42 +0200 Subject: Fix the unicode problem --- lib/mat.py | 3 +-- lib/misc.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/mat.py b/lib/mat.py index c9547c1..858fbc9 100644 --- a/lib/mat.py +++ b/lib/mat.py @@ -34,7 +34,6 @@ def secure_remove(filename): ''' securely remove the file ''' - #FIXME : Vulnerable to shell injection ? try: subprocess.call('shred --remove %s' % filename, shell=True) except: @@ -78,5 +77,5 @@ def create_class_file(name, backup): print("Don't have stripper for file type: %s" % editor.description) sys.exit(1) if editor.input.__class__ == hachoir_parser.misc.PDFDocument: - return stripper_class(filename, backup) + return stripper_class(filename, realname, backup) return stripper_class(realname, filename, parser, editor, backup) diff --git a/lib/misc.py b/lib/misc.py index 8063ae1..8e1b5c2 100644 --- a/lib/misc.py +++ b/lib/misc.py @@ -10,9 +10,10 @@ class PdfStripper(parser.Generic_parser): ''' Represent a pdf file, with the help of pdfrw ''' - def __init__(self, filename, backup): + def __init__(self, filename, realname, backup): self.filename = filename self.backup = backup + self.realname = realname self.trailer = pdfrw.PdfReader(self.filename) self.writer = pdfrw.PdfWriter() @@ -38,21 +39,27 @@ class PdfStripper(parser.Generic_parser): Transform each pages into a jpg, clean them, then re-assemble them into a new pdf ''' - output_file = self.filename + parser.POSTFIX + '.pdf' + output_file = self.realname + parser.POSTFIX + '.pdf' _, self.tmpdir = tempfile.mkstemp() subprocess.call('gm convert %s %s' % (self.filename, self.tmpdir + - 'temp.jpg'), shell=True) + 'temp.jpg'), shell=True)#Convert pages to jpg + for current_file in glob.glob(self.tmpdir + 'temp*'): + #Clean every jpg image class_file = mat.create_class_file(current_file, False) class_file.remove_all() + subprocess.call('gm convert %s %s' % (self.tmpdir + - 'temp.jpg*', output_file), shell=True) + 'temp.jpg*', output_file), shell=True)#Assemble jpg into pdf + for current_file in glob.glob(self.tmpdir + 'temp*'): + #remove jpg files mat.secure_remove(current_file) + if self.backup is False: mat.secure_remove(self.filename) #remove the old file os.rename(output_file, self.filename)#rename the new - name = self.filename + name = self.realname else: name = output_file class_file = mat.create_class_file(name, False) @@ -75,4 +82,3 @@ class PdfStripper(parser.Generic_parser): for key, value in self.trailer.Info.iteritems(): metadata[key[1:]] = value[1:-1] return metadata - -- cgit v1.3