From 347daf394f9e770ad031601b95abd32b96609adb Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Thu, 30 Jun 2011 19:28:24 +0200
Subject: almost done with pdf. Can't figure how to get rid of the 'unicode' problem now.

---
 lib/misc.py | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

(limited to 'lib')

Review notes (placed between the '---' marker and the first 'diff --git'
header, where patch tools ignore free text):

* Layout: the line structure below was rebuilt from a whitespace-collapsed
  copy of this patch. Per-hunk line counts agree with the @@ headers and
  the diffstat, but the leading whitespace (4-space indents, docstring and
  continuation-line indents, nesting of the last statements of
  remove_all_ugly) is an assumption -- TODO confirm against the repository
  before applying.
* __init__: 'backup' is added as a second required positional parameter,
  so any caller constructing PdfStripper(filename) has to be updated.
* remove_all: swaps self.secure_remove() / shutil.rename(...) / bare POSTFIX
  for mat.secure_remove(self.filename) / os.rename(...) / parser.POSTFIX.
  (The shutil module has no rename() function.)
* remove_all_ugly:
  - tempfile.mkstemp() returns (fd, path) for a file it creates. The fd is
    discarded without being closed, and the path is only used as a string
    prefix (self.tmpdir + 'temp.jpg'); the temp file itself does not match
    glob(self.tmpdir + 'temp*') and is never handed to secure_remove here.
  - Both 'gm convert' commands are built by %-formatting file names into a
    string that is run with shell=True; the names are not quoted.
  - The return values of subprocess.call are not checked. When self.backup
    is False the original file is removed and output_file is renamed over
    it without checking that the conversion produced anything.
  - The second 'gm convert' depends on the shell expanding 'temp.jpg*'.
* Docstring wording: "compromizing" (compromising) and "each pages"
  (each page) are left as committed so the patch content stays unchanged.

diff --git a/lib/misc.py b/lib/misc.py
index a8070f1..8063ae1 100644
--- a/lib/misc.py
+++ b/lib/misc.py
@@ -1,19 +1,24 @@
 import parser
 import pdfrw
-import shutil
+import os
+import mat
+import subprocess
+import tempfile
+import glob
 
 class PdfStripper(parser.Generic_parser):
     '''
         Represent a pdf file, with the help of pdfrw
     '''
-    def __init__(self, filename):
+    def __init__(self, filename, backup):
         self.filename = filename
+        self.backup = backup
         self.trailer = pdfrw.PdfReader(self.filename)
         self.writer = pdfrw.PdfWriter()
 
     def remove_all(self):
         '''
-            Remove all the files that are compromizing
+            Remove all the meta fields that are compromizing
         '''
         self.trailer.Info.Title = ''
         self.trailer.Info.Author = ''
@@ -25,9 +30,33 @@ class PdfStripper(parser.Generic_parser):
         self.writer.trailer = self.trailer
         self.writer.write(self.filename + parser.POSTFIX)
         if self.backup is False:
-            self.secure_remove() #remove the old file
-            shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new
+            mat.secure_remove(self.filename) #remove the old file
+            os.rename(self.filename + parser.POSTFIX, self.filename)
 
+    def remove_all_ugly(self):
+        '''
+            Transform each pages into a jpg, clean them,
+            then re-assemble them into a new pdf
+        '''
+        output_file = self.filename + parser.POSTFIX + '.pdf'
+        _, self.tmpdir = tempfile.mkstemp()
+        subprocess.call('gm convert %s %s' % (self.filename, self.tmpdir +
+            'temp.jpg'), shell=True)
+        for current_file in glob.glob(self.tmpdir + 'temp*'):
+            class_file = mat.create_class_file(current_file, False)
+            class_file.remove_all()
+        subprocess.call('gm convert %s %s' % (self.tmpdir +
+            'temp.jpg*', output_file), shell=True)
+        for current_file in glob.glob(self.tmpdir + 'temp*'):
+            mat.secure_remove(current_file)
+        if self.backup is False:
+            mat.secure_remove(self.filename) #remove the old file
+            os.rename(output_file, self.filename)#rename the new
+            name = self.filename
+        else:
+            name = output_file
+        class_file = mat.create_class_file(name, False)
+        class_file.remove_all()
 
     def is_clean(self):
         '''
-- 
cgit v1.3