summaryrefslogtreecommitdiff
path: root/lib/misc.py
diff options
context:
space:
mode:
authorjvoisin2011-06-30 19:28:24 +0200
committerjvoisin2011-06-30 19:28:24 +0200
commit347daf394f9e770ad031601b95abd32b96609adb (patch)
tree6044c872877c5790b1292b6bd6a71c4649cc8519 /lib/misc.py
parent3bc343109528964f6bec075e83106d8f54710b99 (diff)
Almost done with pdf. Still can't figure out how to get rid of the 'unicode' problem.
Diffstat (limited to '')
-rw-r--r--lib/misc.py39
1 files changed, 34 insertions, 5 deletions
diff --git a/lib/misc.py b/lib/misc.py
index a8070f1..8063ae1 100644
--- a/lib/misc.py
+++ b/lib/misc.py
@@ -1,19 +1,24 @@
1import parser 1import parser
2import pdfrw 2import pdfrw
3import shutil 3import os
4import mat
5import subprocess
6import tempfile
7import glob
4 8
5class PdfStripper(parser.Generic_parser): 9class PdfStripper(parser.Generic_parser):
6 ''' 10 '''
7 Represent a pdf file, with the help of pdfrw 11 Represent a pdf file, with the help of pdfrw
8 ''' 12 '''
9 def __init__(self, filename): 13 def __init__(self, filename, backup):
10 self.filename = filename 14 self.filename = filename
15 self.backup = backup
11 self.trailer = pdfrw.PdfReader(self.filename) 16 self.trailer = pdfrw.PdfReader(self.filename)
12 self.writer = pdfrw.PdfWriter() 17 self.writer = pdfrw.PdfWriter()
13 18
14 def remove_all(self): 19 def remove_all(self):
15 ''' 20 '''
16 Remove all the files that are compromizing 21 Remove all the meta fields that are compromizing
17 ''' 22 '''
18 self.trailer.Info.Title = '' 23 self.trailer.Info.Title = ''
19 self.trailer.Info.Author = '' 24 self.trailer.Info.Author = ''
@@ -25,9 +30,33 @@ class PdfStripper(parser.Generic_parser):
25 self.writer.trailer = self.trailer 30 self.writer.trailer = self.trailer
26 self.writer.write(self.filename + parser.POSTFIX) 31 self.writer.write(self.filename + parser.POSTFIX)
27 if self.backup is False: 32 if self.backup is False:
28 self.secure_remove() #remove the old file 33 mat.secure_remove(self.filename) #remove the old file
29 shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new 34 os.rename(self.filename + parser.POSTFIX, self.filename)
30 35
36 def remove_all_ugly(self):
37 '''
38 Transform each pages into a jpg, clean them,
39 then re-assemble them into a new pdf
40 '''
41 output_file = self.filename + parser.POSTFIX + '.pdf'
42 _, self.tmpdir = tempfile.mkstemp()
43 subprocess.call('gm convert %s %s' % (self.filename, self.tmpdir +
44 'temp.jpg'), shell=True)
45 for current_file in glob.glob(self.tmpdir + 'temp*'):
46 class_file = mat.create_class_file(current_file, False)
47 class_file.remove_all()
48 subprocess.call('gm convert %s %s' % (self.tmpdir +
49 'temp.jpg*', output_file), shell=True)
50 for current_file in glob.glob(self.tmpdir + 'temp*'):
51 mat.secure_remove(current_file)
52 if self.backup is False:
53 mat.secure_remove(self.filename) #remove the old file
54 os.rename(output_file, self.filename)#rename the new
55 name = self.filename
56 else:
57 name = output_file
58 class_file = mat.create_class_file(name, False)
59 class_file.remove_all()
31 60
32 def is_clean(self): 61 def is_clean(self):
33 ''' 62 '''