summary | refs | log | tree | commit | diff
path: root/lib/office.py
diff options
context:
space:
mode:
author: jvoisin, 2011-07-26 15:14:48 +0200
committer: jvoisin, 2011-07-26 15:14:48 +0200
commit: 962e9aec5ffcdaae39e06f277dd47d1943205c37 (patch)
tree: 85d57c245f010579dc3eb9199fe3ed6e379e96d8 /lib/office.py
parent: 7c9edd6514854f707b87e150a1ffa327ebd8dcac (diff)
Bugfixes (especially for pdf), and more pylint conformity
Diffstat (limited to 'lib/office.py')
-rw-r--r-- lib/office.py 23
1 files changed, 15 insertions, 8 deletions
diff --git a/lib/office.py b/lib/office.py
index 432bc0b..5fa475d 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -6,6 +6,7 @@ import glob
6import logging 6import logging
7import zipfile 7import zipfile
8import re 8import re
9import shutil
9from xml.etree import ElementTree 10from xml.etree import ElementTree
10 11
11 12
@@ -97,7 +98,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
97 return True 98 return True
98 99
99 100
100class PdfStripper(parser.Generic_parser): 101class PdfStripper(parser.GenericParser):
101 ''' 102 '''
102 Represent a pdf file, with the help of pdfrw 103 Represent a pdf file, with the help of pdfrw
103 ''' 104 '''
@@ -109,10 +110,17 @@ class PdfStripper(parser.Generic_parser):
109 self.realname = realname 110 self.realname = realname
110 self.shortname = os.path.basename(filename) 111 self.shortname = os.path.basename(filename)
111 self.mime = mimetypes.guess_type(filename)[0] 112 self.mime = mimetypes.guess_type(filename)[0]
113 self.tempdir = tempfile.mkdtemp()
112 self.trailer = pdfrw.PdfReader(self.filename) 114 self.trailer = pdfrw.PdfReader(self.filename)
113 self.writer = pdfrw.PdfWriter() 115 self.writer = pdfrw.PdfWriter()
114 self.convert = 'gm convert -antialias -enhance %s %s' 116 self.convert = 'gm convert -antialias -enhance %s %s'
115 117
118 def __del__(self):
119 '''
120 Remove the temp dir
121 '''
122 shutil.rmtree(self.tempdir)
123
116 def remove_all(self): 124 def remove_all(self):
117 ''' 125 '''
118 Remove all the meta fields that are compromizing 126 Remove all the meta fields that are compromizing
@@ -133,19 +141,18 @@ class PdfStripper(parser.Generic_parser):
133 Transform each pages into a jpg, clean them, 141 Transform each pages into a jpg, clean them,
134 then re-assemble them into a new pdf 142 then re-assemble them into a new pdf
135 ''' 143 '''
136 _, self.tmpdir = tempfile.mkstemp() 144 subprocess.call(self.convert % (self.filename, self.tempdir +
137 subprocess.call(self.convert % (self.filename, self.tmpdir +
138 'temp.jpg'), shell=True) # Convert pages to jpg 145 'temp.jpg'), shell=True) # Convert pages to jpg
139 146
140 for current_file in glob.glob(self.tmpdir + 'temp*'): 147 for current_file in glob.glob(self.tempdir + 'temp*'):
141 #Clean every jpg image 148 #Clean every jpg image
142 class_file = mat.create_class_file(current_file, False) 149 class_file = mat.create_class_file(current_file, False, False)
143 class_file.remove_all() 150 class_file.remove_all()
144 151
145 subprocess.call(self.convert % (self.tmpdir + 152 subprocess.call(self.convert % (self.tempdir +
146 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf 153 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf
147 154
148 for current_file in glob.glob(self.tmpdir + 'temp*'): 155 for current_file in glob.glob(self.tempdir + 'temp*'):
149 #remove jpg files 156 #remove jpg files
150 mat.secure_remove(current_file) 157 mat.secure_remove(current_file)
151 158
@@ -155,7 +162,7 @@ class PdfStripper(parser.Generic_parser):
155 name = self.realname 162 name = self.realname
156 else: 163 else:
157 name = self.output 164 name = self.output
158 class_file = mat.create_class_file(name, False) 165 class_file = mat.create_class_file(name, False, False)
159 class_file.remove_all() 166 class_file.remove_all()
160 167
161 def is_clean(self): 168 def is_clean(self):