summary | refs | log | tree | commit | diff
path: root/lib/office.py
diff options
context:
space:
mode:
author: jvoisin, 2011-07-25 03:03:12 +0200
committer: jvoisin, 2011-07-25 03:03:12 +0200
commit: 7bec354973580216c64889b925e1f7d6a224d7dd (patch)
tree: 7ddf33ae6a1ffd5c9d03522ae508f67632f638cb /lib/office.py
parent: ac248b5b4979aafa0c05f8253e2f9e1bdba305e6 (diff)
More abstraction, and changed the name of the output file
Diffstat (limited to 'lib/office.py')
-rw-r--r-- lib/office.py | 24
1 files changed, 9 insertions, 15 deletions
diff --git a/lib/office.py b/lib/office.py
index f87f357..2302dbc 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -27,7 +27,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
27 method here : http://bugs.python.org/issue6818 27 method here : http://bugs.python.org/issue6818
28 ''' 28 '''
29 zipin = zipfile.ZipFile(self.filename, 'r') 29 zipin = zipfile.ZipFile(self.filename, 'r')
30 zipout = zipfile.ZipFile(self.filename + parser.POSTFIX, 'w', 30 zipout = zipfile.ZipFile(self.basename + parser.POSTFIX + self.ext, 'w',
31 allowZip64=True) 31 allowZip64=True)
32 for item in zipin.namelist(): 32 for item in zipin.namelist():
33 name = os.path.join(self.tempdir, item) 33 name = os.path.join(self.tempdir, item)
@@ -65,10 +65,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
65 logging.info('%s treated' % self.filename) 65 logging.info('%s treated' % self.filename)
66 zipin.close() 66 zipin.close()
67 zipout.close() 67 zipout.close()
68 68 self.do_backup()
69 if self.backup is False:
70 mat.secure_remove(self.filename) #remove the old file
71 os.rename(self.filename + parser.POSTFIX, self.filename)
72 69
73 def is_clean(self): 70 def is_clean(self):
74 zipin = zipfile.ZipFile(self.filename, 'r') 71 zipin = zipfile.ZipFile(self.filename, 'r')
@@ -106,9 +103,7 @@ class TorrentStripper(parser.Generic_parser):
106 del self.editor['/root/' + field.name] 103 del self.editor['/root/' + field.name]
107 hachoir_core.field.writeIntoFile(self.editor, 104 hachoir_core.field.writeIntoFile(self.editor,
108 self.filename + parser.POSTFIX) 105 self.filename + parser.POSTFIX)
109 if self.backup is False: 106 self.do_backup()
110 mat.secure_remove(self.filename) #remove the old file
111 os.rename(self.filename + parser.POSTFIX, self.filename)
112 107
113 def is_clean(self): 108 def is_clean(self):
114 for field in self.editor['root']: 109 for field in self.editor['root']:
@@ -138,6 +133,8 @@ class PdfStripper(parser.Generic_parser):
138 Represent a pdf file, with the help of pdfrw 133 Represent a pdf file, with the help of pdfrw
139 ''' 134 '''
140 def __init__(self, filename, realname, backup): 135 def __init__(self, filename, realname, backup):
136 name, path = os.path.splitext(filename)
137 self.output = name + '.cleaned.' + ext
141 self.filename = filename 138 self.filename = filename
142 self.backup = backup 139 self.backup = backup
143 self.realname = realname 140 self.realname = realname
@@ -159,17 +156,14 @@ class PdfStripper(parser.Generic_parser):
159 self.trailer.Info.ModDate = '' 156 self.trailer.Info.ModDate = ''
160 157
161 self.writer.trailer = self.trailer 158 self.writer.trailer = self.trailer
162 self.writer.write(self.filename + parser.POSTFIX) 159 self.writer.write(self.output)
163 if self.backup is False: 160 self.do_backup()
164 mat.secure_remove(self.filename) #remove the old file
165 os.rename(self.filename + parser.POSTFIX, self.filename)
166 161
167 def remove_all_ugly(self): 162 def remove_all_ugly(self):
168 ''' 163 '''
169 Transform each pages into a jpg, clean them, 164 Transform each pages into a jpg, clean them,
170 then re-assemble them into a new pdf 165 then re-assemble them into a new pdf
171 ''' 166 '''
172 output_file = self.realname + parser.POSTFIX + '.pdf'
173 _, self.tmpdir = tempfile.mkstemp() 167 _, self.tmpdir = tempfile.mkstemp()
174 subprocess.call(self.convert % (self.filename, self.tmpdir + 168 subprocess.call(self.convert % (self.filename, self.tmpdir +
175 'temp.jpg'), shell=True)#Convert pages to jpg 169 'temp.jpg'), shell=True)#Convert pages to jpg
@@ -180,7 +174,7 @@ class PdfStripper(parser.Generic_parser):
180 class_file.remove_all() 174 class_file.remove_all()
181 175
182 subprocess.call(self.convert % (self.tmpdir + 176 subprocess.call(self.convert % (self.tmpdir +
183 'temp.jpg*', output_file), shell=True)#Assemble jpg into pdf 177 'temp.jpg*', self.output), shell=True)#Assemble jpg into pdf
184 178
185 for current_file in glob.glob(self.tmpdir + 'temp*'): 179 for current_file in glob.glob(self.tmpdir + 'temp*'):
186 #remove jpg files 180 #remove jpg files
@@ -188,7 +182,7 @@ class PdfStripper(parser.Generic_parser):
188 182
189 if self.backup is False: 183 if self.backup is False:
190 mat.secure_remove(self.filename) #remove the old file 184 mat.secure_remove(self.filename) #remove the old file
191 os.rename(output_file, self.filename)#rename the new 185 os.rename(self.output, self.filename)#rename the new
192 name = self.realname 186 name = self.realname
193 else: 187 else:
194 name = output_file 188 name = output_file