diff options
| author | jvoisin | 2011-07-26 15:14:48 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-26 15:14:48 +0200 |
| commit | 962e9aec5ffcdaae39e06f277dd47d1943205c37 (patch) | |
| tree | 85d57c245f010579dc3eb9199fe3ed6e379e96d8 /lib/office.py | |
| parent | 7c9edd6514854f707b87e150a1ffa327ebd8dcac (diff) | |
Bugfixes (especially for pdf), and more pylint conformity
Diffstat (limited to 'lib/office.py')
| -rw-r--r-- | lib/office.py | 23 |
1 files changed, 15 insertions, 8 deletions
diff --git a/lib/office.py b/lib/office.py
index 432bc0b..5fa475d 100644
--- a/lib/office.py
+++ b/lib/office.py
| @@ -6,6 +6,7 @@ import glob | |||
| 6 | import logging | 6 | import logging |
| 7 | import zipfile | 7 | import zipfile |
| 8 | import re | 8 | import re |
| 9 | import shutil | ||
| 9 | from xml.etree import ElementTree | 10 | from xml.etree import ElementTree |
| 10 | 11 | ||
| 11 | 12 | ||
| @@ -97,7 +98,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 97 | return True | 98 | return True |
| 98 | 99 | ||
| 99 | 100 | ||
| 100 | class PdfStripper(parser.Generic_parser): | 101 | class PdfStripper(parser.GenericParser): |
| 101 | ''' | 102 | ''' |
| 102 | Represent a pdf file, with the help of pdfrw | 103 | Represent a pdf file, with the help of pdfrw |
| 103 | ''' | 104 | ''' |
| @@ -109,10 +110,17 @@ class PdfStripper(parser.Generic_parser): | |||
| 109 | self.realname = realname | 110 | self.realname = realname |
| 110 | self.shortname = os.path.basename(filename) | 111 | self.shortname = os.path.basename(filename) |
| 111 | self.mime = mimetypes.guess_type(filename)[0] | 112 | self.mime = mimetypes.guess_type(filename)[0] |
| 113 | self.tempdir = tempfile.mkdtemp() | ||
| 112 | self.trailer = pdfrw.PdfReader(self.filename) | 114 | self.trailer = pdfrw.PdfReader(self.filename) |
| 113 | self.writer = pdfrw.PdfWriter() | 115 | self.writer = pdfrw.PdfWriter() |
| 114 | self.convert = 'gm convert -antialias -enhance %s %s' | 116 | self.convert = 'gm convert -antialias -enhance %s %s' |
| 115 | 117 | ||
| 118 | def __del__(self): | ||
| 119 | ''' | ||
| 120 | Remove the temp dir | ||
| 121 | ''' | ||
| 122 | shutil.rmtree(self.tempdir) | ||
| 123 | |||
| 116 | def remove_all(self): | 124 | def remove_all(self): |
| 117 | ''' | 125 | ''' |
| 118 | Remove all the meta fields that are compromizing | 126 | Remove all the meta fields that are compromizing |
| @@ -133,19 +141,18 @@ class PdfStripper(parser.Generic_parser): | |||
| 133 | Transform each pages into a jpg, clean them, | 141 | Transform each pages into a jpg, clean them, |
| 134 | then re-assemble them into a new pdf | 142 | then re-assemble them into a new pdf |
| 135 | ''' | 143 | ''' |
| 136 | _, self.tmpdir = tempfile.mkstemp() | 144 | subprocess.call(self.convert % (self.filename, self.tempdir + |
| 137 | subprocess.call(self.convert % (self.filename, self.tmpdir + | ||
| 138 | 'temp.jpg'), shell=True) # Convert pages to jpg | 145 | 'temp.jpg'), shell=True) # Convert pages to jpg |
| 139 | 146 | ||
| 140 | for current_file in glob.glob(self.tmpdir + 'temp*'): | 147 | for current_file in glob.glob(self.tempdir + 'temp*'): |
| 141 | #Clean every jpg image | 148 | #Clean every jpg image |
| 142 | class_file = mat.create_class_file(current_file, False) | 149 | class_file = mat.create_class_file(current_file, False, False) |
| 143 | class_file.remove_all() | 150 | class_file.remove_all() |
| 144 | 151 | ||
| 145 | subprocess.call(self.convert % (self.tmpdir + | 152 | subprocess.call(self.convert % (self.tempdir + |
| 146 | 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf | 153 | 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf |
| 147 | 154 | ||
| 148 | for current_file in glob.glob(self.tmpdir + 'temp*'): | 155 | for current_file in glob.glob(self.tempdir + 'temp*'): |
| 149 | #remove jpg files | 156 | #remove jpg files |
| 150 | mat.secure_remove(current_file) | 157 | mat.secure_remove(current_file) |
| 151 | 158 | ||
| @@ -155,7 +162,7 @@ class PdfStripper(parser.Generic_parser): | |||
| 155 | name = self.realname | 162 | name = self.realname |
| 156 | else: | 163 | else: |
| 157 | name = self.output | 164 | name = self.output |
| 158 | class_file = mat.create_class_file(name, False) | 165 | class_file = mat.create_class_file(name, False, False) |
| 159 | class_file.remove_all() | 166 | class_file.remove_all() |
| 160 | 167 | ||
| 161 | def is_clean(self): | 168 | def is_clean(self): |
