diff options
| author | jvoisin | 2011-08-05 11:42:40 +0200 |
|---|---|---|
| committer | jvoisin | 2011-08-05 11:42:40 +0200 |
| commit | 503e926812d35032ed527c81e78444f362a5d527 (patch) | |
| tree | 599cd3940e3883b8fc275fb924d5f2ca0ae34a07 /lib | |
| parent | 943b0faf21883089827d976c48a231293c091640 (diff) | |
Support of openxml office format
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/archive.py | 2 | ||||
| -rw-r--r-- | lib/office.py | 43 |
2 files changed, 44 insertions, 1 deletions
diff --git a/lib/archive.py b/lib/archive.py index 5956a1e..fb15705 100644 --- a/lib/archive.py +++ b/lib/archive.py | |||
| @@ -97,7 +97,7 @@ harmless format' % item.filename) | |||
| 97 | _, ext = os.path.splitext(name) | 97 | _, ext = os.path.splitext(name) |
| 98 | bname = os.path.basename(item.filename) | 98 | bname = os.path.basename(item.filename) |
| 99 | if ext not in parser.NOMETA: | 99 | if ext not in parser.NOMETA: |
| 100 | if bname != 'mimetype': | 100 | if bname != 'mimetype' and bname != '.rels': |
| 101 | return False | 101 | return False |
| 102 | mat.secure_remove(name) | 102 | mat.secure_remove(name) |
| 103 | zipin.close() | 103 | zipin.close() |
diff --git a/lib/office.py b/lib/office.py index 6fdcf2d..b7c607f 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -188,6 +188,49 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 188 | It contains mostly xml, but can have media blobs, crap, ... | 188 | It contains mostly xml, but can have media blobs, crap, ... |
| 189 | (I don't like this format.) | 189 | (I don't like this format.) |
| 190 | ''' | 190 | ''' |
def _remove_all(self, method):
    '''
        Rebuild the archive self.filename into self.output, leaving
        the document metadata behind, then call self.do_backup().

        Each member of the input archive is handled as follows:
          - members under "docProps/" are dropped (they hold the
            document metadata);
          - members whose extension is in parser.NOMETA, and members
            named ".rels", are copied unchanged;
          - any other regular file is handed to the stripper returned
            by mat.create_class_file() and copied once cleaned. If
            that fails, the member is only kept when self.add2archive
            is set.

        method: 'normal' calls remove_all() on each member's stripper,
        any other value calls remove_all_ugly().

        FIXME ?
        There is a patch implementing the Zipfile.remove()
        method here : http://bugs.python.org/issue6818
    '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    try:
        zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
        try:
            for item in zipin.namelist():
                if item.startswith('docProps/'):  # metadatas
                    continue

                name = os.path.join(self.tempdir, item)
                _, ext = os.path.splitext(name)
                # namelist() yields full archive paths such as
                # "_rels/.rels", so compare the basename (this is also
                # what the check in archive.py does).
                harmless = (ext in parser.NOMETA or
                            os.path.basename(item) == '.rels')

                zipin.extract(item, self.tempdir)
                if harmless:
                    try:
                        zipout.write(name, item)
                    finally:
                        # never leave extracted data in the tempdir
                        mat.secure_remove(name)
                elif os.path.isfile(name):
                    try:
                        # NOTE(review): create_class_file() presumably
                        # returns None for unsupported formats, which
                        # then raises below - confirm in mat.py
                        cfile = mat.create_class_file(name, False,
                                                      self.add2archive)
                        if method == 'normal':
                            cfile.remove_all()
                        else:
                            cfile.remove_all_ugly()
                        logging.debug('Processing %s from %s' % (item,
                                      self.filename))
                        zipout.write(name, item)
                    except Exception:
                        # Exception rather than a bare except, so that
                        # KeyboardInterrupt/SystemExit still propagate
                        logging.info('%s\' fileformat is not supported' % item)
                        if self.add2archive:
                            zipout.write(name, item)
                    finally:
                        mat.secure_remove(name)
            zipout.comment = ''
        finally:
            zipout.close()
    finally:
        zipin.close()
    logging.info('%s treated' % self.filename)
    self.do_backup()
| 233 | |||
| 191 | def is_clean(self): | 234 | def is_clean(self): |
| 192 | ''' | 235 | ''' |
| 193 | Check if the file is clean from harmful metadatas | 236 | Check if the file is clean from harmful metadatas |
