diff options
| author | jvoisin | 2011-07-24 02:30:50 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-24 02:30:50 +0200 |
| commit | ace3d8213921a9308d30afc057fc21221420e12e (patch) | |
| tree | be506ef8b0534127ad080f7d1ad7d4ca9a020a67 /lib/office.py | |
| parent | bcc0ad2e7491c212ef35ca250fb8c5f2c53572da (diff) | |
First implementation of open document format
Diffstat (limited to 'lib/office.py')
| -rw-r--r-- | lib/office.py | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/lib/office.py b/lib/office.py index de38129..5d62732 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -3,12 +3,98 @@ import mimetypes | |||
| 3 | import subprocess | 3 | import subprocess |
| 4 | import tempfile | 4 | import tempfile |
| 5 | import glob | 5 | import glob |
| 6 | import logging | ||
| 7 | import zipfile | ||
| 8 | import shutil | ||
| 6 | 9 | ||
| 7 | import hachoir_core | 10 | import hachoir_core |
| 8 | 11 | ||
| 9 | import pdfrw | 12 | import pdfrw |
| 10 | import mat | 13 | import mat |
| 11 | import parser | 14 | import parser |
| 15 | import archive | ||
| 16 | |||
class OpenDocumentStripper(archive.GenericArchiveStripper):
    '''
        Strip the metadata of an OpenDocument file.

        An OpenDocument file is a zip archive containing xml files.
        The one that interests us is meta.xml: it holds the document
        metadata, so it is left out of the cleaned archive.
    '''

    def remove_folder(self, folder_list):
        '''
            Delete the top-level directory of every path in folder_list.

            folder_list holds '/'-separated directory paths, relative
            to the current working directory, where archive members
            were temporarily extracted. Removal is best effort: a
            directory that cannot be deleted is logged and skipped.
        '''
        already_seen = set()
        for folder in folder_list:
            dirname = folder.split('/')[0]
            # The names come from the (untrusted) archive: never let an
            # empty, '.' or '..' component reach rmtree.
            if dirname in ('', os.curdir, os.pardir):
                continue
            # Many members share the same top-level directory.
            if dirname in already_seen:
                continue
            already_seen.add(dirname)
            try:
                shutil.rmtree(dirname)
            except OSError as err:
                # Some folders or open document files are buggy
                logging.debug('Unable to remove %s: %s' % (dirname, err))
        self.folder_list = []

    def _clean_embedded_file(self, item, method, zipout):
        '''
            Clean the already extracted member item, and add it to
            zipout.

            method selects the cleaning routine: 'normal' calls
            remove_all(), anything else calls remove_all_ugly().
            If the member can not be processed, it is only added to
            zipout when self.add2archive is set.
        '''
        try:
            cfile = mat.create_class_file(item, False, self.add2archive)
            if method == 'normal':
                cfile.remove_all()
            else:
                cfile.remove_all_ugly()
            logging.debug('Processing %s from %s' % (item, self.filename))
            zipout.write(item)
        except Exception:
            # NOTE(review): presumably create_class_file() yields no
            # usable object for unsupported formats - confirm in mat.
            logging.info('%s\' fileformat is not supported' % item)
            if self.add2archive:
                zipout.write(item)

    def _remove_all(self, method):
        '''
            Write a copy of self.filename without its meta.xml to
            self.filename + parser.POSTFIX.

            The xml files, the manifest and the mimetype file are
            copied as they are; every other member is cleaned with
            the routine selected by method ('normal' or anything else
            for the "ugly" one). Members are extracted in the current
            working directory, and removed afterwards.
            If self.backup is False, the original file is replaced
            by the cleaned one.

            FIXME ?
            There is a patch implementing the Zipfile.remove()
            method here : http://bugs.python.org/issue6818
        '''
        output = self.filename + parser.POSTFIX
        folder_list = []
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipout = zipfile.ZipFile(output, 'w', allowZip64=True)
            try:
                for item in zipin.namelist():
                    if os.path.basename(item) != item:
                        # the member lives in a folder: remember it
                        folder_list.append(os.path.dirname(item))
                    if item == 'meta.xml':  # contains the metadata
                        continue
                    if (item.endswith('.xml') or item.startswith('manifest')
                            or item == 'mimetype'):
                        # structural files are copied unchanged
                        zipin.extract(item)
                        zipout.write(item)
                        mat.secure_remove(item)
                    else:
                        zipin.extract(item)
                        # directory entries have nothing to clean
                        if os.path.isfile(item):
                            self._clean_embedded_file(item, method, zipout)
                            mat.secure_remove(item)
                zipout.comment = b''
            finally:
                zipout.close()
        finally:
            # release the archive and the extracted folders,
            # even if the processing failed
            zipin.close()
            self.remove_folder(folder_list)
        logging.info('%s treated' % self.filename)

        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(output, self.filename)

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata.

            Return False if the archive contains a meta.xml file.
            Otherwise, return the verdict of archive.ZipStripper
            on the same file, as a boolean.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipin.getinfo('meta.xml')
            has_meta = True
        except KeyError:  # no meta.xml in the file
            has_meta = False
        finally:
            zipin.close()

        if has_meta:
            return False
        czf = archive.ZipStripper(self.realname, self.filename,
            self.parser, self.editor, self.backup, self.add2archive)
        return bool(czf.is_clean())
| 97 | |||
| 12 | 98 | ||
| 13 | class TorrentStripper(parser.Generic_parser): | 99 | class TorrentStripper(parser.Generic_parser): |
| 14 | ''' | 100 | ''' |
