diff options
| author | jvoisin | 2011-07-26 04:39:17 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-26 04:39:17 +0200 |
| commit | 342cbf4bd69e638f62319ea0e6660af5dc717131 (patch) | |
| tree | 442b57f29573d1400cf255322393e8a44d246e7e | |
| parent | d2f657c2279ee19e7d818476f6ee4213f32892c5 (diff) | |
Full support of opendocument format
| -rw-r--r-- | lib/office.py | 68 |
1 file changed, 20 insertions, 48 deletions
diff --git a/lib/office.py b/lib/office.py
index 9a5be3e..b7fa555 100644
--- a/lib/office.py
+++ b/lib/office.py
| @@ -6,6 +6,8 @@ import glob | |||
| 6 | import logging | 6 | import logging |
| 7 | import zipfile | 7 | import zipfile |
| 8 | import shutil | 8 | import shutil |
| 9 | import re | ||
| 10 | from xml.etree import ElementTree | ||
| 9 | 11 | ||
| 10 | import hachoir_core | 12 | import hachoir_core |
| 11 | 13 | ||
| @@ -20,6 +22,22 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 20 | The one that interest us is meta.xml | 22 | The one that interest us is meta.xml |
| 21 | ''' | 23 | ''' |
| 22 | 24 | ||
| 25 | def get_meta(self): | ||
| 26 | zipin = zipfile.ZipFile(self.filename, 'r') | ||
| 27 | metadata = {} | ||
| 28 | try: | ||
| 29 | content = zipin.read('meta.xml') | ||
| 30 | zipin.close() | ||
| 31 | tree = ElementTree.fromstring(content) | ||
| 32 | for node in tree.iter(): | ||
| 33 | key = re.sub('{.*}', '', node.tag) | ||
| 34 | metadata[key] = node.text | ||
| 35 | except KeyError:#no meta.xml file found | ||
| 36 | logging.debug('%s has no opendocument metadata' % self.filename) | ||
| 37 | metadata[self.filename] = '' | ||
| 38 | return metadata | ||
| 39 | |||
| 40 | |||
| 23 | def _remove_all(self, method): | 41 | def _remove_all(self, method): |
| 24 | ''' | 42 | ''' |
| 25 | FIXME ? | 43 | FIXME ? |
| @@ -56,8 +74,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 56 | self.filename)) | 74 | self.filename)) |
| 57 | zipout.write(name, item) | 75 | zipout.write(name, item) |
| 58 | except: | 76 | except: |
| 59 | logging.info('%s\' fileformat is not supported' % | 77 | logging.info('%s\' fileformat is not supported' % item) |
| 60 | item) | ||
| 61 | if self.add2archive: | 78 | if self.add2archive: |
| 62 | zipout.write(item, name) | 79 | zipout.write(item, name) |
| 63 | mat.secure_remove(name) | 80 | mat.secure_remove(name) |
| @@ -71,6 +88,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 71 | zipin = zipfile.ZipFile(self.filename, 'r') | 88 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 72 | try: | 89 | try: |
| 73 | zipin.getinfo('meta.xml') | 90 | zipin.getinfo('meta.xml') |
| 91 | return False | ||
| 74 | except KeyError:#no meta.xml in the file | 92 | except KeyError:#no meta.xml in the file |
| 75 | zipin.close() | 93 | zipin.close() |
| 76 | czf = archive.ZipStripper(self.realname, self.filename, | 94 | czf = archive.ZipStripper(self.realname, self.filename, |
| @@ -79,54 +97,8 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 79 | return True | 97 | return True |
| 80 | else: | 98 | else: |
| 81 | return False | 99 | return False |
| 82 | return False | ||
| 83 | |||
| 84 | |||
| 85 | class TorrentStripper(parser.Generic_parser): | ||
| 86 | ''' | ||
| 87 | A torrent file looks like: | ||
| 88 | -root | ||
| 89 | -start | ||
| 90 | -announce | ||
| 91 | -announce-list | ||
| 92 | -comment | ||
| 93 | -created_by | ||
| 94 | -creation_date | ||
| 95 | -encoding | ||
| 96 | -info | ||
| 97 | -end | ||
| 98 | ''' | ||
| 99 | def remove_all(self): | ||
| 100 | for field in self.editor['root']: | ||
| 101 | if self._should_remove(field): | ||
| 102 | #FIXME : hachoir does not support torrent metadata editing :< | ||
| 103 | del self.editor['/root/' + field.name] | ||
| 104 | hachoir_core.field.writeIntoFile(self.editor, | ||
| 105 | self.filename + parser.POSTFIX) | ||
| 106 | self.do_backup() | ||
| 107 | |||
| 108 | def is_clean(self): | ||
| 109 | for field in self.editor['root']: | ||
| 110 | if self._should_remove(field): | ||
| 111 | return False | ||
| 112 | return True | 100 | return True |
| 113 | 101 | ||
| 114 | def get_meta(self): | ||
| 115 | metadata = {} | ||
| 116 | for field in self.editor['root']: | ||
| 117 | if self._should_remove(field): | ||
| 118 | try:#FIXME | ||
| 119 | metadata[field.name] = field.value | ||
| 120 | except: | ||
| 121 | metadata[field.name] = 'harmful content' | ||
| 122 | return metadata | ||
| 123 | |||
| 124 | def _should_remove(self, field): | ||
| 125 | if field.name in ('comment', 'created_by', 'creation_date', 'info'): | ||
| 126 | return True | ||
| 127 | else: | ||
| 128 | return False | ||
| 129 | |||
| 130 | 102 | ||
| 131 | class PdfStripper(parser.Generic_parser): | 103 | class PdfStripper(parser.Generic_parser): |
| 132 | ''' | 104 | ''' |
