summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jvoisin, 2011-07-26 04:39:17 +0200
committer: jvoisin, 2011-07-26 04:39:17 +0200
commit: 342cbf4bd69e638f62319ea0e6660af5dc717131 (patch)
tree: 442b57f29573d1400cf255322393e8a44d246e7e
parent: d2f657c2279ee19e7d818476f6ee4213f32892c5 (diff)
Full support of opendocument format
-rw-r--r-- lib/office.py 68
1 files changed, 20 insertions, 48 deletions
diff --git a/lib/office.py b/lib/office.py
index 9a5be3e..b7fa555 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -6,6 +6,8 @@ import glob
6import logging 6import logging
7import zipfile 7import zipfile
8import shutil 8import shutil
9import re
10from xml.etree import ElementTree
9 11
10import hachoir_core 12import hachoir_core
11 13
@@ -20,6 +22,22 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
20 The one that interest us is meta.xml 22 The one that interest us is meta.xml
21 ''' 23 '''
22 24
25 def get_meta(self):
26 zipin = zipfile.ZipFile(self.filename, 'r')
27 metadata = {}
28 try:
29 content = zipin.read('meta.xml')
30 zipin.close()
31 tree = ElementTree.fromstring(content)
32 for node in tree.iter():
33 key = re.sub('{.*}', '', node.tag)
34 metadata[key] = node.text
35 except KeyError:#no meta.xml file found
36 logging.debug('%s has no opendocument metadata' % self.filename)
37 metadata[self.filename] = ''
38 return metadata
39
40
23 def _remove_all(self, method): 41 def _remove_all(self, method):
24 ''' 42 '''
25 FIXME ? 43 FIXME ?
@@ -56,8 +74,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
56 self.filename)) 74 self.filename))
57 zipout.write(name, item) 75 zipout.write(name, item)
58 except: 76 except:
59 logging.info('%s\' fileformat is not supported' % 77 logging.info('%s\' fileformat is not supported' % item)
60 item)
61 if self.add2archive: 78 if self.add2archive:
62 zipout.write(item, name) 79 zipout.write(item, name)
63 mat.secure_remove(name) 80 mat.secure_remove(name)
@@ -71,6 +88,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
71 zipin = zipfile.ZipFile(self.filename, 'r') 88 zipin = zipfile.ZipFile(self.filename, 'r')
72 try: 89 try:
73 zipin.getinfo('meta.xml') 90 zipin.getinfo('meta.xml')
91 return False
74 except KeyError:#no meta.xml in the file 92 except KeyError:#no meta.xml in the file
75 zipin.close() 93 zipin.close()
76 czf = archive.ZipStripper(self.realname, self.filename, 94 czf = archive.ZipStripper(self.realname, self.filename,
@@ -79,54 +97,8 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
79 return True 97 return True
80 else: 98 else:
81 return False 99 return False
82 return False
83
84
85class TorrentStripper(parser.Generic_parser):
86 '''
87 A torrent file looks like:
88 -root
89 -start
90 -announce
91 -announce-list
92 -comment
93 -created_by
94 -creation_date
95 -encoding
96 -info
97 -end
98 '''
99 def remove_all(self):
100 for field in self.editor['root']:
101 if self._should_remove(field):
102 #FIXME : hachoir does not support torrent metadata editing :<
103 del self.editor['/root/' + field.name]
104 hachoir_core.field.writeIntoFile(self.editor,
105 self.filename + parser.POSTFIX)
106 self.do_backup()
107
108 def is_clean(self):
109 for field in self.editor['root']:
110 if self._should_remove(field):
111 return False
112 return True 100 return True
113 101
114 def get_meta(self):
115 metadata = {}
116 for field in self.editor['root']:
117 if self._should_remove(field):
118 try:#FIXME
119 metadata[field.name] = field.value
120 except:
121 metadata[field.name] = 'harmful content'
122 return metadata
123
124 def _should_remove(self, field):
125 if field.name in ('comment', 'created_by', 'creation_date', 'info'):
126 return True
127 else:
128 return False
129
130 102
131class PdfStripper(parser.Generic_parser): 103class PdfStripper(parser.Generic_parser):
132 ''' 104 '''