diff options
| author | jvoisin | 2011-12-23 19:51:22 +0100 |
|---|---|---|
| committer | jvoisin | 2011-12-23 19:51:22 +0100 |
| commit | 6225574709d26422101963d6c344c777fa772ace (patch) | |
| tree | d762d1a2a805b8ed9e75754a24b69064379d02a8 | |
| parent | a068cca895b8a92b29e666cf75658fceb5a8d0a2 (diff) | |
Proper support of opendocument metadata
| -rw-r--r-- | mat/office.py | 15 |
1 file changed, 9 insertions, 6 deletions
diff --git a/mat/office.py b/mat/office.py index 331d9bf..e3febba 100644 --- a/mat/office.py +++ b/mat/office.py | |||
| @@ -35,12 +35,15 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 35 | try: | 35 | try: |
| 36 | content = zipin.read('meta.xml') | 36 | content = zipin.read('meta.xml') |
| 37 | dom1 = minidom.parseString(content) | 37 | dom1 = minidom.parseString(content) |
| 38 | a = dom1.getElementsByTagName('office:meta') | 38 | elements = dom1.getElementsByTagName('office:meta') |
| 39 | for i in a[0].childNodes: | 39 | for i in elements[0].childNodes: |
| 40 | msg = '' | 40 | if i.tagName != 'meta:document-statistic': |
| 41 | for j in i.childNodes: | 41 | nodename = ''.join([k for k in i.nodeName.split(':')[1:]]) |
| 42 | msg += j.data | 42 | metadata[nodename] = ''.join([j.data for j in i.childNodes]) |
| 43 | metadata[i.tagName] = msg | 43 | else: |
| 44 | # thank you w3c for not providing a nice | ||
| 45 | # method to get all attributes from a node | ||
| 46 | pass | ||
| 44 | zipin.close() | 47 | zipin.close() |
| 45 | except KeyError: # no meta.xml file found | 48 | except KeyError: # no meta.xml file found |
| 46 | logging.debug('%s has no opendocument metadata' % self.filename) | 49 | logging.debug('%s has no opendocument metadata' % self.filename) |
