summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libmat2/office.py16
1 files changed, 12 insertions, 4 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index 914fd39..6ab7e80 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -78,8 +78,12 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
78 for item in zipin.infolist(): 78 for item in zipin.infolist():
79 if item.filename.startswith('docProps/') and item.filename.endswith('.xml'): 79 if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):
80 content = zipin.read(item).decode('utf-8') 80 content = zipin.read(item).decode('utf-8')
81 for (key, value) in re.findall(r"<(.+)>(.+)</\1>", content, re.I): 81 try:
82 metadata[key] = value 82 results = re.findall(r"<(.+)>(.+)</\1>", content, re.I|re.M)
83 for (key, value) in results:
84 metadata[key] = value
85 except TypeError: # We didn't manage to parse the xml file
86 pass
83 if not metadata: # better safe than sorry 87 if not metadata: # better safe than sorry
84 metadata[item] = 'harmful content' 88 metadata[item] = 'harmful content'
85 89
@@ -140,8 +144,12 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
140 for item in zipin.infolist(): 144 for item in zipin.infolist():
141 if item.filename == 'meta.xml': 145 if item.filename == 'meta.xml':
142 content = zipin.read(item).decode('utf-8') 146 content = zipin.read(item).decode('utf-8')
143 for (key, value) in re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", content, re.I): 147 try:
144 metadata[key] = value 148 results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", content, re.I|re.M)
149 for (key, value) in results:
150 metadata[key] = value
151 except TypeError: # We didn't manage to parse the xml file
152 pass
145 if not metadata: # better safe than sorry 153 if not metadata: # better safe than sorry
146 metadata[item] = 'harmful content' 154 metadata[item] = 'harmful content'
147 for key, value in self._get_zipinfo_meta(item).items(): 155 for key, value in self._get_zipinfo_meta(item).items():