summary | refs | log | tree | commit | diff
path: root/libmat2
diff options
context:
space:
mode:
Diffstat (limited to 'libmat2')
-rw-r--r-- libmat2/office.py 28
1 file changed, 11 insertions, 17 deletions
diff --git a/libmat2/office.py b/libmat2/office.py
index dfad3b3..0c9caa8 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -2,7 +2,7 @@ import logging
2import os 2import os
3import re 3import re
4import zipfile 4import zipfile
5from typing import Dict, Set, Pattern, Tuple, Union, Any 5from typing import Dict, Set, Pattern, Tuple, Any
6 6
7import xml.etree.ElementTree as ET # type: ignore 7import xml.etree.ElementTree as ET # type: ignore
8 8
@@ -375,23 +375,17 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
375 return False 375 return False
376 return True 376 return True
377 377
378 def get_meta(self) -> Dict[str, Union[str, dict]]: 378 def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
379 """ 379 """
380 Yes, I know that parsing xml with regexp ain't pretty, 380 Yes, I know that parsing xml with regexp ain't pretty,
381 be my guest and fix it if you want. 381 be my guest and fix it if you want.
382 """ 382 """
383 metadata = {} 383 if file_path != 'meta.xml':
384 zipin = zipfile.ZipFile(self.filename) 384 return {}
385 for item in zipin.infolist(): 385 with open(full_path, encoding='utf-8') as f:
386 if item.filename == 'meta.xml': 386 try:
387 try: 387 results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", f.read(), re.I|re.M)
388 content = zipin.read(item).decode('utf-8') 388 return {k:v for (k, v) in results}
389 results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", content, re.I|re.M) 389 except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
390 for (key, value) in results: 390 # We didn't manage to parse the xml file
391 metadata[key] = value 391 return {file_path: 'harmful content', }
392 except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
393 metadata[item.filename] = 'harmful content'
394 for key, value in self._get_zipinfo_meta(item).items():
395 metadata[key] = value
396 zipin.close()
397 return metadata