summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/pdf.py 11
-rw-r--r-- tests/test_libmat2.py 4
2 files changed, 15 insertions, 0 deletions
diff --git a/src/pdf.py b/src/pdf.py
index 96eec13..c119449 100644
--- a/src/pdf.py
+++ b/src/pdf.py
@@ -3,6 +3,7 @@
3""" 3"""
4 4
5import os 5import os
6import re
6import logging 7import logging
7import tempfile 8import tempfile
8import io 9import io
@@ -76,6 +77,13 @@ class PDFParser(abstract.AbstractParser):
76 77
77 return True 78 return True
78 79
80
81 def __parse_metadata_field(self, data:str) -> dict:
82 metadata = {}
83 for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
84 metadata[key] = value
85 return metadata
86
79 def get_meta(self): 87 def get_meta(self):
80 """ Return a dict with all the meta of the file 88 """ Return a dict with all the meta of the file
81 """ 89 """
@@ -84,4 +92,7 @@ class PDFParser(abstract.AbstractParser):
84 for key in self.meta_list: 92 for key in self.meta_list:
85 if document.get_property(key): 93 if document.get_property(key):
86 metadata[key] = document.get_property(key) 94 metadata[key] = document.get_property(key)
95 if 'metadata' in metadata:
96 parsed_meta = self.__parse_metadata_field(metadata['metadata'])
97 return {**metadata, **parsed_meta}
87 return metadata 98 return metadata
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 4cfb80a..6141dbe 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -23,6 +23,10 @@ class TestGetMeta(unittest.TestCase):
23 meta = p.get_meta() 23 meta = p.get_meta()
24 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') 24 self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
25 self.assertEqual(meta['creator'], "'Certified by IEEE PDFeXpress at 03/19/2016 2:56:07 AM'") 25 self.assertEqual(meta['creator'], "'Certified by IEEE PDFeXpress at 03/19/2016 2:56:07 AM'")
26 self.assertEqual(meta['DocumentID'], "uuid:4a1a79c8-404e-4d38-9580-5bc081036e61")
27 self.assertEqual(meta['PTEX.Fullbanner'], "This is pdfTeX, Version " \
28 "3.1415926-2.5-1.40.14 (TeX Live 2013/Debian) kpathsea " \
29 "version 6.1.1")
26 30
27 def test_png(self): 31 def test_png(self):
28 p = images.PNGParser('./tests/data/dirty.png') 32 p = images.PNGParser('./tests/data/dirty.png')