diff options
| author | jvoisin | 2018-03-19 23:43:49 +0100 |
|---|---|---|
| committer | jvoisin | 2018-03-19 23:43:49 +0100 |
| commit | 8f44616366f9ca395314d59a98840e2912f488df (patch) | |
| tree | 88d3d6fbdd45a6f9e35abf7646e9950933980e3a /src/parsers/pdf.py | |
| parent | d262f780f7653c7e6c5d3b30c5ceedbb25f41787 (diff) | |
Implement mimetype detection
Diffstat (limited to 'src/parsers/pdf.py')
| -rw-r--r-- | src/parsers/pdf.py | 13 |
1 file changed, 7 insertions, 6 deletions
diff --git a/src/parsers/pdf.py b/src/parsers/pdf.py index 26985c6..e7bd00d 100644 --- a/src/parsers/pdf.py +++ b/src/parsers/pdf.py | |||
| @@ -20,13 +20,14 @@ logging.basicConfig(level=logging.DEBUG) | |||
| 20 | 20 | ||
| 21 | 21 | ||
| 22 | class PDFParser(abstract.AbstractParser): | 22 | class PDFParser(abstract.AbstractParser): |
| 23 | mimetypes = {'application/pdf', } | ||
| 24 | meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords', | ||
| 25 | 'metadata', 'mod-date', 'producer', 'subject', 'title', | ||
| 26 | 'viewer-preferences'} | ||
| 27 | |||
| 23 | def __init__(self, filename): | 28 | def __init__(self, filename): |
| 24 | super().__init__(filename) | 29 | super().__init__(filename) |
| 25 | self.meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords', | ||
| 26 | 'metadata', 'mod-date', 'producer', 'subject', 'title', | ||
| 27 | 'viewer-preferences'} | ||
| 28 | self.uri = 'file://' + os.path.abspath(self.filename) | 30 | self.uri = 'file://' + os.path.abspath(self.filename) |
| 29 | self.password = None | ||
| 30 | 31 | ||
| 31 | def remove_all(self): | 32 | def remove_all(self): |
| 32 | """ | 33 | """ |
| @@ -35,7 +36,7 @@ class PDFParser(abstract.AbstractParser): | |||
| 35 | PDF are removed via Poppler, because there is no way to tell | 36 | PDF are removed via Poppler, because there is no way to tell |
| 36 | cairo to not add "created by cairo" during rendering. | 37 | cairo to not add "created by cairo" during rendering. |
| 37 | """ | 38 | """ |
| 38 | document = Poppler.Document.new_from_file(self.uri, self.password) | 39 | document = Poppler.Document.new_from_file(self.uri, None) |
| 39 | pages_count = document.get_n_pages() | 40 | pages_count = document.get_n_pages() |
| 40 | 41 | ||
| 41 | _, tmp_path = tempfile.mkstemp() | 42 | _, tmp_path = tempfile.mkstemp() |
| @@ -80,7 +81,7 @@ class PDFParser(abstract.AbstractParser): | |||
| 80 | """ Return a dict with all the meta of the file | 81 | """ Return a dict with all the meta of the file |
| 81 | """ | 82 | """ |
| 82 | print("URI: %s", self.uri) | 83 | print("URI: %s", self.uri) |
| 83 | document = Poppler.Document.new_from_file(self.uri, self.password) | 84 | document = Poppler.Document.new_from_file(self.uri, None) |
| 84 | metadata = {} | 85 | metadata = {} |
| 85 | for key in self.meta_list: | 86 | for key in self.meta_list: |
| 86 | if document.get_property(key): | 87 | if document.get_property(key): |
