summaryrefslogtreecommitdiff
path: root/src/pdf.py
diff options
context:
space:
mode:
authorjvoisin2018-03-31 15:46:17 +0200
committerjvoisin2018-03-31 15:46:17 +0200
commitf391c9603c36a8ec80942c23ac6ba39fca5df72a (patch)
tree7fdc2053c01f103a675274ebd3e6abcffba4dfbe /src/pdf.py
parent088c3d013ce4515920dea5e0becb98b36afa9a31 (diff)
Change a bit the source code organisation
Diffstat (limited to 'src/pdf.py')
-rw-r--r--src/pdf.py89
1 files changed, 89 insertions, 0 deletions
diff --git a/src/pdf.py b/src/pdf.py
new file mode 100644
index 0000000..90f05e1
--- /dev/null
+++ b/src/pdf.py
@@ -0,0 +1,89 @@
1""" Handle PDF
2
3"""
4
5import os
6import logging
7import tempfile
8import shutil
9import io
10import tempfile
11
12import cairo
13import gi
14gi.require_version('Poppler', '0.18')
15from gi.repository import Poppler
16
17from . import abstract
18
19logging.basicConfig(level=logging.DEBUG)
20
21
class PDFParser(abstract.AbstractParser):
    """Parser for PDF files.

    Reads document-level metadata through Poppler and produces a cleaned
    copy of the file by re-rendering every page as an image into a brand
    new PDF.
    """

    # MIME types handled by this parser.
    mimetypes = {'application/pdf', }
    # Poppler.Document property names looked up by get_meta().
    meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
                 'metadata', 'mod-date', 'producer', 'subject', 'title',
                 'viewer-preferences'}

    def __init__(self, filename):
        """Remember the location of *filename* as a URI for Poppler.

        NOTE(review): ``self.filename`` (and ``self.output_filename`` used
        by remove_all) are presumably set by ``AbstractParser.__init__`` --
        confirm against the base class.
        """
        super().__init__(filename)
        self.uri = self._path_to_uri(self.filename)
        # Factor applied to the page size when rasterising pages.
        self.__scale = 2

    @staticmethod
    def _path_to_uri(path):
        """Return a percent-encoded ``file://`` URI for the local *path*.

        Plain string concatenation yields an invalid URI as soon as the
        path contains characters such as ``#`` or ``%``.
        """
        # Function-scope import so the block does not rely on a new
        # file-level import.
        import pathlib
        return pathlib.Path(os.path.abspath(path)).as_uri()

    def _render_page(self, page, pdf_surface, pdf_context):
        """Rasterise one Poppler *page* and append it to *pdf_surface*."""
        scale = self.__scale
        page_width, page_height = page.get_size()

        img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32,
                                         int(page_width) * scale,
                                         int(page_height) * scale)
        img_context = cairo.Context(img_surface)
        img_context.scale(scale, scale)
        page.render_for_printing(img_context)
        img_context.show_page()

        # Round-trip through an in-memory PNG, so that what is painted into
        # the new PDF is the PNG-decoded image.
        buf = io.BytesIO()
        img_surface.write_to_png(buf)
        img_surface.finish()
        buf.seek(0)
        img = cairo.ImageSurface.create_from_png(buf)

        # The output page must be sized with the same factor used for the
        # raster, otherwise the two drift apart when the scale changes.
        pdf_surface.set_size(page_width * scale, page_height * scale)
        pdf_context.set_source_surface(img, 0, 0)
        pdf_context.paint()
        pdf_context.show_page()

    def remove_all(self):
        """
        Load the document into Poppler, render pages on PNG,
        and shove those PNG into a new PDF. Metadata from the new
        PDF are removed via Poppler, because there is no way to tell
        cairo to not add "created by cairo" during rendering.

        The result is saved to ``self.output_filename``.

        Returns:
            True once the cleaned file has been saved.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        pages_count = document.get_n_pages()

        tmp_fd, tmp_path = tempfile.mkstemp()
        # cairo opens the file by path; close the descriptor handed back by
        # mkstemp() right away so it is not leaked.
        os.close(tmp_fd)
        try:
            pdf_surface = cairo.PDFSurface(tmp_path, 128, 128)
            pdf_context = cairo.Context(pdf_surface)

            for pagenum in range(pages_count):
                logging.info("Rendering page %d/%d", pagenum + 1, pages_count)
                self._render_page(document.get_page(pagenum),
                                  pdf_surface, pdf_context)

            pdf_surface.finish()

            # Blank the producer/creator fields of the freshly rendered PDF
            # (see the docstring: cairo stamps them during rendering).
            document = Poppler.Document.new_from_file(
                self._path_to_uri(tmp_path), None)
            document.set_producer('')
            document.set_creator('')
            document.save(self._path_to_uri(self.output_filename))
        finally:
            # Remove the intermediate file even when rendering or saving
            # fails.
            os.remove(tmp_path)

        return True

    def get_meta(self):
        """ Return a dict with all the meta of the file

        Only the properties listed in ``meta_list`` whose value is truthy
        are included.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        metadata = {}
        for key in self.meta_list:
            # Query each property once and reuse the value.
            value = document.get_property(key)
            if value:
                metadata[key] = value
        return metadata