summaryrefslogtreecommitdiff
path: root/libmat2
diff options
context:
space:
mode:
author: jvoisin, 2018-03-18 21:42:12 +0100
committer: jvoisin, 2018-03-18 21:42:12 +0100
commit: df3c27d79dec231809deb4e617070a16858c306d (patch)
tree: 7d7d565e989faea26f0da8ea727bf7c2b2897abd /libmat2
parent: 069765376d5a5a1123901a997581c559b4a461f6 (diff)
Improve the testsuite
Diffstat (limited to 'libmat2')
-rw-r--r-- libmat2/__init__.py 1
-rw-r--r-- libmat2/parsers/__init__.py 0
-rw-r--r-- libmat2/parsers/abstract.py 10
-rw-r--r-- libmat2/parsers/pdf.py 105
4 files changed, 116 insertions, 0 deletions
diff --git a/libmat2/__init__.py b/libmat2/__init__.py
new file mode 100644
index 0000000..3b3dacb
--- /dev/null
+++ b/libmat2/__init__.py
@@ -0,0 +1 @@
# Version string of the libmat2 package.
__version__ = '2.0'
diff --git a/libmat2/parsers/__init__.py b/libmat2/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/libmat2/parsers/__init__.py
diff --git a/libmat2/parsers/abstract.py b/libmat2/parsers/abstract.py
new file mode 100644
index 0000000..a9129cc
--- /dev/null
+++ b/libmat2/parsers/abstract.py
@@ -0,0 +1,10 @@
class AbstractParser:
    """Base class shared by the file-format parsers.

    It only records the file to work on and an (initially empty) set of
    metadata field names. Subclasses are expected to override
    :meth:`get_meta` and :meth:`remove_all`; calling either on this base
    class raises ``NotImplementedError``.
    """

    def __init__(self, filename: str) -> None:
        """Remember *filename*; no file access happens here."""
        self.filename = filename
        # Names of the metadata fields a parser handles. Empty in the base
        # class; subclasses replace it with the keys they know how to read.
        self.meta_list = set()

    def get_meta(self) -> dict:
        """Return the metadata of the file (to be implemented by subclasses).

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            '%s does not implement get_meta()' % type(self).__name__)

    def remove_all(self) -> bool:
        """Strip the metadata of the file (to be implemented by subclasses).

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            '%s does not implement remove_all()' % type(self).__name__)
diff --git a/libmat2/parsers/pdf.py b/libmat2/parsers/pdf.py
new file mode 100644
index 0000000..f6bc110
--- /dev/null
+++ b/libmat2/parsers/pdf.py
@@ -0,0 +1,105 @@
""" Handle PDF

Defines PDFParser, which reads PDF metadata through Poppler and writes a
re-rendered copy of the document through cairo.
"""

import os
import logging
import tempfile
import shutil
import io

import cairo
import gi
# The Poppler introspection version is selected before the import below.
gi.require_version('Poppler', '0.18')
from gi.repository import Poppler, Gio, GLib

# Pillow is optional: Image is None when it cannot be imported.
try:
    from PIL import Image
except ImportError:
    Image = None

from . import abstract

# NOTE(review): this configures the root logger at DEBUG level as a side
# effect of importing the module; tempfile, shutil and Image do not appear
# to be used in the visible part of this file -- confirm before removing.
logging.basicConfig(level=logging.DEBUG)
24
25
class PDFParser(abstract.AbstractParser):
    """Read and strip the metadata of a PDF file.

    Metadata is read through Poppler document properties; cleaning is done
    by rasterising every page with cairo and assembling the images into a
    brand new PDF.
    """

    # Factor applied to the page dimensions when rasterising a page.
    _SCALE = 2

    def __init__(self, filename):
        """Prepare the parser for *filename*; the file is not opened yet."""
        super().__init__(filename)
        # Each entry is read with Poppler's ``document.get_property(key)``.
        self.meta_list = {'title', 'author', 'subject',
                          'keywords', 'creator', 'producer', 'metadata'}
        self.uri = self._to_uri(self.filename)
        self.password = None

    @staticmethod
    def _to_uri(path):
        """Return a properly escaped ``file://`` URI for *path*.

        Plain ``'file://' + path`` concatenation yields an invalid URI as
        soon as the path contains spaces, ``#``, ``%`` or non-ASCII
        characters, so the escaping is delegated to GLib.
        """
        return GLib.filename_to_uri(os.path.abspath(path), None)

    def _read_meta(self, document):
        """Return a dict of the non-empty ``meta_list`` properties of *document*."""
        metadata = {}
        for key in self.meta_list:
            value = document.get_property(key)
            if value:
                metadata[key] = str(value)
        return metadata

    def remove_all(self, output_filename='olol.pdf'):
        """
        Load the document into Poppler, render pages on PNG,
        and shove those PNG into a new PDF. Metadata from the new
        PDF are removed via Poppler, because there is no way to tell
        cairo to not add "created by cairo" during rendering.

        The cleaned document is written to *output_filename*; the default
        is the historical hard-coded name, resolved against the current
        working directory.

        Returns True once the new file has been saved.

        TODO: Improve the resolution
        """
        scale = self._SCALE
        document = Poppler.Document.new_from_file(self.uri, self.password)
        page_count = document.get_n_pages()

        pdf_out = io.BytesIO()
        # The initial size is a placeholder: it is reset for every page.
        pdf_surface = cairo.PDFSurface(pdf_out, 128, 128)
        pdf_context = cairo.Context(pdf_surface)

        for pagenum in range(page_count):
            page = document.get_page(pagenum)
            page_width, page_height = page.get_size()
            logging.info("Rendering page %d/%d", pagenum + 1, page_count)

            img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32,
                                             int(page_width) * scale,
                                             int(page_height) * scale)
            img_context = cairo.Context(img_surface)

            img_context.scale(scale, scale)
            page.render_for_printing_with_options(img_context,
                                                  Poppler.PrintFlags.DOCUMENT)
            img_context.show_page()

            # Round-trip through an in-memory PNG so that only pixels are
            # carried over into the new document.
            buf = io.BytesIO()
            img_surface.write_to_png(buf)
            img_surface.finish()
            buf.seek(0)

            img = cairo.ImageSurface.create_from_png(buf)
            pdf_surface.set_size(page_width * scale, page_height * scale)
            pdf_context.set_source_surface(img, 0, 0)
            pdf_context.paint()
            pdf_context.show_page()

        pdf_surface.finish()

        # Re-open the freshly generated PDF with Poppler to overwrite the
        # producer/creator fields set by cairo.
        pdf_bytes = GLib.Bytes(pdf_out.getvalue())
        input_stream = Gio.MemoryInputStream.new_from_bytes(pdf_bytes)
        out_document = Poppler.Document.new_from_stream(input_stream, -1,
                                                        self.password, None)
        out_document.set_producer('totally not MAT2 ;)')
        out_document.set_creator('')
        logging.debug("Metadata after cleaning: %s",
                      self._read_meta(out_document))
        out_document.save(self._to_uri(output_filename))

        return True

    def get_meta(self):
        """ Return a dict with all the meta of the file
        """
        logging.debug("URI: %s", self.uri)
        document = Poppler.Document.new_from_file(self.uri, self.password)
        return self._read_meta(document)