summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libmat2/__init__.py1
-rw-r--r--libmat2/parsers/__init__.py0
-rw-r--r--libmat2/parsers/abstract.py10
-rw-r--r--libmat2/parsers/pdf.py105
4 files changed, 0 insertions, 116 deletions
diff --git a/libmat2/__init__.py b/libmat2/__init__.py
deleted file mode 100644
index 3b3dacb..0000000
--- a/libmat2/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
1__version__ = '2.0'
diff --git a/libmat2/parsers/__init__.py b/libmat2/parsers/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/libmat2/parsers/__init__.py
+++ /dev/null
diff --git a/libmat2/parsers/abstract.py b/libmat2/parsers/abstract.py
deleted file mode 100644
index a9129cc..0000000
--- a/libmat2/parsers/abstract.py
+++ /dev/null
@@ -1,10 +0,0 @@
class AbstractParser:
    """Common base for the format-specific parsers.

    It only records the target file name and an (initially empty) set of
    metadata field names. Both operations below are placeholders that
    raise NotImplementedError until a subclass overrides them.
    """

    def __init__(self, filename: str):
        # Names of the metadata fields handled by the parser; empty here,
        # a subclass may replace it with its own set.
        self.meta_list = set()
        # Path of the file this parser works on.
        self.filename = filename

    def remove_all(self):
        """Placeholder: a subclass must provide the metadata removal."""
        raise NotImplementedError

    def get_meta(self):
        """Placeholder: a subclass must provide the metadata lookup."""
        raise NotImplementedError
diff --git a/libmat2/parsers/pdf.py b/libmat2/parsers/pdf.py
deleted file mode 100644
index f6bc110..0000000
--- a/libmat2/parsers/pdf.py
+++ /dev/null
@@ -1,105 +0,0 @@
1""" Handle PDF
2
3"""
4
5import os
6import logging
7import tempfile
8import shutil
9import io
10
11import cairo
12import gi
13gi.require_version('Poppler', '0.18')
14from gi.repository import Poppler, Gio, GLib
15
16try:
17 from PIL import Image
18except ImportError:
19 Image = None
20
21from . import abstract
22
23logging.basicConfig(level=logging.DEBUG)
24
25
class PDFParser(abstract.AbstractParser):
    """Inspect and strip the metadata of a PDF file via Poppler and cairo."""

    # Factor applied to each page when it is rasterised, and to the size of
    # the corresponding output page.
    _SCALE = 2

    # NOTE(review): the destination is still a fixed name resolved against
    # the current working directory, exactly as before; it should probably
    # become configurable.
    _OUTPUT_FILENAME = 'olol.pdf'

    def __init__(self, filename):
        """Remember *filename* and prepare the URI Poppler will open."""
        super().__init__(filename)
        # Poppler.Document properties that are read as metadata.
        self.meta_list = {'title', 'author', 'subject',
                          'keywords', 'creator', 'producer', 'metadata'}
        self.uri = self._path_to_uri(self.filename)
        self.password = None

    @staticmethod
    def _path_to_uri(path):
        """Return an escaped ``file://`` URI for *path*.

        Plain string concatenation leaves characters such as spaces, ``#``
        or ``%`` unescaped, which yields an invalid URI; GLib performs the
        required escaping.
        """
        return GLib.filename_to_uri(os.path.abspath(path), None)

    def _collect_meta(self, document):
        """Return a dict of the non-empty ``meta_list`` properties of *document*."""
        metadata = {}
        for key in self.meta_list:
            value = document.get_property(key)
            if value:
                metadata[key] = str(value)
        return metadata

    def remove_all(self):
        """Write a metadata-free, rasterised copy of the document.

        Load the document into Poppler, render every page to a PNG and
        paint those PNGs into a new in-memory PDF. The producer and creator
        fields of the new PDF are then overwritten through Poppler, because
        there is no way to tell cairo not to add "created by cairo" during
        rendering. The result is saved as ``_OUTPUT_FILENAME``.

        Each output page is ``_SCALE`` times the size of the source page.

        TODO: Improve the resolution

        Returns:
            True once the cleaned document has been saved.
        """
        document = Poppler.Document.new_from_file(self.uri, self.password)
        page_count = document.get_n_pages()
        scale = self._SCALE

        pdf_out = io.BytesIO()
        # The initial size is arbitrary: it is reset for every page below.
        pdf_surface = cairo.PDFSurface(pdf_out, 128, 128)
        pdf_context = cairo.Context(pdf_surface)

        for pagenum in range(page_count):
            page = document.get_page(pagenum)
            page_width, page_height = page.get_size()
            logging.info("Rendering page %d/%d", pagenum + 1, page_count)

            img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32,
                                             int(page_width) * scale,
                                             int(page_height) * scale)
            img_context = cairo.Context(img_surface)

            img_context.scale(scale, scale)
            page.render_for_printing_with_options(
                img_context, Poppler.PrintFlags.DOCUMENT)
            img_context.show_page()

            # Round-trip through PNG so that only pixels reach the new PDF.
            buf = io.BytesIO()
            img_surface.write_to_png(buf)
            img_surface.finish()
            buf.seek(0)

            img = cairo.ImageSurface.create_from_png(buf)
            pdf_surface.set_size(page_width * scale, page_height * scale)
            pdf_context.set_source_surface(img, 0, 0)
            pdf_context.paint()
            pdf_context.show_page()

        pdf_surface.finish()

        # Re-open the freshly generated PDF with Poppler to overwrite the
        # fields cairo filled in.
        pdf_bytes = GLib.Bytes(pdf_out.getvalue())
        input_stream = Gio.MemoryInputStream.new_from_bytes(pdf_bytes)
        out_document = Poppler.Document.new_from_stream(
            input_stream, -1, self.password, None)
        out_document.set_producer('totally not MAT2 ;)')
        out_document.set_creator('')
        logging.debug("Metadata left after cleaning: %s",
                      self._collect_meta(out_document))
        out_document.save(self._path_to_uri(self._OUTPUT_FILENAME))

        return True

    def get_meta(self):
        """Return a dict with all the meta of the file.

        Keys are the entries of ``meta_list`` whose Poppler property is
        set and non-empty; values are their string representation.
        """
        logging.debug("URI: %s", self.uri)
        document = Poppler.Document.new_from_file(self.uri, self.password)
        return self._collect_meta(document)