diff options
| author | jvoisin | 2018-03-19 00:04:00 +0100 |
|---|---|---|
| committer | jvoisin | 2018-03-19 00:04:00 +0100 |
| commit | 12e2330ca6b9bc22b245d34789e5603c60992986 (patch) | |
| tree | 9aa46b170a8511bfdc0775625da5256e077a1247 | |
| parent | acb9b2d14e3173029aa44e853662d11efbe4fa5e (diff) | |
Remove some useless files
| -rw-r--r-- | libmat2/__init__.py | 1 | ||||
| -rw-r--r-- | libmat2/parsers/__init__.py | 0 | ||||
| -rw-r--r-- | libmat2/parsers/abstract.py | 10 | ||||
| -rw-r--r-- | libmat2/parsers/pdf.py | 105 |
4 files changed, 0 insertions, 116 deletions
diff --git a/libmat2/__init__.py b/libmat2/__init__.py deleted file mode 100644 index 3b3dacb..0000000 --- a/libmat2/__init__.py +++ /dev/null | |||
| @@ -1 +0,0 @@ | |||
| 1 | __version__ = '2.0' | ||
diff --git a/libmat2/parsers/__init__.py b/libmat2/parsers/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/libmat2/parsers/__init__.py +++ /dev/null | |||
diff --git a/libmat2/parsers/abstract.py b/libmat2/parsers/abstract.py deleted file mode 100644 index a9129cc..0000000 --- a/libmat2/parsers/abstract.py +++ /dev/null | |||
| @@ -1,10 +0,0 @@ | |||
| 1 | class AbstractParser(object): | ||
| 2 | def __init__(self, filename: str): | ||
| 3 | self.filename = filename | ||
| 4 | self.meta_list = set() | ||
| 5 | |||
| 6 | def get_meta(self): | ||
| 7 | raise NotImplementedError | ||
| 8 | |||
| 9 | def remove_all(self): | ||
| 10 | raise NotImplementedError | ||
diff --git a/libmat2/parsers/pdf.py b/libmat2/parsers/pdf.py deleted file mode 100644 index f6bc110..0000000 --- a/libmat2/parsers/pdf.py +++ /dev/null | |||
| @@ -1,105 +0,0 @@ | |||
| 1 | """ Handle PDF | ||
| 2 | |||
| 3 | """ | ||
| 4 | |||
| 5 | import os | ||
| 6 | import logging | ||
| 7 | import tempfile | ||
| 8 | import shutil | ||
| 9 | import io | ||
| 10 | |||
| 11 | import cairo | ||
| 12 | import gi | ||
| 13 | gi.require_version('Poppler', '0.18') | ||
| 14 | from gi.repository import Poppler, Gio, GLib | ||
| 15 | |||
| 16 | try: | ||
| 17 | from PIL import Image | ||
| 18 | except ImportError: | ||
| 19 | Image = None | ||
| 20 | |||
| 21 | from . import abstract | ||
| 22 | |||
| 23 | logging.basicConfig(level=logging.DEBUG) | ||
| 24 | |||
| 25 | |||
| 26 | class PDFParser(abstract.AbstractParser): | ||
| 27 | def __init__(self, filename): | ||
| 28 | super().__init__(filename) | ||
| 29 | self.meta_list = {'title', 'author', 'subject', | ||
| 30 | 'keywords', 'creator', 'producer', 'metadata'} | ||
| 31 | self.uri = 'file://' + os.path.abspath(self.filename) | ||
| 32 | self.password = None | ||
| 33 | |||
| 34 | def remove_all(self): | ||
| 35 | """ | ||
| 36 | Load the document into Poppler, render pages on PNG, | ||
| 37 | and shove those PNG into a new PDF. Metadata from the new | ||
| 38 | PDF are removed via Poppler, because there is no way to tell | ||
| 39 | cairo to not add "created by cairo" during rendering. | ||
| 40 | |||
| 41 | TODO: Improve the resolution | ||
| 42 | TODO: Don't use a temp file | ||
| 43 | """ | ||
| 44 | document = Poppler.Document.new_from_file(self.uri, self.password) | ||
| 45 | |||
| 46 | pdf_out = io.BytesIO() | ||
| 47 | pdf_surface = cairo.PDFSurface(pdf_out, 128, 128) | ||
| 48 | pdf_context = cairo.Context(pdf_surface) | ||
| 49 | |||
| 50 | for pagenum in range(document.get_n_pages()): | ||
| 51 | page = document.get_page(pagenum) | ||
| 52 | page_width, page_height = page.get_size() | ||
| 53 | logging.info("Rendering page %d/%d", pagenum + 1, document.get_n_pages()) | ||
| 54 | |||
| 55 | img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, int(page_width)*2, int(page_height)*2) | ||
| 56 | img_context = cairo.Context(img_surface) | ||
| 57 | |||
| 58 | img_context.scale(2, 2) | ||
| 59 | page.render_for_printing_with_options(img_context, Poppler.PrintFlags.DOCUMENT) | ||
| 60 | img_context.show_page() | ||
| 61 | |||
| 62 | buf = io.BytesIO() | ||
| 63 | img_surface.write_to_png(buf) | ||
| 64 | img_surface.finish() | ||
| 65 | buf.seek(0) | ||
| 66 | |||
| 67 | img = cairo.ImageSurface.create_from_png(buf) | ||
| 68 | pdf_surface.set_size(page_width*2, page_height*2) | ||
| 69 | pdf_context.set_source_surface(img, 0, 0) | ||
| 70 | pdf_context.paint() | ||
| 71 | pdf_context.show_page() | ||
| 72 | |||
| 73 | pdf_surface.finish() | ||
| 74 | |||
| 75 | b = GLib.Bytes(pdf_out.getvalue()) | ||
| 76 | input_stream = Gio.MemoryInputStream.new_from_bytes(b) | ||
| 77 | out_document = Poppler.Document.new_from_stream(input_stream, -1, self.password, None) | ||
| 78 | metadata = {} | ||
| 79 | for key in self.meta_list: | ||
| 80 | if out_document.get_property(key): | ||
| 81 | metadata[key] = str(out_document.get_property(key)) | ||
| 82 | out_document.set_producer('totally not MAT2 ;)') | ||
| 83 | out_document.set_creator('') | ||
| 84 | print("AFTER") | ||
| 85 | metadata = {} | ||
| 86 | for key in self.meta_list: | ||
| 87 | if out_document.get_property(key): | ||
| 88 | metadata[key] = str(out_document.get_property(key)) | ||
| 89 | print("LOL") | ||
| 90 | out_document.save('file://' + os.path.abspath("olol.pdf")) | ||
| 91 | |||
| 92 | print(metadata) | ||
| 93 | |||
| 94 | return True | ||
| 95 | |||
| 96 | def get_meta(self): | ||
| 97 | """ Return a dict with all the meta of the file | ||
| 98 | """ | ||
| 99 | print("URI: %s", self.uri) | ||
| 100 | document = Poppler.Document.new_from_file(self.uri, self.password) | ||
| 101 | metadata = {} | ||
| 102 | for key in self.meta_list: | ||
| 103 | if document.get_property(key): | ||
| 104 | metadata[key] = str(document.get_property(key)) | ||
| 105 | return metadata | ||
