From ac4110fbfc60b112836d0b279ae07ea9a779b16d Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 20 Jan 2013 03:57:17 +0100 Subject: preliminary support of gtk3 (one test fails for PDF) --- MAT/office.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'MAT/office.py') diff --git a/MAT/office.py b/MAT/office.py index 20664d2..b74b49c 100644 --- a/MAT/office.py +++ b/MAT/office.py @@ -10,7 +10,7 @@ import xml.dom.minidom as minidom try: import cairo - import poppler + from gi.repository import Poppler except ImportError: pass @@ -125,7 +125,7 @@ class PdfStripper(parser.GenericParser): uri = 'file://' + os.path.abspath(self.filename) self.password = None self.pdf_quality = kwargs['low_pdf_quality'] - self.document = poppler.document_new_from_file(uri, self.password) + self.document = Poppler.Document.new_from_file(uri, self.password) self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', 'producer', 'metadata']) -- cgit v1.3 From 3d8e11ce644833106f22778f3171c52a51ff69fe Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 5 Apr 2013 10:55:34 +0200 Subject: Pdf support is back --- MAT/office.py | 10 +++++++++- test/test.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'MAT/office.py') diff --git a/MAT/office.py b/MAT/office.py index b74b49c..a861919 100644 --- a/MAT/office.py +++ b/MAT/office.py @@ -6,6 +6,8 @@ import os import logging import zipfile import fileinput +import tempfile +import shutil import xml.dom.minidom as minidom try: @@ -145,11 +147,16 @@ class PdfStripper(parser.GenericParser): http://cairographics.org/documentation/pycairo/2/ python-poppler is not documented at all : have fun ;) + + The use of an intermediate tempfile is necessary because + python-cairo segfaults on unicode. + See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 ''' + output = tempfile.mkstemp()[1] page = self.document.get_page(0) # assume that every pages are the same size page_width, page_height = page.get_size() - surface = cairo.PDFSurface(self.output, page_width, page_height) + surface = cairo.PDFSurface(output, page_width, page_height) context = cairo.Context(surface) # context draws on the surface logging.debug('PDF rendering of %s' % self.filename) for pagenum in xrange(self.document.get_n_pages()): @@ -161,6 +168,7 @@ class PdfStripper(parser.GenericParser): page.render_for_printing(context) # render the page on context context.show_page() # draw context on surface surface.finish() + shutil.move(output, self.output) try: import pdfrw # For now, poppler cannot write meta, so we must use pdfrw diff --git a/test/test.py b/test/test.py index 7becd5b..ef1c768 100644 --- a/test/test.py +++ b/test/test.py @@ -43,7 +43,7 @@ try: # file format exclusively managed by exiftool except OSError: pass # None for now -FILE_LIST.remove(('clean é.pdf', 'dirty é.pdf')) +#FILE_LIST.remove(('clean é.pdf', 'dirty é.pdf')) #FILE_LIST.remove(('clean é.tar', 'dirty é.tar')) FILE_LIST.remove(('clean é.tar.gz', 'dirty é.tar.gz')) #FILE_LIST.remove(('clean é.tar.bz2', 'dirty é.tar.bz2')) -- cgit v1.3