summary | refs | log | tree | commit | diff
path: root/MAT/office.py
diff options
context:
space:
mode:
Diffstat (limited to 'MAT/office.py')
-rw-r--r-- MAT/office.py | 14
1 files changed, 11 insertions, 3 deletions
diff --git a/MAT/office.py b/MAT/office.py
index 20664d2..a861919 100644
--- a/MAT/office.py
+++ b/MAT/office.py
@@ -6,11 +6,13 @@ import os
6import logging 6import logging
7import zipfile 7import zipfile
8import fileinput 8import fileinput
9import tempfile
10import shutil
9import xml.dom.minidom as minidom 11import xml.dom.minidom as minidom
10 12
11try: 13try:
12 import cairo 14 import cairo
13 import poppler 15 from gi.repository import Poppler
14except ImportError: 16except ImportError:
15 pass 17 pass
16 18
@@ -125,7 +127,7 @@ class PdfStripper(parser.GenericParser):
125 uri = 'file://' + os.path.abspath(self.filename) 127 uri = 'file://' + os.path.abspath(self.filename)
126 self.password = None 128 self.password = None
127 self.pdf_quality = kwargs['low_pdf_quality'] 129 self.pdf_quality = kwargs['low_pdf_quality']
128 self.document = poppler.document_new_from_file(uri, self.password) 130 self.document = Poppler.Document.new_from_file(uri, self.password)
129 self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', 131 self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator',
130 'producer', 'metadata']) 132 'producer', 'metadata'])
131 133
@@ -145,11 +147,16 @@ class PdfStripper(parser.GenericParser):
145 147
146 http://cairographics.org/documentation/pycairo/2/ 148 http://cairographics.org/documentation/pycairo/2/
147 python-poppler is not documented at all : have fun ;) 149 python-poppler is not documented at all : have fun ;)
150
151 The use of an intermediate tempfile is necessary because
152 python-cairo segfaults on unicode.
153 See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457
148 ''' 154 '''
155 output = tempfile.mkstemp()[1]
149 page = self.document.get_page(0) 156 page = self.document.get_page(0)
150 # assume that every pages are the same size 157 # assume that every pages are the same size
151 page_width, page_height = page.get_size() 158 page_width, page_height = page.get_size()
152 surface = cairo.PDFSurface(self.output, page_width, page_height) 159 surface = cairo.PDFSurface(output, page_width, page_height)
153 context = cairo.Context(surface) # context draws on the surface 160 context = cairo.Context(surface) # context draws on the surface
154 logging.debug('PDF rendering of %s' % self.filename) 161 logging.debug('PDF rendering of %s' % self.filename)
155 for pagenum in xrange(self.document.get_n_pages()): 162 for pagenum in xrange(self.document.get_n_pages()):
@@ -161,6 +168,7 @@ class PdfStripper(parser.GenericParser):
161 page.render_for_printing(context) # render the page on context 168 page.render_for_printing(context) # render the page on context
162 context.show_page() # draw context on surface 169 context.show_page() # draw context on surface
163 surface.finish() 170 surface.finish()
171 shutil.move(output, self.output)
164 172
165 try: 173 try:
166 import pdfrw # For now, poppler cannot write meta, so we must use pdfrw 174 import pdfrw # For now, poppler cannot write meta, so we must use pdfrw