diff options
Diffstat (limited to 'MAT/office.py')
| -rw-r--r-- | MAT/office.py | 14 |
1 file changed, 11 insertions, 3 deletions
diff --git a/MAT/office.py b/MAT/office.py index 20664d2..a861919 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -6,11 +6,13 @@ import os | |||
| 6 | import logging | 6 | import logging |
| 7 | import zipfile | 7 | import zipfile |
| 8 | import fileinput | 8 | import fileinput |
| 9 | import tempfile | ||
| 10 | import shutil | ||
| 9 | import xml.dom.minidom as minidom | 11 | import xml.dom.minidom as minidom |
| 10 | 12 | ||
| 11 | try: | 13 | try: |
| 12 | import cairo | 14 | import cairo |
| 13 | import poppler | 15 | from gi.repository import Poppler |
| 14 | except ImportError: | 16 | except ImportError: |
| 15 | pass | 17 | pass |
| 16 | 18 | ||
| @@ -125,7 +127,7 @@ class PdfStripper(parser.GenericParser): | |||
| 125 | uri = 'file://' + os.path.abspath(self.filename) | 127 | uri = 'file://' + os.path.abspath(self.filename) |
| 126 | self.password = None | 128 | self.password = None |
| 127 | self.pdf_quality = kwargs['low_pdf_quality'] | 129 | self.pdf_quality = kwargs['low_pdf_quality'] |
| 128 | self.document = poppler.document_new_from_file(uri, self.password) | 130 | self.document = Poppler.Document.new_from_file(uri, self.password) |
| 129 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', | 131 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', |
| 130 | 'producer', 'metadata']) | 132 | 'producer', 'metadata']) |
| 131 | 133 | ||
| @@ -145,11 +147,16 @@ class PdfStripper(parser.GenericParser): | |||
| 145 | 147 | ||
| 146 | http://cairographics.org/documentation/pycairo/2/ | 148 | http://cairographics.org/documentation/pycairo/2/ |
| 147 | python-poppler is not documented at all : have fun ;) | 149 | python-poppler is not documented at all : have fun ;) |
| 150 | |||
| 151 | The use of an intermediate tempfile is necessary because | ||
| 152 | python-cairo segfaults on unicode. | ||
| 153 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 | ||
| 148 | ''' | 154 | ''' |
| 155 | output = tempfile.mkstemp()[1] | ||
| 149 | page = self.document.get_page(0) | 156 | page = self.document.get_page(0) |
| 150 | # assume that all pages are the same size | 157 | # assume that all pages are the same size |
| 151 | page_width, page_height = page.get_size() | 158 | page_width, page_height = page.get_size() |
| 152 | surface = cairo.PDFSurface(self.output, page_width, page_height) | 159 | surface = cairo.PDFSurface(output, page_width, page_height) |
| 153 | context = cairo.Context(surface) # context draws on the surface | 160 | context = cairo.Context(surface) # context draws on the surface |
| 154 | logging.debug('PDF rendering of %s' % self.filename) | 161 | logging.debug('PDF rendering of %s' % self.filename) |
| 155 | for pagenum in xrange(self.document.get_n_pages()): | 162 | for pagenum in xrange(self.document.get_n_pages()): |
| @@ -161,6 +168,7 @@ class PdfStripper(parser.GenericParser): | |||
| 161 | page.render_for_printing(context) # render the page on context | 168 | page.render_for_printing(context) # render the page on context |
| 162 | context.show_page() # draw context on surface | 169 | context.show_page() # draw context on surface |
| 163 | surface.finish() | 170 | surface.finish() |
| 171 | shutil.move(output, self.output) | ||
| 164 | 172 | ||
| 165 | try: | 173 | try: |
| 166 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw | 174 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw |
