diff options
| -rw-r--r-- | MAT/office.py | 10 | ||||
| -rw-r--r-- | test/test.py | 2 |
2 files changed, 10 insertions, 2 deletions
diff --git a/MAT/office.py b/MAT/office.py index b74b49c..a861919 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -6,6 +6,8 @@ import os | |||
| 6 | import logging | 6 | import logging |
| 7 | import zipfile | 7 | import zipfile |
| 8 | import fileinput | 8 | import fileinput |
| 9 | import tempfile | ||
| 10 | import shutil | ||
| 9 | import xml.dom.minidom as minidom | 11 | import xml.dom.minidom as minidom |
| 10 | 12 | ||
| 11 | try: | 13 | try: |
| @@ -145,11 +147,16 @@ class PdfStripper(parser.GenericParser): | |||
| 145 | 147 | ||
| 146 | http://cairographics.org/documentation/pycairo/2/ | 148 | http://cairographics.org/documentation/pycairo/2/ |
| 147 | python-poppler is not documented at all : have fun ;) | 149 | python-poppler is not documented at all : have fun ;) |
| 150 | |||
| 151 | The use of an intermediate tempfile is necessary because | ||
| 152 | python-cairo segfaults on unicode. | ||
| 153 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 | ||
| 148 | ''' | 154 | ''' |
| 155 | output = tempfile.mkstemp()[1] | ||
| 149 | page = self.document.get_page(0) | 156 | page = self.document.get_page(0) |
| 150 | # assume that all pages are the same size | 157 | # assume that all pages are the same size |
| 151 | page_width, page_height = page.get_size() | 158 | page_width, page_height = page.get_size() |
| 152 | surface = cairo.PDFSurface(self.output, page_width, page_height) | 159 | surface = cairo.PDFSurface(output, page_width, page_height) |
| 153 | context = cairo.Context(surface) # context draws on the surface | 160 | context = cairo.Context(surface) # context draws on the surface |
| 154 | logging.debug('PDF rendering of %s' % self.filename) | 161 | logging.debug('PDF rendering of %s' % self.filename) |
| 155 | for pagenum in xrange(self.document.get_n_pages()): | 162 | for pagenum in xrange(self.document.get_n_pages()): |
| @@ -161,6 +168,7 @@ class PdfStripper(parser.GenericParser): | |||
| 161 | page.render_for_printing(context) # render the page on context | 168 | page.render_for_printing(context) # render the page on context |
| 162 | context.show_page() # draw context on surface | 169 | context.show_page() # draw context on surface |
| 163 | surface.finish() | 170 | surface.finish() |
| 171 | shutil.move(output, self.output) | ||
| 164 | 172 | ||
| 165 | try: | 173 | try: |
| 166 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw | 174 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw |
diff --git a/test/test.py b/test/test.py index 7becd5b..ef1c768 100644 --- a/test/test.py +++ b/test/test.py | |||
| @@ -43,7 +43,7 @@ try: # file format exclusively managed by exiftool | |||
| 43 | except OSError: | 43 | except OSError: |
| 44 | pass # None for now | 44 | pass # None for now |
| 45 | 45 | ||
| 46 | FILE_LIST.remove(('clean é.pdf', 'dirty é.pdf')) | 46 | #FILE_LIST.remove(('clean é.pdf', 'dirty é.pdf')) |
| 47 | #FILE_LIST.remove(('clean é.tar', 'dirty é.tar')) | 47 | #FILE_LIST.remove(('clean é.tar', 'dirty é.tar')) |
| 48 | FILE_LIST.remove(('clean é.tar.gz', 'dirty é.tar.gz')) | 48 | FILE_LIST.remove(('clean é.tar.gz', 'dirty é.tar.gz')) |
| 49 | #FILE_LIST.remove(('clean é.tar.bz2', 'dirty é.tar.bz2')) | 49 | #FILE_LIST.remove(('clean é.tar.bz2', 'dirty é.tar.bz2')) |
