diff options
Diffstat (limited to 'MAT')
| -rw-r--r-- | MAT/office.py | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/MAT/office.py b/MAT/office.py index e4b9567..0ca1ff1 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -113,22 +113,22 @@ class PdfStripper(parser.GenericParser): | |||
| 113 | ''' | 113 | ''' |
| 114 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 114 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 115 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 115 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 116 | uri = 'file://' + os.path.abspath(self.filename) | 116 | self.uri = 'file://' + os.path.abspath(self.filename) |
| 117 | self.password = None | 117 | self.password = None |
| 118 | try: | 118 | try: |
| 119 | self.pdf_quality = kwargs['low_pdf_quality'] | 119 | self.pdf_quality = kwargs['low_pdf_quality'] |
| 120 | except KeyError: | 120 | except KeyError: |
| 121 | self.pdf_quality = False | 121 | self.pdf_quality = False |
| 122 | 122 | ||
| 123 | self.document = Poppler.Document.new_from_file(uri, self.password) | ||
| 124 | self.meta_list = frozenset(['title', 'author', 'subject', | 123 | self.meta_list = frozenset(['title', 'author', 'subject', |
| 125 | 'keywords', 'creator', 'producer', 'metadata']) | 124 | 'keywords', 'creator', 'producer', 'metadata']) |
| 126 | 125 | ||
| 127 | def is_clean(self): | 126 | def is_clean(self): |
| 128 | ''' Check if the file is clean from harmful metadata | 127 | ''' Check if the file is clean from harmful metadata |
| 129 | ''' | 128 | ''' |
| 129 | document = Poppler.Document.new_from_file(self.uri, self.password) | ||
| 130 | for key in self.meta_list: | 130 | for key in self.meta_list: |
| 131 | if self.document.get_property(key): | 131 | if document.get_property(key): |
| 132 | return False | 132 | return False |
| 133 | return True | 133 | return True |
| 134 | 134 | ||
| @@ -142,16 +142,17 @@ class PdfStripper(parser.GenericParser): | |||
| 142 | python-cairo segfaults on unicode. | 142 | python-cairo segfaults on unicode. |
| 143 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 | 143 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 |
| 144 | ''' | 144 | ''' |
| 145 | document = Poppler.Document.new_from_file(self.uri, self.password) | ||
| 145 | try: | 146 | try: |
| 146 | output = tempfile.mkstemp()[1] | 147 | output = tempfile.mkstemp()[1] |
| 147 | page = self.document.get_page(0) | 148 | page = document.get_page(0) |
| 148 | # assume that all pages are the same size | 149 | # assume that all pages are the same size |
| 149 | page_width, page_height = page.get_size() | 150 | page_width, page_height = page.get_size() |
| 150 | surface = cairo.PDFSurface(output, page_width, page_height) | 151 | surface = cairo.PDFSurface(output, page_width, page_height) |
| 151 | context = cairo.Context(surface) # context draws on the surface | 152 | context = cairo.Context(surface) # context draws on the surface |
| 152 | logging.debug('PDF rendering of %s' % self.filename) | 153 | logging.debug('PDF rendering of %s' % self.filename) |
| 153 | for pagenum in range(self.document.get_n_pages()): | 154 | for pagenum in range(document.get_n_pages()): |
| 154 | page = self.document.get_page(pagenum) | 155 | page = document.get_page(pagenum) |
| 155 | context.translate(0, 0) | 156 | context.translate(0, 0) |
| 156 | if self.pdf_quality: | 157 | if self.pdf_quality: |
| 157 | page.render(context) # render the page on context | 158 | page.render(context) # render the page on context |
| @@ -182,8 +183,9 @@ class PdfStripper(parser.GenericParser): | |||
| 182 | def get_meta(self): | 183 | def get_meta(self): |
| 183 | ''' Return a dict with all the meta of the file | 184 | ''' Return a dict with all the meta of the file |
| 184 | ''' | 185 | ''' |
| 186 | document = Poppler.Document.new_from_file(self.uri, self.password) | ||
| 185 | metadata = {} | 187 | metadata = {} |
| 186 | for key in self.meta_list: | 188 | for key in self.meta_list: |
| 187 | if self.document.get_property(key): | 189 | if document.get_property(key): |
| 188 | metadata[key] = self.document.get_property(key) | 190 | metadata[key] = document.get_property(key) |
| 189 | return metadata | 191 | return metadata |
