summary refs log tree commit diff
path: root/MAT/office.py
diff options
context:
space:
mode:
Diffstat (limited to 'MAT/office.py')
-rw-r--r-- MAT/office.py 18
1 files changed, 10 insertions, 8 deletions
diff --git a/MAT/office.py b/MAT/office.py
index e4b9567..0ca1ff1 100644
--- a/MAT/office.py
+++ b/MAT/office.py
@@ -113,22 +113,22 @@ class PdfStripper(parser.GenericParser):
113 ''' 113 '''
114 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 114 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
115 super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 115 super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
116 uri = 'file://' + os.path.abspath(self.filename) 116 self.uri = 'file://' + os.path.abspath(self.filename)
117 self.password = None 117 self.password = None
118 try: 118 try:
119 self.pdf_quality = kwargs['low_pdf_quality'] 119 self.pdf_quality = kwargs['low_pdf_quality']
120 except KeyError: 120 except KeyError:
121 self.pdf_quality = False 121 self.pdf_quality = False
122 122
123 self.document = Poppler.Document.new_from_file(uri, self.password)
124 self.meta_list = frozenset(['title', 'author', 'subject', 123 self.meta_list = frozenset(['title', 'author', 'subject',
125 'keywords', 'creator', 'producer', 'metadata']) 124 'keywords', 'creator', 'producer', 'metadata'])
126 125
127 def is_clean(self): 126 def is_clean(self):
128 ''' Check if the file is clean from harmful metadatas 127 ''' Check if the file is clean from harmful metadatas
129 ''' 128 '''
129 document = Poppler.Document.new_from_file(self.uri, self.password)
130 for key in self.meta_list: 130 for key in self.meta_list:
131 if self.document.get_property(key): 131 if document.get_property(key):
132 return False 132 return False
133 return True 133 return True
134 134
@@ -142,16 +142,17 @@ class PdfStripper(parser.GenericParser):
142 python-cairo segfaults on unicode. 142 python-cairo segfaults on unicode.
143 See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 143 See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457
144 ''' 144 '''
145 document = Poppler.Document.new_from_file(self.uri, self.password)
145 try: 146 try:
146 output = tempfile.mkstemp()[1] 147 output = tempfile.mkstemp()[1]
147 page = self.document.get_page(0) 148 page = document.get_page(0)
148 # assume that every pages are the same size 149 # assume that every pages are the same size
149 page_width, page_height = page.get_size() 150 page_width, page_height = page.get_size()
150 surface = cairo.PDFSurface(output, page_width, page_height) 151 surface = cairo.PDFSurface(output, page_width, page_height)
151 context = cairo.Context(surface) # context draws on the surface 152 context = cairo.Context(surface) # context draws on the surface
152 logging.debug('PDF rendering of %s' % self.filename) 153 logging.debug('PDF rendering of %s' % self.filename)
153 for pagenum in range(self.document.get_n_pages()): 154 for pagenum in range(document.get_n_pages()):
154 page = self.document.get_page(pagenum) 155 page = document.get_page(pagenum)
155 context.translate(0, 0) 156 context.translate(0, 0)
156 if self.pdf_quality: 157 if self.pdf_quality:
157 page.render(context) # render the page on context 158 page.render(context) # render the page on context
@@ -182,8 +183,9 @@ class PdfStripper(parser.GenericParser):
182 def get_meta(self): 183 def get_meta(self):
183 ''' Return a dict with all the meta of the file 184 ''' Return a dict with all the meta of the file
184 ''' 185 '''
186 document = Poppler.Document.new_from_file(self.uri, self.password)
185 metadata = {} 187 metadata = {}
186 for key in self.meta_list: 188 for key in self.meta_list:
187 if self.document.get_property(key): 189 if document.get_property(key):
188 metadata[key] = self.document.get_property(key) 190 metadata[key] = document.get_property(key)
189 return metadata 191 return metadata