From a4f80082885079351d71bd3b6c3eaae1fcd5258f Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 27 Oct 2013 16:07:10 +0000 Subject: Improve MAT's reliability --- MAT/office.py | 38 +++++++++++++++++++++----------------- MAT/parser.py | 19 +++++++------------ 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/MAT/office.py b/MAT/office.py index c44a52b..583e0f9 100644 --- a/MAT/office.py +++ b/MAT/office.py @@ -156,23 +156,27 @@ class PdfStripper(parser.GenericParser): python-cairo segfaults on unicode. See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 ''' - output = tempfile.mkstemp()[1] - page = self.document.get_page(0) - # assume that every pages are the same size - page_width, page_height = page.get_size() - surface = cairo.PDFSurface(output, page_width, page_height) - context = cairo.Context(surface) # context draws on the surface - logging.debug('PDF rendering of %s' % self.filename) - for pagenum in range(self.document.get_n_pages()): - page = self.document.get_page(pagenum) - context.translate(0, 0) - if self.pdf_quality: - page.render(context) # render the page on context - else: - page.render_for_printing(context) # render the page on context - context.show_page() # draw context on surface - surface.finish() - shutil.move(output, self.output) + try: + output = tempfile.mkstemp()[1] + page = self.document.get_page(0) + # assume that every pages are the same size + page_width, page_height = page.get_size() + surface = cairo.PDFSurface(output, page_width, page_height) + context = cairo.Context(surface) # context draws on the surface + logging.debug('PDF rendering of %s' % self.filename) + for pagenum in range(self.document.get_n_pages()): + page = self.document.get_page(pagenum) + context.translate(0, 0) + if self.pdf_quality: + page.render(context) # render the page on context + else: + page.render_for_printing(context) # render the page on context + context.show_page() # draw context on surface + surface.finish() + shutil.move(output, self.output) + except: + logging.error('Something went wrong when cleaning %s. File not cleaned' % self.filename) + return False try: import pdfrw # For now, poppler cannot write meta, so we must use pdfrw diff --git a/MAT/parser.py b/MAT/parser.py index c57eb00..c1c3f4c 100644 --- a/MAT/parser.py +++ b/MAT/parser.py @@ -22,8 +22,7 @@ FIELD = object() class GenericParser(object): - ''' - Parent class of all parsers + ''' Parent class of all parsers ''' def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): self.filename = '' @@ -66,8 +65,7 @@ class GenericParser(object): return True def remove_all(self): - ''' - Remove all compromising fields + ''' Remove all compromising fields ''' state = self._remove_all(self.editor) hachoir_core.field.writeIntoFile(self.editor, self.output) @@ -75,8 +73,7 @@ class GenericParser(object): return state def _remove_all(self, fieldset): - ''' - Recursive way to handle tree metadatas + ''' Recursive way to handle tree metadatas ''' try: for field in fieldset: @@ -90,8 +87,7 @@ class GenericParser(object): return False def _remove(self, fieldset, field): - ''' - Delete the given field + ''' Delete the given field ''' del fieldset[field] @@ -104,8 +100,7 @@ class GenericParser(object): return metadata def _get_meta(self, fieldset, metadata): - ''' - Recursive way to handle tree metadatas + ''' Recursive way to handle tree metadatas ''' for field in fieldset: remove = self._should_remove(field) @@ -119,7 +114,7 @@ class GenericParser(object): def _should_remove(self, key): ''' - return True if the field is compromising + Return True if the field is compromising abstract method ''' raise NotImplementedError @@ -137,7 +132,7 @@ class GenericParser(object): but it greatly simplify new strippers implementation. ''' if self.backup: - os.rename(self.filename, self.filename + '.bak') + shutil.move(self.filename, self.filename + '.bak') else: mat.secure_remove(self.filename) os.rename(self.output, self.filename) -- cgit v1.3