From b2e623d78db9e4c8da14fa44f830697f1a4eb739 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 30 Jul 2011 20:43:45 +0200 Subject: Tests for pdf (all green btw) --- lib/office.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/office.py b/lib/office.py index 5d05e76..ec08cfb 100644 --- a/lib/office.py +++ b/lib/office.py @@ -120,7 +120,7 @@ class PdfStripper(parser.GenericParser): def __init__(self, filename, parser, mime, backup, add2archive): super(PdfStripper, self).__init__(filename, parser, mime, backup, add2archive) - uri = 'file://' + self.filename + uri = 'file://' + os.path.abspath(self.filename) self.password = None self.document = poppler.document_new_from_file(uri, self.password) self.meta_list = ('title', 'author', 'subject', 'keywords', 'creator', @@ -131,11 +131,12 @@ class PdfStripper(parser.GenericParser): Check if the file is clean from harmful metadatas ''' for key in self.meta_list: - if key == 'creation-date' and key == 'mod-date': + if key == 'creation-date' or key == 'mod-date': if self.document.get_property(key) != -1: return False else: - if self.document.get_property(key) is not None: + if self.document.get_property(key) is not None and \ + self.document.get_property(key) != '': return False return True @@ -179,4 +180,5 @@ class PdfStripper(parser.GenericParser): if self.document.get_property(key) is not None and \ self.document.get_property(key) != '': metadata[key] = self.document.get_property(key) + print metadata return metadata -- cgit v1.3