From b2e623d78db9e4c8da14fa44f830697f1a4eb739 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 30 Jul 2011 20:43:45 +0200 Subject: Tests for pdf (all green btw) --- lib/office.py | 8 +++++--- test/clean.pdf | Bin 0 -> 11821 bytes test/dirty.pdf | Bin 0 -> 13053 bytes 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 test/clean.pdf create mode 100644 test/dirty.pdf diff --git a/lib/office.py b/lib/office.py index 5d05e76..ec08cfb 100644 --- a/lib/office.py +++ b/lib/office.py @@ -120,7 +120,7 @@ class PdfStripper(parser.GenericParser): def __init__(self, filename, parser, mime, backup, add2archive): super(PdfStripper, self).__init__(filename, parser, mime, backup, add2archive) - uri = 'file://' + self.filename + uri = 'file://' + os.path.abspath(self.filename) self.password = None self.document = poppler.document_new_from_file(uri, self.password) self.meta_list = ('title', 'author', 'subject', 'keywords', 'creator', @@ -131,11 +131,12 @@ class PdfStripper(parser.GenericParser): Check if the file is clean from harmful metadatas ''' for key in self.meta_list: - if key == 'creation-date' and key == 'mod-date': + if key == 'creation-date' or key == 'mod-date': if self.document.get_property(key) != -1: return False else: - if self.document.get_property(key) is not None: + if self.document.get_property(key) is not None and \ + self.document.get_property(key) != '': return False return True @@ -179,4 +180,5 @@ class PdfStripper(parser.GenericParser): if self.document.get_property(key) is not None and \ self.document.get_property(key) != '': metadata[key] = self.document.get_property(key) + print metadata return metadata diff --git a/test/clean.pdf b/test/clean.pdf new file mode 100644 index 0000000..c95cfb0 Binary files /dev/null and b/test/clean.pdf differ diff --git a/test/dirty.pdf b/test/dirty.pdf new file mode 100644 index 0000000..e4bf7e8 Binary files /dev/null and b/test/dirty.pdf differ -- cgit v1.3