summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/office.py 8
-rw-r--r-- test/clean.pdf bin 0 -> 11821 bytes
-rw-r--r-- test/dirty.pdf bin 0 -> 13053 bytes
3 files changed, 5 insertions, 3 deletions
diff --git a/lib/office.py b/lib/office.py
index 5d05e76..ec08cfb 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -120,7 +120,7 @@ class PdfStripper(parser.GenericParser):
120 def __init__(self, filename, parser, mime, backup, add2archive): 120 def __init__(self, filename, parser, mime, backup, add2archive):
121 super(PdfStripper, self).__init__(filename, parser, mime, backup, 121 super(PdfStripper, self).__init__(filename, parser, mime, backup,
122 add2archive) 122 add2archive)
123 uri = 'file://' + self.filename 123 uri = 'file://' + os.path.abspath(self.filename)
124 self.password = None 124 self.password = None
125 self.document = poppler.document_new_from_file(uri, self.password) 125 self.document = poppler.document_new_from_file(uri, self.password)
126 self.meta_list = ('title', 'author', 'subject', 'keywords', 'creator', 126 self.meta_list = ('title', 'author', 'subject', 'keywords', 'creator',
@@ -131,11 +131,12 @@ class PdfStripper(parser.GenericParser):
131 Check if the file is clean from harmful metadatas 131 Check if the file is clean from harmful metadatas
132 ''' 132 '''
133 for key in self.meta_list: 133 for key in self.meta_list:
134 if key == 'creation-date' and key == 'mod-date': 134 if key == 'creation-date' or key == 'mod-date':
135 if self.document.get_property(key) != -1: 135 if self.document.get_property(key) != -1:
136 return False 136 return False
137 else: 137 else:
138 if self.document.get_property(key) is not None: 138 if self.document.get_property(key) is not None and \
139 self.document.get_property(key) != '':
139 return False 140 return False
140 return True 141 return True
141 142
@@ -179,4 +180,5 @@ class PdfStripper(parser.GenericParser):
179 if self.document.get_property(key) is not None and \ 180 if self.document.get_property(key) is not None and \
180 self.document.get_property(key) != '': 181 self.document.get_property(key) != '':
181 metadata[key] = self.document.get_property(key) 182 metadata[key] = self.document.get_property(key)
183 print metadata
182 return metadata 184 return metadata
diff --git a/test/clean.pdf b/test/clean.pdf
new file mode 100644
index 0000000..c95cfb0
--- /dev/null
+++ b/test/clean.pdf
Binary files differ
diff --git a/test/dirty.pdf b/test/dirty.pdf
new file mode 100644
index 0000000..e4bf7e8
--- /dev/null
+++ b/test/dirty.pdf
Binary files differ