# lib/misc.py -- reconstructed from the patch that created this file
# (commit 9e69adbe1b065707f8be4f146cc3c05660cef711, jvoisin, 2011-06-21,
# "Add pdfrw, and many files that I have forgotten, sorry!").
import parser
import pdfrw


class PdfStripper(parser.Generic_parser):
    '''
        Represent a pdf file, with the help of pdfrw.

        The file is read once in the constructor; the object returned by
        pdfrw.PdfReader is kept in self.trailer and its Info entry is
        used as the metadata dictionary by every method below.
    '''
    # Info fields blanked by remove_all().
    _INFO_FIELDS = ('Title', 'Author', 'Producer', 'Creator',
                    'CreationDate', 'ModDate')

    def __init__(self, filename):
        '''
            Read `filename` with pdfrw and prepare a writer for the
            cleaned copy.
        '''
        # NOTE(review): the base class __init__ is not called here;
        # confirm against parser.Generic_parser that this is intended.
        self.filename = filename
        self.trailer = pdfrw.PdfReader(self.filename)
        self.writer = pdfrw.PdfWriter()

    def remove_all(self):
        '''
            Blank the compromising metadata fields and write the result
            to self.filename + parser.POSTFIX.
        '''
        info = self.trailer.Info
        # A document without an Info dictionary has nothing to blank;
        # the copy is still written so callers always get an output file.
        if info is not None:
            for field in self._INFO_FIELDS:
                setattr(info, field, '')

        self.writer.trailer = self.trailer
        self.writer.write(self.filename + parser.POSTFIX)

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata.

            Return True when the Info dictionary is missing, empty, or
            holds only empty values; False otherwise.
        '''
        info = self.trailer.Info
        if not info:
            return True
        # Compare the *values*: iterating the dictionary itself yields
        # the field names, which are never empty.
        for _, value in info.iteritems():
            if value != '':
                return False
        return True

    def get_meta(self):
        '''
            Return a dict with all the metadata of the file.

            An empty dict is returned when there is no Info dictionary.
        '''
        metadata = {}
        info = self.trailer.Info
        if not info:
            return metadata
        for key, value in info.iteritems():
            # Drop the first character of the key and the first and last
            # characters of the value -- presumably the leading '/' of a
            # PDF name and the delimiters around a PDF string.
            metadata[key[1:]] = value[1:-1]
        return metadata