# Origin: commit 52f2fedd5d73807d42ba5c397c3e4c5348b47a47
# Author: jvoisin, Mon, 20 Jun 2011 01:25:33 +0200
# Subject: Introduction of a nice separation of functions/class in
#          differents files
# File: lib/parser.py (new file, mode 100644)
'''
    Parent class of all parser
'''

import hachoir_core.error
# Imported explicitly: remove_all() calls hachoir_core.field.writeIntoFile.
import hachoir_core.field
import hachoir_parser
import hachoir_metadata
import hachoir_editor
import sys

# Suffix appended to the original file name when writing the cleaned copy.
POSTFIX = ".cleaned"


class Generic_parser():
    '''
        Base class for the format-specific parsers.

        Subclasses must implement _should_remove(), which decides
        whether a given field of the editor has to be stripped.
    '''

    def __init__(self, realname, filename, parser, editor):
        '''
            Store the given objects and extract the metadata right away.

            realname and filename are kept as given; only filename is
            used by the methods of this class (error message and name
            of the cleaned output file).
            parser is handed to hachoir_metadata.extractMetadata().
            editor is iterated field by field, and is what gets written
            to disk by remove_all().

            Exits the process with status 1 if no metadata can be
            extracted (see __fill_meta).
        '''
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Return a dict mapping each non-empty metadata key to the
            text of its last value.

            Prints a message and calls sys.exit(1) when the extraction
            fails or yields nothing.
        '''
        # Bind meta up front: if extractMetadata() raises, the check below
        # must see "no metadata" rather than hit an unbound local.
        meta = None
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            # Skip empty fields. When a field holds several values, only
            # the text of the last one is kept (same as the initial
            # implementation, which overwrote the value on each item).
            if title.values:
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas

            Return False as soon as one field of the editor should be
            removed, True otherwise.
        '''
        for field in self.editor:
            if self._should_remove(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromizing, then write the
            result into a new file named <filename> + POSTFIX.
        '''
        for field in self.editor:
            if self._should_remove(field):
                self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Remove the given field from the editor (looked up by its name)
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            return a dict with all the meta of the file
        '''
        #am I useless ?
        return self.meta

    def _should_remove(self, key):
        '''
            return True if the field is compromizing
            abstract method

            key is a field of the editor, as passed by is_clean() and
            remove_all(). Always raises NotImplementedError here.
        '''
        raise NotImplementedError()