From 52f2fedd5d73807d42ba5c397c3e4c5348b47a47 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 20 Jun 2011 01:25:33 +0200 Subject: Introduction of a nice separation of functions/class in differents files --- lib/__init__.py | 1 + lib/images.py | 17 ++++++++++ lib/mat.py | 99 +++------------------------------------------------------ lib/parser.py | 79 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 94 deletions(-) create mode 100644 lib/images.py mode change 100755 => 100644 lib/mat.py create mode 100644 lib/parser.py diff --git a/lib/__init__.py b/lib/__init__.py index e69de29..8b13789 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/images.py b/lib/images.py new file mode 100644 index 0000000..21229c2 --- /dev/null +++ b/lib/images.py @@ -0,0 +1,17 @@ +import parser + +class JpegStripper(parser.Generic_parser): + def _should_remove(self, field): + if field.name.startswith('comment'): + return True + elif field.name in ("photoshop", "exif", "adobe"): + return True + else: + return False + +class PngStripper(parser.Generic_parser): + def _should_remove(self, field): + if field.name in ('comment'): + return True + else: + return False diff --git a/lib/mat.py b/lib/mat.py old mode 100755 new mode 100644 index 48b83fb..5641c62 --- a/lib/mat.py +++ b/lib/mat.py @@ -3,111 +3,22 @@ ''' Metadata anonymisation toolkit library ''' + import sys import os -import hachoir_core.error -import hachoir_core.field import hachoir_core.cmd_line import hachoir_parser -import hachoir_metadata -import hachoir_parser.image - -sys.path.append('..') import hachoir_editor +import images + __version__ = "0.1" __author__ = "jvoisin" -POSTFIX = ".cleaned" - -class file(): - def __init__(self, realname, filename, parser, editor): - self.meta = {} - self.filename = filename - self.realname = realname - self.parser = parser - self.editor = editor - self.meta = self.__fill_meta() - - def __fill_meta(self): - metadata = {} - try: - meta = hachoir_metadata.extractMetadata(self.parser) - except hachoir_core.error.HachoirError, err: - print("Metadata extraction error: %s" % err) - - if not meta: - print("Unable to extract metadata from the file %s" % self.filename) - sys.exit(1) - - for title in meta: - #fixme i'm so dirty - if title.values != []: #if the field is not empty - value = "" - for item in title.values: - value = item.text - metadata[title.key] = value - return metadata - - def is_clean(self): - ''' - Check if the file is clean from harmful metadatas - ''' - for field in self.editor: - if self._should_remove(field): - return False - return True - - def remove_all(self): - ''' - Remove all the files that are compromizing - ''' - for field in self.editor: - if self._should_remove(field): - self._remove(field) - hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) - - def _remove(self, field): - ''' - Remove the given field - ''' - del self.editor[field.name] - - - def get_meta(self): - ''' - return a dict with all the meta of the file - ''' - #am I useless ? - return self.meta - - def _should_remove(self, key): - ''' - return True if the field is compromizing - abstract method - ''' - raise NotImplementedError() - -class JpegStripper(file): - def _should_remove(self, field): - if field.name.startswith('comment'): - return True - elif field.name in ("photoshop", "exif", "adobe"): - return True - else: - return False - -class PngStripper(file): - def _should_remove(self, field): - if field.name in ('comment'): - return True - else: - return False - strippers = { - hachoir_parser.image.JpegFile: JpegStripper, - hachoir_parser.image.PngFile: PngStripper, + hachoir_parser.image.JpegFile: images.JpegStripper, + hachoir_parser.image.PngFile: images.PngStripper, } def create_class_file(name): diff --git a/lib/parser.py b/lib/parser.py new file mode 100644 index 0000000..828648f --- /dev/null +++ b/lib/parser.py @@ -0,0 +1,79 @@ +''' + Parent class of all parser +''' + +import hachoir_core.error +import hachoir_parser +import hachoir_metadata +import hachoir_editor +import sys + +POSTFIX = ".cleaned" + +class Generic_parser(): + def __init__(self, realname, filename, parser, editor): + self.meta = {} + self.filename = filename + self.realname = realname + self.parser = parser + self.editor = editor + self.meta = self.__fill_meta() + + def __fill_meta(self): + metadata = {} + try: + meta = hachoir_metadata.extractMetadata(self.parser) + except hachoir_core.error.HachoirError, err: + print("Metadata extraction error: %s" % err) + + if not meta: + print("Unable to extract metadata from the file %s" % self.filename) + sys.exit(1) + + for title in meta: + #fixme i'm so dirty + if title.values != []: #if the field is not empty + value = "" + for item in title.values: + value = item.text + metadata[title.key] = value + return metadata + + def is_clean(self): + ''' + Check if the file is clean from harmful metadatas + ''' + for field in self.editor: + if self._should_remove(field): + return False + return True + + def remove_all(self): + ''' + Remove all the files that are compromizing + ''' + for field in self.editor: + if self._should_remove(field): + self._remove(field) + hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX) + + def _remove(self, field): + ''' + Remove the given field + ''' + del self.editor[field.name] + + + def get_meta(self): + ''' + return a dict with all the meta of the file + ''' + #am I useless ? + return self.meta + + def _should_remove(self, key): + ''' + return True if the field is compromizing + abstract method + ''' + raise NotImplementedError() -- cgit v1.3