# Provenance: reconstructed from commit c308cf7daaa4fa46377e2df0f2e9a397981e19b2
# ("The current version is (mostly) working", jvoisin, Fri, 10 Jun 2011
# 01:29:29 +0200), which created mat.py (117 insertions). Page served by
# cgit v1.3.
"""Metadata inspection helpers built on the hachoir libraries.

``create_class_file`` parses a file with hachoir and wraps it in a stripper
class (a ``file`` subclass) chosen from the ``strippers`` table.
"""

import hachoir_core.error
import hachoir_core.cmd_line
import hachoir_parser
import hachoir_metadata
import hachoir_editor

import sys
import os
import hachoir_parser.image

__version__ = "0.1"
__author__ = "jvoisin"


# NOTE: the class name shadows Python 2's builtin ``file``; it is kept because
# it is this module's public name.
class file(object):
    """Base wrapper around a parsed file and its extracted metadata.

    Subclasses must implement ``_should_remove`` to decide which metadata
    fields are considered compromising.
    """

    def __init__(self, realname, filename, parser, editor):
        """Store the hachoir handles and extract the metadata right away.

        realname -- the file name exactly as given by the caller
        filename -- the name as returned by hachoir's ``unicodeFilename``
        parser   -- the hachoir parser created for the file
        editor   -- the hachoir editor created from ``parser``
        """
        self.clean = False
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        """Return a dict mapping each non-empty metadata key to its text.

        Prints a message and exits with status 1 when the metadata cannot
        be extracted.
        """
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)
            # Bind the name so the check below reports the failure instead
            # of crashing with a NameError.
            meta = None

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            if title.values:  # skip fields that carry no value
                # When a field holds several values, the last one wins.
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        """Return the ``clean`` flag (False until something sets it)."""
        return self.clean

    def remove_all(self):
        """Walk the metadata and flag every field ``_should_remove`` accepts.

        Removal itself is not wired in yet: matching fields only trigger a
        debug print, and ``clean`` is left untouched.
        """
        for key in self.meta:
            if self._should_remove(key):
                print("BLEH")  # DEBUG
                # TODO: call self.__remove(key) and set self.clean = True
                # once removal is ready to be enabled.

    def __remove(self, field):
        """Delete the given field from the editor."""
        del self.editor[field]

    def get_meta(self):
        """Return the dict holding all the metadata of the file."""
        return self.meta

    def _should_remove(self, field):
        """Return True if the field is compromising.

        Abstract method: subclasses must override it.
        """
        raise NotImplementedError()


class JpegStripper(file):
    """Stripper used for files parsed as ``JpegFile``."""

    def _should_remove(self, field):
        """Always return False, so no field is currently flagged."""
        return False


# Maps a hachoir parser class to the stripper class that handles it.
strippers = {
    hachoir_parser.image.JpegFile: JpegStripper,
}


def create_class_file(name):
    """Return a stripper instance matching the file type of ``name``.

    Prints a message and exits with status 1 when ``name`` is not a regular
    file, when hachoir cannot create a parser for it, or when no stripper is
    registered for the detected file type.
    """
    if not os.path.isfile(name):  # check that the file exists
        print("Error: %s is not a valid file" % name)
        sys.exit(1)

    realname = name
    filename = hachoir_core.cmd_line.unicodeFilename(name)
    parser = hachoir_parser.createParser(filename, realname)
    if not parser:
        print("Unable to parse the file %s : sorry" % filename)
        sys.exit(1)

    editor = hachoir_editor.createEditor(parser)
    try:
        # This part is a little tricky: stripper_class receives the
        # $FILETYPEStripper class (which inherits from the "file" class),
        # looked up from the class of the editor's input.
        stripper_class = strippers[editor.input.__class__]
    except KeyError:
        print("Don't have stripper for file type: %s" % editor.description)
        sys.exit(1)
    return stripper_class(realname, filename, parser, editor)