From 80ece3001895ea13d50915a5215fd47e313bab4c Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 2 Dec 2015 17:07:19 +0100 Subject: Remove hachoir from MAT. This (huge) commit removes completely hachoir from MAT. Audio files are now processed with mutagen, and images with exiftool, since the main python imaging library (PIL) isn't super-great to deal with metadata (and damaged/non-standard files). Package maintainer should change the dependencies to reflect this. --- libmat/parser.py | 78 +++----------------------------------------------------- 1 file changed, 4 insertions(+), 74 deletions(-) (limited to 'libmat/parser.py') diff --git a/libmat/parser.py b/libmat/parser.py index 8e10ae9..2a82a25 100644 --- a/libmat/parser.py +++ b/libmat/parser.py @@ -5,8 +5,6 @@ import os import shutil import tempfile -import hachoir_core -import hachoir_editor import mat @@ -24,19 +22,14 @@ FIELD = object() class GenericParser(object): """ Parent class of all parsers """ - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): + def __init__(self, filename, mime, backup, is_writable, **kwargs): self.filename = '' - self.parser = parser self.mime = mime self.backup = backup self.is_writable = is_writable - self.editor = hachoir_editor.createEditor(parser) - try: - self.filename = hachoir_core.cmd_line.unicodeFilename(filename) - except TypeError: # get rid of "decoding Unicode is not supported" - self.filename = filename + self.filename = filename self.basename = os.path.basename(filename) - self.output = hachoir_core.cmd_line.unicodeFilename(tempfile.mkstemp()[1]) + self.output = tempfile.mkstemp()[1] def __del__(self): """ Remove tempfile if it was not used @@ -48,74 +41,11 @@ class GenericParser(object): """ Check if the file is clean from harmful metadatas """ - for field in self.editor: - if self._should_remove(field): - return self._is_clean(self.editor) - return True - - def _is_clean(self, fieldset): - """ Helper method of the `is_clean` one """ - for field in fieldset: - remove = self._should_remove(field) - if remove is True: - return False - if remove is FIELD: - if not self._is_clean(field): - return False - return True + raise NotImplementedError def remove_all(self): """ Remove all compromising fields """ - state = self._remove_all(self.editor) - hachoir_core.field.writeIntoFile(self.editor, self.output) - self.do_backup() - return state - - def _remove_all(self, fieldset): - """ Recursive way to handle tree metadatas - """ - try: - for field in fieldset: - remove = self._should_remove(field) - if remove is True: - self._remove(fieldset, field.name) - if remove is FIELD: - self._remove_all(field) - return True - except: - return False - - @staticmethod - def _remove(fieldset, field): - """ Delete the given field - """ - del fieldset[field] - - def get_meta(self): - """ Return a dict with all the meta of the file - """ - metadata = {} - self._get_meta(self.editor, metadata) - return metadata - - def _get_meta(self, fieldset, metadata): - """ Recursive way to handle tree metadatas - """ - for field in fieldset: - remove = self._should_remove(field) - if remove: - try: - metadata[field.name] = field.value - except: - metadata[field.name] = 'harmful content' - if remove is FIELD: - self._get_meta(field, None) - - def _should_remove(self, key): - """ Return True if the field is compromising - abstract method - """ raise NotImplementedError def create_backup_copy(self): -- cgit v1.3