# Provenance: this module text was recovered from the patch that deleted
# lib/parser.py (commit cbf8a2a65928694202e19b6bcf56ec84bcbf613c, jvoisin,
# 2012-12-08, "Reorganize source tree and files installation location,
# cleanup setup.py (Closes: #689409)").
'''
    Parent class of all parsers
'''

import hachoir_core
import hachoir_editor

import os

import mat

NOMETA = ('.bmp', '.rdf', '.txt', '.xml', '.rels')
#bmp : image
#rdf : text
#txt : plain text
#xml : formatted text
#rels : openxml formatted text


# Sentinel a subclass may return from _should_remove() to signal that the
# field is itself a field set that has to be walked recursively.
FIELD = object()


class GenericParser(object):
    '''
        Parent class of all parsers

        Subclasses implement _should_remove(), which classifies each field
        as: True (harmful, to be removed), FIELD (a nested field set to
        recurse into), or anything else (left untouched).
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            filename    -- path of the file to process
            parser      -- parser object handed to hachoir_editor.createEditor
            mime        -- stored as self.mime
            backup      -- stored as self.backup; read by do_backup()
            add2archive -- accepted for interface compatibility, not used
                           by this class
        '''
        self.filename = ''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.editor = hachoir_editor.createEditor(parser)
        self.realname = filename
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        basename, ext = os.path.splitext(filename)
        # The cleaned copy is written next to the original: name.cleaned.ext
        self.output = basename + '.cleaned' + ext
        self.basename = os.path.basename(filename)  # only filename

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata

            Return False as soon as a field flagged by _should_remove() is
            found (at any nesting level), True otherwise.
        '''
        # _is_clean() already returns True when nothing at the top level is
        # flagged, so no separate pre-scan of the editor is needed.
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        '''
            Recursive helper of is_clean(): walk the given field set
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD:
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        '''
            Remove all the compromising fields, write the result into
            self.output, then call do_backup()

            Return True if every removal succeeded, False otherwise. The
            output file is written and do_backup() is called in both cases.
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        '''
            Recursive helper of remove_all(): delete the flagged fields of
            the given field set

            Return False if an exception interrupted the walk of this field
            set or if a nested field set reported a failure, True otherwise.
        '''
        success = True
        # NOTE(review): fields are deleted from `fieldset` while it is being
        # iterated; this relies on the editor tolerating it -- confirm.
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                elif remove is FIELD:
                    # Keep processing the remaining fields, but remember
                    # that a nested field set could not be fully cleaned.
                    if not self._remove_all(field):
                        success = False
        except Exception:
            # Best effort: report the failure instead of propagating it,
            # without swallowing KeyboardInterrupt / SystemExit.
            return False
        return success

    def _remove(self, fieldset, field):
        '''
            Delete the field named `field` from the given field set
        '''
        del fieldset[field]

    def get_meta(self):
        '''
            Return a dict with all the harmful metadata of the file,
            keyed by field name
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        '''
            Recursive helper of get_meta(): fill `metadata` in place with
            the flagged fields of the given field set
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                try:
                    metadata[field.name] = field.value
                except Exception:
                    # The value could not be read; still report the field.
                    metadata[field.name] = 'harmful content'
            if remove is FIELD:
                # Pass the accumulator down so nested fields are collected.
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        '''
            Return True if the field is compromising, FIELD if it is a
            field set that must be inspected recursively
            abstract method
        '''
        raise NotImplementedError

    def do_backup(self):
        '''
            Replace the original file by the cleaned one when no backup
            was asked

            Only when self.backup is exactly False: the original file is
            passed to mat.secure_remove() and self.output is renamed to
            self.filename. Otherwise both files are left in place.
        '''
        if self.backup is False:
            mat.secure_remove(self.filename)
            os.rename(self.output, self.filename)