diff options
| author | jvoisin | 2014-06-08 13:39:18 +0200 |
|---|---|---|
| committer | jvoisin | 2014-06-08 13:39:18 +0200 |
| commit | af36529554c39a2eefcc2c8723715e2d25b401b8 (patch) | |
| tree | f54b964520bab44d1dfac725086211eaf22d3763 /libmat/parser.py | |
| parent | ef5a32cfd3c0555ffe5ddf413eeaae61622ebb4b (diff) | |
Rename the MAT folder to libmat.
This commit fixes some issues for dumb operating
systems that don't handle capitalization.
Diffstat (limited to 'libmat/parser.py')
| -rw-r--r-- | libmat/parser.py | 135 |
1 file changed, 135 insertions, 0 deletions
diff --git a/libmat/parser.py b/libmat/parser.py new file mode 100644 index 0000000..1765da8 --- /dev/null +++ b/libmat/parser.py | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | ''' Parent class of all parser | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import os | ||
| 5 | import shutil | ||
| 6 | import tempfile | ||
| 7 | |||
| 8 | import hachoir_core | ||
| 9 | import hachoir_editor | ||
| 10 | |||
| 11 | import mat | ||
| 12 | |||
# Extensions of formats that carry no removable metadata at all;
# files with these extensions can be reported clean without parsing.
NOMETA = frozenset({
    '.bmp',   # "raw" image
    '.rdf',   # text
    '.txt',   # plain text
    '.xml',   # formated text (XML)
    '.rels',  # openXML formated text
})

# Sentinel value: returned by _should_remove() when the field itself is
# harmless but its *children* must be inspected recursively.
FIELD = object()
| 23 | |||
class GenericParser(object):
    ''' Parent class of all hachoir-based parsers.

    Subclasses must implement :meth:`_should_remove`, which decides for
    each hachoir field whether it is harmful metadata (True), harmless
    (False), or a container whose children must be inspected (FIELD).
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        ''' Prepare a hachoir editor for `filename`.

        :param filename: path of the file to process
        :param parser: hachoir parser instance for the file
        :param mime: mimetype of the file
        :param backup: if True, keep a .bak copy of the original
        :param is_writable: whether the file can be modified
        '''
        self.filename = ''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.is_writable = is_writable
        self.editor = hachoir_editor.createEditor(parser)
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        self.basename = os.path.basename(filename)
        # The cleaned output is written to a tempfile first, then moved
        # over the original in do_backup().
        _, output = tempfile.mkstemp()
        self.output = hachoir_core.cmd_line.unicodeFilename(output)

    def __del__(self):
        ''' Remove the tempfile if it was not used.
        '''
        # getattr: __init__ may have raised before self.output was set,
        # and __del__ must never raise because of that.
        output = getattr(self, 'output', None)
        if output is not None and os.path.exists(output):
            mat.secure_remove(output)

    def is_clean(self):
        ''' Check if the file is clean from harmful metadatas.

        :return: True if no harmful field is present, False otherwise
        '''
        for field in self.editor:
            if self._should_remove(field):
                return self._is_clean(self.editor)
        return True

    def _is_clean(self, fieldset):
        ''' Recursive helper for is_clean(): walk the field tree and
        return False as soon as a harmful field is found.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD:
                # Container field: its children must be inspected.
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        ''' Remove all compromising fields and write the cleaned file.

        :return: True on success, False if the removal failed
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        ''' Recursive way to handle tree metadatas.

        :return: True if every harmful field was removed, False on error
        '''
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                if remove is FIELD:
                    self._remove_all(field)
            return True
        except Exception:  # was a bare except: narrowed so ^C still works
            return False

    def _remove(self, fieldset, field):
        ''' Delete the given field (by name) from the fieldset.
        '''
        del fieldset[field]

    def get_meta(self):
        ''' Return a dict with all the harmful metadata of the file.
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        ''' Recursive helper for get_meta(): collect harmful fields
        from the tree into the `metadata` dict.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove:
                try:
                    metadata[field.name] = field.value
                except Exception:  # field.value may be unreadable
                    metadata[field.name] = 'harmful content'
            if remove is FIELD:
                # BUGFIX: the original passed None here, which raised
                # TypeError as soon as a nested harmful field was found.
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        ''' Return True if the field is compromising.

        Abstract method: must be implemented by subclasses.
        '''
        raise NotImplementedError

    def create_backup_copy(self):
        ''' Create a backup copy of the original file.
        '''
        shutil.copy2(self.filename, self.filename + '.bak')

    def do_backup(self):
        ''' Keep a backup of the file if asked.

        The process of double-renaming is not very elegant,
        but it greatly simplify new strippers implementation.
        '''
        if self.backup:
            shutil.move(self.filename, self.filename + '.bak')
        else:
            mat.secure_remove(self.filename)
        shutil.move(self.output, self.filename)
