diff options
| author | jvoisin | 2011-06-20 01:25:33 +0200 |
|---|---|---|
| committer | jvoisin | 2011-06-20 01:25:33 +0200 |
| commit | 52f2fedd5d73807d42ba5c397c3e4c5348b47a47 (patch) | |
| tree | 09070e23428f3f72e4a95da8df33520adacbf01f /lib/parser.py | |
| parent | de5917e5f01374bb1a647f49ae85283241a2bea9 (diff) | |
Introduction of a clean separation of functions/classes into different files
Diffstat (limited to 'lib/parser.py')
| -rw-r--r-- | lib/parser.py | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/parser.py b/lib/parser.py new file mode 100644 index 0000000..828648f --- /dev/null +++ b/lib/parser.py | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | ''' | ||
| 2 | Parent class of all parsers | ||
| 3 | ''' | ||
| 4 | |||
| 5 | import hachoir_core.error | ||
| 6 | import hachoir_parser | ||
| 7 | import hachoir_metadata | ||
| 8 | import hachoir_editor | ||
| 9 | import sys | ||
| 10 | |||
| 11 | POSTFIX = ".cleaned" | ||
| 12 | |||
class Generic_parser():
    '''
        Parent class of all parsers.

        Holds a hachoir parser/editor pair for one file, the metadata
        extracted from it, and the generic "walk the fields and drop the
        compromising ones" logic. Subclasses must implement
        _should_remove() to decide which fields are compromising.
    '''
    def __init__(self, realname, filename, parser, editor):
        '''
            Store the given objects and extract the metadata right away.

            realname and filename are stored as given; filename is also used
            in error messages and to build the output path in remove_all().
            parser is handed to hachoir_metadata.extractMetadata(), and
            editor is the object whose fields are iterated and deleted.
        '''
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Return a dict mapping each non-empty metadata key to a text value.

            Prints a message and exits the process with status 1 when no
            metadata can be extracted (including when the extraction raises
            a HachoirError).
        '''
        # Bound up-front so that a failed extraction reaches the
        # "unable to extract" exit path below instead of a NameError.
        meta = None
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            if title.values:  # skip empty fields
                # A field may carry several values; as before, only the
                # text of the last one is kept.
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata.

            Return False as soon as one field of the editor is flagged by
            _should_remove(), True otherwise.
        '''
        for field in self.editor:
            if self._should_remove(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromising, then write the
            result to a new file named <filename> + POSTFIX.
        '''
        # Imported here because the module only imports hachoir_core.error
        # at top level; this makes the dependency on the field package
        # explicit.
        import hachoir_core.field

        for field in self.editor:
            if self._should_remove(field):
                self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Remove the given field from the editor (looked up by its name).
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            Return a dict with all the metadata of the file.
        '''
        return self.meta

    def _should_remove(self, key):
        '''
            Return True if the field is compromising.

            Abstract method: subclasses have to override it.
        '''
        raise NotImplementedError()
