diff options
| author | jvoisin | 2012-12-08 02:02:25 +0100 |
|---|---|---|
| committer | jvoisin | 2012-12-13 14:24:01 +0100 |
| commit | cbf8a2a65928694202e19b6bcf56ec84bcbf613c (patch) | |
| tree | e106475b0d5c003505336b5ae6416e4508bb768b /lib/parser.py | |
| parent | 67d5c1fa6b9ab6e1e7328ee57b15d8e46526d72a (diff) | |
Reorganize source tree and files installation location, cleanup setup.py (Closes: #689409)
Diffstat (limited to 'lib/parser.py')
| -rw-r--r-- | lib/parser.py | 120 |
1 files changed, 0 insertions, 120 deletions
diff --git a/lib/parser.py b/lib/parser.py deleted file mode 100644 index d2eaf9c..0000000 --- a/lib/parser.py +++ /dev/null | |||
| @@ -1,120 +0,0 @@ | |||
| 1 | ''' | ||
| 2 | Parent class of all parsers | ||
| 3 | ''' | ||
| 4 | |||
| 5 | import hachoir_core | ||
| 6 | import hachoir_editor | ||
| 7 | |||
| 8 | import os | ||
| 9 | |||
| 10 | import mat | ||
| 11 | |||
# File extensions grouped under the name NOMETA.
# NOTE(review): presumably formats treated as carrying no metadata; nothing
# in this module reads the tuple, so confirm against the callers.
NOMETA = ('.bmp', '.rdf', '.txt', '.xml', '.rels')
# bmp : image
# rdf : text
# txt : plain text
# xml : formatted text
# rels : openxml formatted text
| 18 | |||
| 19 | |||
# Sentinel that a _should_remove() implementation returns to signal that the
# field is itself a field set which has to be walked recursively.
FIELD = object()


class GenericParser(object):
    '''
        Parent class of all parsers

        Subclasses must implement _should_remove(), which classifies each
        field as True (harmful, to be removed), FIELD (a nested field set to
        recurse into) or anything else (left untouched).
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            filename: path of the file to process
            parser: object handed to hachoir_editor.createEditor()
            mime: stored as-is in self.mime
            backup: consulted by do_backup()
            add2archive: accepted for the subclasses, unused by this class
        '''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.editor = hachoir_editor.createEditor(parser)
        self.realname = filename
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        basename, ext = os.path.splitext(filename)
        # The cleaned copy is written next to the original file
        self.output = basename + '.cleaned' + ext
        self.basename = os.path.basename(filename)  # only filename

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata

            Return False as soon as one harmful field is found, at any
            nesting depth, True otherwise.
        '''
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        '''
            Recursive worker of is_clean() for one field set
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD and not self._is_clean(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the compromising fields, write the result into
            self.output, then call do_backup()

            Return True if every removal succeeded, False otherwise.
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        '''
            Recursive worker of remove_all() for one field set

            Return False if anything went wrong in this field set or in one
            of its nested field sets, True otherwise.
        '''
        try:
            success = True
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                elif remove is FIELD:
                    # Keep cleaning the siblings on failure, but do not
                    # report a partial cleanup as a success.
                    if not self._remove_all(field):
                        success = False
            return success
        except Exception:
            return False

    def _remove(self, fieldset, field):
        '''
            Delete the field named `field` from `fieldset`
        '''
        del fieldset[field]

    def get_meta(self):
        '''
            Return a dict with all the harmful metadata of the file,
            mapping field names to field values
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        '''
            Recursive worker of get_meta(): fill `metadata` in place with
            the harmful fields of `fieldset`
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                try:
                    metadata[field.name] = field.value
                except Exception:
                    # The value cannot be read: report the field anyway
                    metadata[field.name] = 'harmful content'
            elif remove is FIELD:
                # The accumulator must be passed down to nested field sets
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        '''
            Return True if the field is compromising, FIELD if it is a
            field set to recurse into
            abstract method
        '''
        raise NotImplementedError

    def do_backup(self):
        '''
            Keep the original file if a backup was asked; otherwise
            securely remove it and move the cleaned file in its place
        '''
        # Identity test on purpose: only an explicit False disables backup
        if self.backup is False:
            mat.secure_remove(self.filename)
            os.rename(self.output, self.filename)
