diff options
| author | jvoisin | 2011-08-16 18:11:24 +0200 |
|---|---|---|
| committer | jvoisin | 2011-08-16 18:11:24 +0200 |
| commit | 4bd3e47da02fde08acfada1795cc55170abdb00a (patch) | |
| tree | f8c7aa5fd5e1b07a28b350c5ded8125ef2467c51 /lib/parser.py | |
| parent | baf8e080125614326ba9c96ca8f2404fd12b050e (diff) | |
setup.py now works !
Diffstat (limited to 'lib/parser.py')
| -rw-r--r-- | lib/parser.py | 104 |
1 files changed, 0 insertions, 104 deletions
diff --git a/lib/parser.py b/lib/parser.py deleted file mode 100644 index 58dd7fa..0000000 --- a/lib/parser.py +++ /dev/null | |||
| @@ -1,104 +0,0 @@ | |||
| 1 | ''' | ||
| 2 | Parent class of all parsers | ||
| 3 | ''' | ||
| 4 | |||
| 5 | import hachoir_core | ||
| 6 | import hachoir_editor | ||
| 7 | |||
| 8 | import os | ||
| 9 | |||
| 10 | import mat | ||
| 11 | |||
| 12 | NOMETA = ('.bmp', '.rdf', '.txt', '.xml', '.rels') | ||
| 13 | #bmp : image | ||
| 14 | #rdf : text | ||
| 15 | #txt : plain text | ||
| 16 | #xml : formatted text | ||
| 17 | #rels : openxml formatted text | ||
| 18 | |||
| 19 | |||
class GenericParser(object):
    '''
    Parent class of all parsers.

    Wraps a hachoir parser in a hachoir editor and offers a common
    interface to inspect and strip the fields that a subclass flags
    through _should_remove().
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
        filename    -- path of the file to process
        parser      -- parser object handed to hachoir_editor.createEditor
        mime        -- mime type, stored as-is
        backup      -- if true, the original file is kept and the cleaned
                       copy is left next to it (see do_backup)
        add2archive -- accepted for interface compatibility; not used by
                       this base class
        '''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.editor = hachoir_editor.createEditor(parser)
        self.realname = filename
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        # The cleaned copy is written next to the original:
        # "name.ext" -> "name.cleaned.ext"
        basename, ext = os.path.splitext(filename)
        self.output = basename + '.cleaned' + ext
        self.basename = os.path.basename(filename)  # only filename

    def is_clean(self):
        '''
        Return True if no field of the editor is flagged by
        _should_remove(), False otherwise.
        '''
        return not any(self._should_remove(field) for field in self.editor)

    def remove_all(self):
        '''
        Remove every field flagged by _should_remove(), write the result
        into self.output, then call do_backup().
        '''
        # NOTE(review): fields are deleted while the editor is being
        # iterated, exactly as before; confirm the editor tolerates this.
        for field in self.editor:
            if self._should_remove(field):
                self._remove(field.name)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()

    def remove_all_ugly(self):
        '''
        If the remove_all() is not efficient enough,
        this method is implemented :
        It is efficient, but destructive.
        In a perfect world, with nice fileformat,
        this method would not exist.

        This base implementation simply delegates to remove_all().
        '''
        self.remove_all()

    def _remove(self, field):
        '''
        Delete the field whose name is given from the editor.

        Despite the parameter name, remove_all() passes a field *name*
        (field.name), not a field object.
        '''
        del self.editor[field]

    def get_meta(self):
        '''
        Return a dict mapping the name of every field flagged by
        _should_remove() to its value.

        If reading the value fails, the placeholder string
        'harmful content' is stored instead.
        '''
        metadata = {}
        for field in self.editor:
            if self._should_remove(field):
                try:
                    metadata[field.name] = field.value
                except Exception:
                    # Only swallow ordinary errors: a bare "except" would
                    # also hide KeyboardInterrupt and SystemExit.
                    metadata[field.name] = 'harmful content'
        return metadata

    def _should_remove(self, key):
        '''
        Return True if the given field is compromising.

        Abstract method: subclasses must override it. The methods of this
        class call it with a field object taken from the editor.
        '''
        raise NotImplementedError

    def do_backup(self):
        '''
        Finalise the cleaned file according to self.backup.

        If a backup is wanted, the original file is left untouched and the
        cleaned copy stays at self.output. Otherwise the original file is
        securely removed and replaced by the cleaned copy. In both cases
        the access and modification times of the resulting file are reset
        to 0 (the epoch).
        '''
        # Truthiness test rather than "is True": a truthy flag such as 1
        # must never fall through to the destructive branch.
        if self.backup:
            os.utime(self.output, (0, 0))
        else:
            mat.secure_remove(self.filename)
            os.rename(self.output, self.filename)
            os.utime(self.filename, (0, 0))
