diff options
Diffstat (limited to 'mat.py')
| -rwxr-xr-x | mat.py | 141 |
1 files changed, 0 insertions, 141 deletions
| @@ -1,141 +0,0 @@ | |||
| 1 | #!/usr/bin/python | ||
| 2 | |||
| 3 | ''' | ||
| 4 | Metadata anonymisation toolkit library | ||
| 5 | ''' | ||
| 6 | |||
| 7 | import hachoir_core.error | ||
| 8 | import hachoir_core.field | ||
| 9 | import hachoir_core.cmd_line | ||
| 10 | import hachoir_parser | ||
| 11 | import hachoir_metadata | ||
| 12 | import hachoir_editor | ||
| 13 | |||
| 14 | import sys | ||
| 15 | import os | ||
| 16 | import hachoir_parser.image | ||
| 17 | |||
# Module metadata
__version__ = "0.1"
__author__ = "jvoisin"

# Suffix appended to the original file name to build the name of the
# cleaned copy (see the use of POSTFIX when the cleaned file is written)
POSTFIX = ".cleaned"
| 22 | |||
class file():
    '''
        Base class of every stripper.

        It wraps one parsed file (hachoir parser + hachoir editor) and
        offers helpers to list its metadata and to remove the compromising
        fields. Subclasses only have to implement _should_remove().

        realname : the name of the file, as given by the caller
        filename : the same name, converted for hachoir
        parser   : the hachoir parser of the file
        editor   : the hachoir editor built from the parser
    '''
    def __init__(self, realname, filename, parser, editor):
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        # dict {metadata key: text of the value}, filled once at creation
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Build and return a dict with the metadata of the file.

            Exit the program (status 1) if no metadata can be extracted.
        '''
        # Default value, so that a failed extraction goes through the
        # "unable to extract" path below instead of raising a NameError
        meta = None
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            if title.values:  # if the field is not empty
                # fixme : only the text of the last value is kept
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas

            return False as soon as one field should be removed
        '''
        for field in self.editor:
            if self._should_remove(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromising, and write
            the result into a new file named filename + POSTFIX
        '''
        # Collect the fields first : deleting from the editor while
        # iterating on it is not safe
        harmful = [field for field in self.editor
                   if self._should_remove(field)]
        for field in harmful:
            self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Remove the given field (looked up by its name) from the editor
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            return a dict with all the meta of the file
        '''
        return self.meta

    def _should_remove(self, key):
        '''
            return True if the field is compromising
            abstract method : must be implemented by every subclass
        '''
        raise NotImplementedError()
| 90 | |||
class JpegStripper(file):
    '''
        Stripper for the jpeg files
    '''
    def _should_remove(self, field):
        '''
            return True if the name of the field starts with 'comment',
            or is one of "photoshop", "exif" or "adobe"
        '''
        name = field.name
        return (name.startswith('comment')
                or name in ("photoshop", "exif", "adobe"))
| 99 | |||
class PngStripper(file):
    '''
        Stripper for the png files
    '''
    def _should_remove(self, field):
        '''
            return True if the field is named exactly 'comment'
        '''
        # The trailing comma matters : ('comment') is a plain string, and
        # "in" on a string is a substring test ("c", "men", "" would match)
        return field.name in ('comment',)
| 106 | |||
# Registry : hachoir parser class of a file -> stripper class able to clean it
# (used to pick the right $FILETYPEStripper for a given file)
strippers = {}
strippers[hachoir_parser.image.JpegFile] = JpegStripper
strippers[hachoir_parser.image.PngFile] = PngStripper
| 111 | |||
def create_class_file(name):
    '''
        Build the stripper matching the type of the given file.

        name : path of the file to handle

        return an instance of a $FILETYPEStripper class (which inherits
        from the "file" class), chosen from the "strippers" registry.

        Print a message and exit the program (status 1) if the path is
        not a file, if hachoir can not parse it, or if there is no
        stripper for its type.
    '''
    if not os.path.isfile(name):  # the path must be an existing file
        print("Error: %s is not a valid file" % name)
        sys.exit(1)

    hachoir_name = hachoir_core.cmd_line.unicodeFilename(name)
    parser = hachoir_parser.createParser(hachoir_name)
    if not parser:
        print("Unable to parse the file %s : sorry" % hachoir_name)
        sys.exit(1)

    editor = hachoir_editor.createEditor(parser)

    # The stripper class is looked up with the class of the editor's input
    parser_class = editor.input.__class__
    if parser_class not in strippers:
        # Place for another lib than hachoir
        print("Don't have stripper for file type: %s" % editor.description)
        sys.exit(1)

    return strippers[parser_class](name, hachoir_name, parser, editor)
