diff options
Diffstat (limited to 'libmat')
| -rw-r--r-- | libmat/__init__.py | 1 | ||||
| -rw-r--r-- | libmat/archive.py | 335 | ||||
| -rw-r--r-- | libmat/audio.py | 53 | ||||
| -rw-r--r-- | libmat/bencode/__init__.py | 1 | ||||
| -rw-r--r-- | libmat/bencode/bencode.py | 143 | ||||
| -rw-r--r-- | libmat/exceptions.py | 14 | ||||
| -rw-r--r-- | libmat/exiftool.py | 78 | ||||
| -rw-r--r-- | libmat/hachoir_editor/__init__.py | 8 | ||||
| -rw-r--r-- | libmat/hachoir_editor/field.py | 69 | ||||
| -rw-r--r-- | libmat/hachoir_editor/fieldset.py | 352 | ||||
| -rw-r--r-- | libmat/hachoir_editor/typed_field.py | 253 | ||||
| -rw-r--r-- | libmat/images.py | 52 | ||||
| -rw-r--r-- | libmat/mat.py | 186 | ||||
| -rw-r--r-- | libmat/misc.py | 76 | ||||
| -rw-r--r-- | libmat/mutagenstripper.py | 33 | ||||
| -rw-r--r-- | libmat/office.py | 191 | ||||
| -rw-r--r-- | libmat/parser.py | 135 | ||||
| -rw-r--r-- | libmat/strippers.py | 70 |
18 files changed, 2050 insertions, 0 deletions
diff --git a/libmat/__init__.py b/libmat/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/libmat/__init__.py | |||
| @@ -0,0 +1 @@ | |||
diff --git a/libmat/archive.py b/libmat/archive.py new file mode 100644 index 0000000..d483dcc --- /dev/null +++ b/libmat/archive.py | |||
| @@ -0,0 +1,335 @@ | |||
| 1 | ''' Take care of archives formats | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import datetime | ||
| 5 | import logging | ||
| 6 | import os | ||
| 7 | import shutil | ||
| 8 | import stat | ||
| 9 | import tarfile | ||
| 10 | import tempfile | ||
| 11 | import zipfile | ||
| 12 | |||
| 13 | import mat | ||
| 14 | import parser | ||
| 15 | |||
# Zip files do not support dates older than 01/01/1980
ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
# Seconds between the Unix epoch and the zip epoch, used to reset mtimes
# before re-adding files to a cleaned archive.
# NOTE(review): the reference is 1970-01-01 *01:00*, not 00:00 — presumably
# compensating for os.utime's local-time handling; confirm on other timezones.
ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0)
    - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds()
| 20 | |||
| 21 | |||
class GenericArchiveStripper(parser.GenericParser):
    ''' Base class for archive formats (zip, tar, ...).

    Members are extracted into a private temporary directory so that the
    per-format strippers returned by mat.create_class_file() can process
    them individually.
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(GenericArchiveStripper, self).__init__(filename,
            parser, mime, backup, is_writable, **kwargs)
        # Compression suffix for tarfile open modes (e.g. ':gz'),
        # overridden by subclasses; empty means plain/uncompressed.
        self.compression = ''
        # Whether unsupported member files should still be added back
        # to the cleaned archive (caller must pass this kwarg).
        self.add2archive = kwargs['add2archive']
        # Scratch directory where archive members are extracted.
        self.tempdir = tempfile.mkdtemp()

    def __del__(self):
        ''' Remove the files inside the temp dir,
        then remove the temp dir itself.
        '''
        # secure_remove overwrites the file content before unlinking,
        # so extracted (possibly sensitive) copies don't linger on disk.
        for root, dirs, files in os.walk(self.tempdir):
            for item in files:
                path_file = os.path.join(root, item)
                mat.secure_remove(path_file)
        shutil.rmtree(self.tempdir)

    def is_clean(self, list_unsupported=False):
        ''' Virtual method to check for harmful metadata.
        '''
        raise NotImplementedError

    def list_unsupported(self):
        ''' Get a list of every non-supported file present in the archive.
        '''
        # Delegates to is_clean(), which returns a list in this mode.
        return self.is_clean(list_unsupported=True)

    def remove_all(self):
        ''' Virtual method to remove all metadata.
        '''
        raise NotImplementedError
| 56 | |||
| 57 | |||
class ZipStripper(GenericArchiveStripper):
    ''' Represent a zip file.
    '''
    def __is_zipfile_clean(self, fileinfo):
        ''' Check if a ZipInfo object is clean of metadata added
        by zip itself, independently of the corresponding file metadata.
        '''
        if fileinfo.comment != '':
            return False
        elif fileinfo.date_time != ZIP_EPOCH:
            # any timestamp other than the zip epoch leaks the mtime
            return False
        elif fileinfo.create_system != 3:  # 3 is UNIX
            return False
        return True

    def is_clean(self, list_unsupported=False):
        ''' Check if the given file is clean from harmful metadata.
        When list_unsupported is True, the method returns a list
        of all non-supported/archives files contained in the
        archive instead of a boolean.
        '''
        ret_list = []
        zipin = zipfile.ZipFile(self.filename, 'r')
        if zipin.comment != '' and not list_unsupported:
            logging.debug('%s has a comment' % self.filename)
            return False
        for item in zipin.infolist():
            # members are extracted so per-format strippers can inspect them
            zipin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.filename)
            if not self.__is_zipfile_clean(item) and not list_unsupported:
                logging.debug('%s from %s has compromising zipinfo' %
                    (item.filename, self.filename))
                return False
            if os.path.isfile(path):
                cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata' % (item.filename, self.filename))
                        if not list_unsupported:
                            return False
                else:
                    logging.info('%s\'s fileformat is not supported or harmless.'
                        % item.filename)
                    basename, ext = os.path.splitext(path)
                    # 'mimetype' and '.rels' are structural files (odf/ooxml),
                    # not user metadata, so they never count as unsupported
                    if os.path.basename(item.filename) not in ('mimetype', '.rels'):
                        if ext not in parser.NOMETA:
                            if not list_unsupported:
                                return False
                            ret_list.append(item.filename)
        zipin.close()
        if list_unsupported:
            return ret_list
        return True

    def get_meta(self):
        ''' Return all the metadata of a zip archive.'''
        zipin = zipfile.ZipFile(self.filename, 'r')
        metadata = {}
        if zipin.comment != '':
            metadata['comment'] = zipin.comment
        for item in zipin.infolist():
            zipinfo_meta = self.__get_zipinfo_meta(item)
            if zipinfo_meta != {}:  # zipinfo metadata
                metadata[item.filename + "'s zipinfo"] = str(zipinfo_meta)
            zipin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.filename)
            if os.path.isfile(path):
                cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                if cfile is not None:
                    cfile_meta = cfile.get_meta()
                    if cfile_meta != {}:
                        metadata[item.filename] = str(cfile_meta)
                else:
                    logging.info('%s\'s fileformat is not supported or harmless'
                        % item.filename)
        zipin.close()
        return metadata

    def __get_zipinfo_meta(self, zipinfo):
        ''' Return all the metadata of a ZipInfo.
        '''
        metadata = {}
        if zipinfo.comment != '':
            metadata['comment'] = zipinfo.comment
        if zipinfo.date_time != ZIP_EPOCH:
            metadata['modified'] = zipinfo.date_time
        if zipinfo.create_system != 3:  # 3 is UNIX
            metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown"
        return metadata

    def remove_all(self, whitelist=[], beginning_blacklist=[], ending_blacklist=[]):
        ''' Remove all metadata from a zip archive, even those
        added by Python's zipfile itself. It will not add
        files starting with "beginning_blacklist", or ending with
        "ending_blacklist". This method also adds files present in
        whitelist to the archive.
        '''
        # NOTE(review): the mutable default arguments are only read,
        # never mutated, so the shared-default pitfall does not bite here.
        zipin = zipfile.ZipFile(self.filename, 'r')
        zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
        for item in zipin.infolist():
            zipin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.filename)

            beginning = any((True for f in beginning_blacklist if item.filename.startswith(f)))
            ending = any((True for f in ending_blacklist if item.filename.endswith(f)))

            if os.path.isfile(path) and not beginning and not ending:
                cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                if cfile is not None:
                    # Handle read-only files inside archive
                    old_stat = os.stat(path).st_mode
                    os.chmod(path, old_stat|stat.S_IWUSR)
                    cfile.remove_all()
                    os.chmod(path, old_stat)
                    logging.debug('Processing %s from %s' % (item.filename, self.filename))
                elif item.filename not in whitelist:
                    logging.info('%s\'s format is not supported or harmless' % item.filename)
                    basename, ext = os.path.splitext(path)
                    if not (self.add2archive or ext in parser.NOMETA):
                        continue
                # reset the timestamp to the zip epoch before re-adding,
                # since zipout.write() records the file's mtime
                os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS))
                zipout.write(path, item.filename)
        zipin.close()
        zipout.close()

        logging.info('%s processed' % self.filename)
        self.do_backup()
        return True
| 186 | |||
| 187 | |||
class TarStripper(GenericArchiveStripper):
    ''' Represent a tarfile archive.
    '''
    def _remove(self, current_file):
        ''' Remove the meta added by tarfile itself to the file.
        Used as the `filter` callback of TarFile.add().
        '''
        current_file.mtime = 0
        current_file.uid = 0
        current_file.gid = 0
        current_file.uname = ''
        current_file.gname = ''
        return current_file

    def remove_all(self, whitelist=[]):
        ''' Remove all harmful metadata from the tarfile.
        The method will also add every file matching
        whitelist in the produced archive.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8')
        tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8')
        for item in tarin.getmembers():
            tarin.extract(item, self.tempdir)
            if item.isfile():
                path = os.path.join(self.tempdir, item.name)
                cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                if cfile is not None:
                    # Handle read-only files inside archive
                    old_stat = os.stat(path).st_mode
                    os.chmod(path, old_stat|stat.S_IWUSR)
                    cfile.remove_all()
                    os.chmod(path, old_stat)
                elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA:
                    logging.debug('%s\' format is either not supported or harmless' % item.name)
                elif item.name in whitelist:
                    logging.debug('%s is not supported, but MAT was told to add it anyway.'
                        % item.name)
                else:  # Don't add the file to the archive
                    logging.debug('%s will not be added' % item.name)
                    continue
                # _remove() scrubs the tarinfo fields while the file is re-added
                tarout.add(path, item.name, filter=self._remove)
        tarin.close()
        tarout.close()
        self.do_backup()
        return True

    def is_file_clean(self, current_file):
        ''' Check metadata added by tarfile itself (TarInfo fields).
        '''
        if current_file.mtime != 0:
            return False
        elif current_file.uid != 0:
            return False
        elif current_file.gid != 0:
            return False
        elif current_file.uname != '':
            return False
        elif current_file.gname != '':
            return False
        return True

    def is_clean(self, list_unsupported=False):
        ''' Check if the file is clean from harmful metadata.
        When list_unsupported is True, the method returns a list
        of all non-supported/archives files contained in the
        archive instead of a boolean.
        '''
        ret_list = []
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        for item in tarin.getmembers():
            if not self.is_file_clean(item) and not list_unsupported:
                logging.debug('%s from %s has compromising tarinfo' %
                    (item.name, self.filename))
                return False
            tarin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.name)
            if item.isfile():
                cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata' %
                            (item.name.decode("utf8"), self.filename))
                        if not list_unsupported:
                            return False
                    # Nested archives are treated like unsupported files
                    elif isinstance(cfile, GenericArchiveStripper):
                        ret_list.append(item.name)
                else:
                    logging.error('%s\'s format is not supported or harmless' % item.name)
                    if os.path.splitext(path)[1] not in parser.NOMETA:
                        if not list_unsupported:
                            return False
                        ret_list.append(item.name)
        tarin.close()
        if list_unsupported:
            return ret_list
        return True

    def get_meta(self):
        ''' Return a dict with all the meta of the tarfile.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        metadata = {}
        for item in tarin.getmembers():
            current_meta = {}
            if item.isfile():
                tarin.extract(item, self.tempdir)
                path = os.path.join(self.tempdir, item.name)
                class_file = mat.create_class_file(path, False, add2archive=self.add2archive)
                if class_file is not None:
                    meta = class_file.get_meta()
                    if meta:
                        current_meta['file'] = str(meta)
                else:
                    logging.error('%s\'s format is not supported or harmless' % item.name)

                # NOTE(review): a member is only reported when its tarinfo is
                # dirty, even if current_meta['file'] was set above — confirm
                # this is the intended behaviour.
                if not self.is_file_clean(item):  # if there is meta
                    current_meta['mtime'] = item.mtime
                    current_meta['uid'] = item.uid
                    current_meta['gid'] = item.gid
                    current_meta['uname'] = item.uname
                    current_meta['gname'] = item.gname
                    metadata[item.name] = str(current_meta)
        tarin.close()
        return metadata
| 312 | |||
| 313 | |||
class TerminalZipStripper(ZipStripper):
    ''' Represent a terminal level archive.
    This type of archive can not contain nested archives.
    It is used for formats like docx, which are basically
    zipped xml.
    '''
| 320 | |||
| 321 | |||
class GzipStripper(TarStripper):
    ''' Represent a tar.gz archive.
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(GzipStripper, self).__init__(
            filename, parser, mime, backup, is_writable, **kwargs)
        # tarfile open modes thus become 'r:gz' / 'w:gz'
        self.compression = ':gz'
| 328 | |||
| 329 | |||
class Bzip2Stripper(TarStripper):
    ''' Represent a tar.bz2 archive.
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(Bzip2Stripper, self).__init__(
            filename, parser, mime, backup, is_writable, **kwargs)
        # tarfile open modes thus become 'r:bz2' / 'w:bz2'
        self.compression = ':bz2'
diff --git a/libmat/audio.py b/libmat/audio.py new file mode 100644 index 0000000..dae9d75 --- /dev/null +++ b/libmat/audio.py | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | ''' Care about audio fileformat | ||
| 2 | ''' | ||
| 3 | |||
| 4 | try: | ||
| 5 | from mutagen.flac import FLAC | ||
| 6 | from mutagen.oggvorbis import OggVorbis | ||
| 7 | except ImportError: | ||
| 8 | pass | ||
| 9 | |||
| 10 | import parser | ||
| 11 | import mutagenstripper | ||
| 12 | |||
| 13 | |||
class MpegAudioStripper(parser.GenericParser):
    ''' Represent mpeg audio file (mp3, ...).
    '''
    # hachoir field names that carry metadata and must be dropped
    _HARMFUL_FIELDS = frozenset(('id3v1', 'id3v2'))

    def _should_remove(self, field):
        # drop the id3 tag containers; everything else is audio data
        return field.name in self._HARMFUL_FIELDS
| 19 | |||
| 20 | |||
class OggStripper(mutagenstripper.MutagenStripper):
    ''' Represent an ogg vorbis file.
    '''
    def _create_mfile(self):
        # mutagen parses the vorbis comment block for us
        self.mfile = OggVorbis(self.filename)
| 26 | |||
| 27 | |||
class FlacStripper(mutagenstripper.MutagenStripper):
    ''' Represent a Flac audio file.
    '''
    def _create_mfile(self):
        # mutagen parses the FLAC metadata blocks for us
        self.mfile = FLAC(self.filename)

    def remove_all(self):
        ''' Remove the "metadata" block from the file.
        Returns True.
        '''
        # the base class strips the vorbis comments;
        # embedded pictures are FLAC-specific and handled here
        super(FlacStripper, self).remove_all()
        self.mfile.clear_pictures()
        self.mfile.save()
        return True

    def is_clean(self):
        ''' Check if the "metadata" block is present in the file.
        '''
        return super(FlacStripper, self).is_clean() and not self.mfile.pictures

    def get_meta(self):
        ''' Return the content of the metadata block if present.
        '''
        metadata = super(FlacStripper, self).get_meta()
        if self.mfile.pictures:
            # embedded cover art counts as metadata
            metadata['picture:'] = 'yes'
        return metadata
diff --git a/libmat/bencode/__init__.py b/libmat/bencode/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/libmat/bencode/__init__.py | |||
| @@ -0,0 +1 @@ | |||
diff --git a/libmat/bencode/bencode.py b/libmat/bencode/bencode.py new file mode 100644 index 0000000..a0cc99a --- /dev/null +++ b/libmat/bencode/bencode.py | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | # Copyright 2007 by Petru Paler | ||
| 2 | # Copyright 2011 by Julien (jvoisin) Voisin | ||
| 3 | # | ||
| 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 5 | # of this software and associated documentation files (the "Software"), to deal | ||
| 6 | # in the Software without restriction, including without limitation the rights | ||
| 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 8 | # copies of the Software, and to permit persons to whom the Software is | ||
| 9 | # furnished to do so, subject to the following conditions: | ||
| 10 | # | ||
| 11 | # The above copyright notice and this permission notice shall be included in | ||
| 12 | # all copies or substantial portions of the Software. | ||
| 13 | # | ||
| 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 19 | # FROM, | ||
| 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 21 | # THE SOFTWARE. | ||
| 22 | # | ||
| 23 | |||
| 24 | ''' | ||
| 25 | A quick (and also nice) lib to bencode/bdecode torrent files | ||
| 26 | ''' | ||
| 27 | |||
| 28 | |||
class BTFailure(Exception):
    '''Raised when a value cannot be bencoded or bdecoded.'''
| 32 | |||
| 33 | |||
class Bencached(object):
    '''Wrapper marking a string as already bencoded.

    The encoder appends `bencoded` verbatim instead of re-encoding it.
    '''
    __slots__ = ('bencoded',)

    def __init__(self, string):
        # stored untouched; assumed to be valid bencoded data
        self.bencoded = string
| 40 | |||
| 41 | |||
def decode_int(x, f):
    '''Decode a bencoded integer ("i<digits>e") starting at offset f.

    Returns (value, offset just past the trailing 'e').
    Raises ValueError for "-0" and for leading zeros (e.g. "i03e"),
    both forbidden by the bencode specification.
    '''
    f += 1  # skip the leading 'i'
    newf = x.index('e', f)
    # Bug fix: the original compared the single char x[f:f+1] against the
    # two-char string '-0', which can never match, so "i-0e" was accepted.
    if x[f] == '-' and x[f + 1] == '0':
        raise ValueError
    elif x[f] == '0' and newf != f + 1:
        # leading zeros are forbidden ("i0e" itself is fine)
        raise ValueError
    return int(x[f:newf]), newf + 1
| 51 | |||
| 52 | |||
def decode_string(x, f):
    '''Decode a bencoded string ("<length>:<payload>") starting at offset f.

    Returns (payload, offset just past the payload).
    Raises ValueError when the length field has a leading zero
    (e.g. "02:ab"), which bencode forbids.
    '''
    colon = x.index(':', f)
    if x[f] == '0' and colon != f + 1:
        raise ValueError
    length = int(x[f:colon])
    start = colon + 1
    end = start + length
    return x[start:end], end
| 61 | |||
| 62 | |||
def decode_list(x, f):
    '''Decode a bencoded list ("l<items>e") starting at offset f.

    Returns (list of decoded values, offset just past the trailing 'e').
    '''
    items = []
    f += 1  # skip the leading 'l'
    while x[f] != 'e':
        value, f = DECODE_FUNC[x[f]](x, f)
        items.append(value)
    return items, f + 1
| 71 | |||
| 72 | |||
def decode_dict(x, f):
    '''Decode a bencoded dict ("d<key/value pairs>e") starting at offset f.

    Keys are always bencoded strings. Returns (dict, offset just past 'e').
    '''
    mapping = {}
    f += 1  # skip the leading 'd'
    while x[f] != 'e':
        key, f = decode_string(x, f)
        mapping[key], f = DECODE_FUNC[x[f]](x, f)
    return mapping, f + 1
| 81 | |||
| 82 | |||
def encode_bool(x, r):
    '''bencode a boolean as the integer 1 (True) or 0 (False).

    Appends the encoding to the result list r.
    '''
    # Bug fix: the original wrote `1 if r else 0`, testing the truthiness
    # of the *output list* r instead of the boolean value x being encoded.
    encode_int(1 if x else 0, r)
| 86 | |||
| 87 | |||
def encode_int(x, r):
    '''bencode an integer (also used for floats): "i<value>e".

    Appends the encoding to the result list r.
    '''
    r.append('i')
    r.append(str(x))
    r.append('e')
| 91 | |||
| 92 | |||
def encode_list(x, r):
    '''bencode a list/tuple: "l<encoded items>e".

    Appends the encoding to the result list r.
    '''
    r.append('l')
    # plain loop instead of the original side-effect-only list
    # comprehension, which built and discarded a throwaway list
    for item in x:
        ENCODE_FUNC[type(item)](item, r)
    r.append('e')
| 98 | |||
| 99 | |||
def encode_dict(x, result):
    '''bencode a dict: "d<key/value pairs sorted by key>e".

    Appends the encoding to the result list.
    '''
    result.append('d')
    # keys are unique, so sorting the keys gives the same order as
    # sorting the (key, value) pairs did
    for key in sorted(x):
        result.append(str(len(key)))
        result.append(':')
        result.append(key)
        ENCODE_FUNC[type(x[key])](x[key], result)
    result.append('e')
| 109 | |||
| 110 | |||
# Dispatch table: first character of a bencoded value -> decoder function.
# A string starts with its decimal length, so *every* digit must map to
# decode_string. Bug fix: the original used range(9), silently dropping
# the digit '9' and making any string of length 9, 90-99, ... undecodable.
DECODE_FUNC = {str(digit): decode_string for digit in range(10)}
DECODE_FUNC['l'] = decode_list
DECODE_FUNC['d'] = decode_dict
DECODE_FUNC['i'] = decode_int


# Dispatch table: python type -> encoder function.
# (The original assigned ENCODE_FUNC[int] twice — a leftover from the
# py2 int/long pair — so the duplicate line is dropped.)
ENCODE_FUNC = {}
ENCODE_FUNC[Bencached] = lambda x, r: r.append(x.bencoded)
ENCODE_FUNC[int] = encode_int
ENCODE_FUNC[bytes] = lambda x, r: r.extend((str(len(x)), ':', x))
ENCODE_FUNC[list] = encode_list
ENCODE_FUNC[tuple] = encode_list
ENCODE_FUNC[dict] = encode_dict
ENCODE_FUNC[bool] = encode_bool
| 126 | |||
| 127 | |||
def bencode(string):
    '''Return the bencoded form of $string.'''
    chunks = []
    ENCODE_FUNC[type(string)](string, chunks)
    return ''.join(chunks)
| 133 | |||
| 134 | |||
def bdecode(string):
    '''Decode the bencoded $string into python objects.

    Raises BTFailure when $string is not a single, complete
    bencoded value.
    '''
    try:
        result, consumed = DECODE_FUNC[string[0]](string, 0)
    except (IndexError, KeyError, ValueError):
        raise BTFailure('Not a valid bencoded string')
    if consumed != len(string):
        # trailing garbage after an otherwise valid value
        raise BTFailure('Invalid bencoded value (data after valid prefix)')
    return result
diff --git a/libmat/exceptions.py b/libmat/exceptions.py new file mode 100644 index 0000000..47da15c --- /dev/null +++ b/libmat/exceptions.py | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | ''' Base exceptions for MAT | ||
| 2 | ''' | ||
| 3 | |||
| 4 | |||
class UnableToRemoveFile(Exception):
    '''Raised when a file could not be removed.'''
| 9 | |||
class UnableToWriteFile(Exception):
    '''This exception is raised when a file
    could not be chmod +w
    '''
    pass
diff --git a/libmat/exiftool.py b/libmat/exiftool.py new file mode 100644 index 0000000..9e38f04 --- /dev/null +++ b/libmat/exiftool.py | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | ''' Care about images with help of the amazing (perl) library Exiftool. | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import parser | ||
| 5 | import subprocess | ||
| 6 | |||
| 7 | |||
class ExiftoolStripper(parser.GenericParser):
    ''' A generic stripper class using exiftool as backend.
    '''

    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
        # Metadata fields considered harmless; subclasses add their
        # format-specific entries through _set_allowed().
        self.allowed = set(['ExifTool Version Number', 'File Name', 'Directory',
            'File Size', 'File Modification Date/Time', 'File Access Date/Time',
            'File Permissions', 'File Type', 'MIME Type', 'Image Width',
            'Image Height', 'Image Size', 'File Inode Change Date/Time'])
        self._set_allowed()

    def _set_allowed(self):
        ''' Virtual method. Set the allowed/harmless list of metadata.
        '''
        raise NotImplementedError

    def remove_all(self):
        ''' Remove all metadata with help of exiftool.
        Returns True on success, False when exiftool could not be run.
        '''
        try:
            if self.backup:
                self.create_backup_copy()
            # Note: '-All=' must be followed by a known exiftool option.
            # Fix: open the sink for *writing* (the original used
            # open('/dev/null'), i.e. read-only) and close it instead of
            # leaking the file descriptor.
            with open('/dev/null', 'w') as devnull:
                subprocess.call(['exiftool', '-m', '-all=',
                    '-adobe=', '-overwrite_original', self.filename],
                    stdout=devnull)
            return True
        except (OSError, IOError):
            # narrowed from a bare `except:`; typically raised when the
            # exiftool binary is not installed
            return False

    def is_clean(self):
        ''' Check if the file is clean with the help of exiftool.
        '''
        return not self.get_meta()

    def get_meta(self):
        ''' Return every harmful meta with help of exiftool.
        Exiftool output looks like this:
            field name : value
            field name : value
        '''
        output = subprocess.Popen(['exiftool', self.filename],
            stdout=subprocess.PIPE).communicate()[0]
        meta = {}
        for line in output.split('\n')[:-1]:  # chop the trailing empty chunk
            # split on the *first* colon only: the original split(':')[1]
            # truncated any value that itself contains a colon (e.g. times)
            key, _, value = line.partition(':')
            key = key.strip()
            if key not in self.allowed:
                meta[key] = value.strip()
        return meta
| 58 | |||
| 59 | |||
class JpegStripper(ExiftoolStripper):
    ''' Care about jpeg files with help
    of exiftool.
    '''
    def _set_allowed(self):
        # jpeg-specific harmless metadata fields
        harmless = ('JFIF Version', 'Resolution Unit', 'X Resolution',
                    'Y Resolution', 'Encoding Process', 'Bits Per Sample',
                    'Color Components', 'Y Cb Cr Sub Sampling')
        self.allowed.update(harmless)
| 68 | |||
| 69 | |||
class PngStripper(ExiftoolStripper):
    ''' Care about png files with help
    of exiftool.
    '''
    def _set_allowed(self):
        # png-specific harmless metadata fields
        harmless = ('Bit Depth', 'Color Type', 'Compression', 'Filter',
                    'Interlace', 'Pixels Per Unit X', 'Pixels Per Unit Y',
                    'Pixel Units', 'Significant Bits', 'Background Color',
                    'SRGB Rendering')
        self.allowed.update(harmless)
diff --git a/libmat/hachoir_editor/__init__.py b/libmat/hachoir_editor/__init__.py new file mode 100644 index 0000000..1835676 --- /dev/null +++ b/libmat/hachoir_editor/__init__.py | |||
| @@ -0,0 +1,8 @@ | |||
| 1 | from field import ( | ||
| 2 | EditorError, FakeField) | ||
| 3 | from typed_field import ( | ||
| 4 | EditableField, EditableBits, EditableBytes, | ||
| 5 | EditableInteger, EditableString, | ||
| 6 | createEditableField) | ||
| 7 | from fieldset import EditableFieldSet, NewFieldSet, createEditor | ||
| 8 | |||
diff --git a/libmat/hachoir_editor/field.py b/libmat/hachoir_editor/field.py new file mode 100644 index 0000000..6b1efe3 --- /dev/null +++ b/libmat/hachoir_editor/field.py | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | from hachoir_core.error import HachoirError | ||
| 2 | from hachoir_core.field import joinPath, MissingField | ||
| 3 | |||
class EditorError(HachoirError):
    '''Generic error raised by the hachoir editor layer.'''
    pass
| 6 | |||
class FakeField(object):
    """
    This class has an API that looks similar to the Field API, but objects
    don't contain any value: all values are _computed_ by parent methods.

    Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc").
    """
    # FakeField is never a container of other fields
    is_field_set = False

    def __init__(self, parent, name):
        self._parent = parent
        self._name = name

    def _getPath(self):
        return joinPath(self._parent.path, self._name)
    path = property(_getPath)

    def _getName(self):
        return self._name
    name = property(_getName)

    def _getAddress(self):
        # address is delegated to the parent editor, which may shift it
        # when surrounding fields are inserted/deleted
        return self._parent._getFieldAddress(self._name)
    address = property(_getAddress)

    def _getSize(self):
        # size (in bits) comes from the unmodified input field set
        return self._parent.input[self._name].size
    size = property(_getSize)

    def _getValue(self):
        return self._parent.input[self._name].value
    value = property(_getValue)

    def createDisplay(self):
        # TODO: Returns new value if field is altered
        return self._parent.input[self._name].display
    display = property(createDisplay)

    def _getParent(self):
        return self._parent
    parent = property(_getParent)

    def hasValue(self):
        return self._parent.input[self._name].hasValue()

    def __getitem__(self, key):
        # TODO: Implement this function!
        raise MissingField(self, key)

    def _isAltered(self):
        # a FakeField only mirrors the input, so it is never altered
        return False
    is_altered = property(_isAltered)

    def writeInto(self, output):
        ''' Copy this field's raw content from the input stream to output. '''
        size = self.size
        addr = self._parent._getFieldInputAddress(self._name)
        input = self._parent.input
        stream = input.stream
        if size % 8:
            # not byte-aligned: copy at bit granularity
            output.copyBitsFrom(stream, addr, size, input.endian)
        else:
            output.copyBytesFrom(stream, addr, size//8)
| 69 | |||
diff --git a/libmat/hachoir_editor/fieldset.py b/libmat/hachoir_editor/fieldset.py new file mode 100644 index 0000000..b7c9b07 --- /dev/null +++ b/libmat/hachoir_editor/fieldset.py | |||
| @@ -0,0 +1,352 @@ | |||
| 1 | from hachoir_core.dict import UniqKeyError | ||
| 2 | from hachoir_core.field import MissingField, Float32, Float64, FakeArray | ||
| 3 | from hachoir_core.compatibility import any | ||
| 4 | from hachoir_core.i18n import _ | ||
| 5 | from typed_field import createEditableField | ||
| 6 | from field import EditorError | ||
| 7 | from collections import deque # Python 2.4 | ||
| 8 | import weakref # Python 2.1 | ||
| 9 | import struct | ||
| 10 | |||
class EditableFieldSet(object):
    """
    Editable wrapper around a hachoir field set.

    The original field set (self.input) is never modified; this class
    records three kinds of change on top of it:
      - self._fields: cache of editable wrappers for visited fields
      - self._deleted: names of deleted fields
      - self._inserted: fields inserted before a given field name
        (the key None means "inserted at the very end")
    """
    MAX_SIZE = (1 << 40)  # Arbitrary limit to catch errors
    is_field_set = True

    def __init__(self, parent, fieldset):
        self._parent = parent
        self.input = fieldset  # original FieldSet
        self._fields = {}  # cache of editable fields
        self._deleted = set()  # Names of deleted fields
        self._inserted = {}  # Inserted field (name => list of field,
                             # where name is the name after)

    def array(self, key):
        # FIXME: Use cache?
        return FakeArray(self, key)

    def _getParent(self):
        return self._parent
    parent = property(_getParent)

    def _isAltered(self):
        # Altered as soon as anything was inserted, deleted or edited.
        if self._inserted:
            return True
        if self._deleted:
            return True
        return any(field.is_altered for field in self._fields.itervalues())
    is_altered = property(_isAltered)

    def reset(self):
        """
        Reset the field set and the input field set.
        """
        # Bug fix: iterate over a snapshot of the items. The previous code
        # deleted entries from the dictionary while iterating iteritems(),
        # which raises "RuntimeError: dictionary changed size during
        # iteration" as soon as an unaltered cached field exists.
        # NOTE(review): unaltered entries are dropped while altered ones are
        # kept -- presumably so pending edits survive the reset; confirm.
        for key, field in list(self._fields.items()):
            if not field.is_altered:
                del self._fields[key]
        self.input.reset()

    def __len__(self):
        # Input length, minus deletions, plus insertions.
        return len(self.input) \
            - len(self._deleted) \
            + sum(len(new) for new in self._inserted.itervalues())

    def __iter__(self):
        for field in self.input:
            name = field.name
            if name in self._inserted:
                # Fields inserted before this one.
                for newfield in self._inserted[name]:
                    yield weakref.proxy(newfield)
            if name not in self._deleted:
                yield self[name]
        if None in self._inserted:
            # Fields appended at the very end.
            for newfield in self._inserted[None]:
                yield weakref.proxy(newfield)

    def insertBefore(self, name, *new_fields):
        self._insert(name, new_fields, False)

    def insertAfter(self, name, *new_fields):
        self._insert(name, new_fields, True)

    def insert(self, *new_fields):
        # Append at the end of the field set.
        self._insert(None, new_fields, True)

    def _insert(self, key, new_fields, next):
        """
        key is the name of the field before which new_fields
        will be inserted. If next is True, the fields will be inserted
        _after_ this field.
        """
        # Set unique field name
        for field in new_fields:
            if field._name.endswith("[]"):
                self.input.setUniqueFieldName(field)

        # Check that there is no duplicate in inserted fields
        new_names = list(field.name for field in new_fields)
        names_set = set(new_names)
        if len(names_set) != len(new_fields):
            duplicates = (name for name in names_set if 1 < new_names.count(name))
            raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates))

        # Check that field names are not in input
        if self.input: # Write special version for NewFieldSet?
            for name in new_names:
                if name in self.input and name not in self._deleted:
                    raise UniqKeyError(_("Field name '%s' already exists") % name)

        # Check that field names are not in inserted fields
        for fields in self._inserted.itervalues():
            for field in fields:
                if field.name in new_names:
                    raise UniqKeyError(_("Field name '%s' already exists") % field.name)

        # Input have already inserted field?
        if key in self._inserted:
            if next:
                self._inserted[key].extend(reversed(new_fields))
            else:
                self._inserted[key].extendleft(reversed(new_fields))
            return

        # Would like to insert in inserted fields?
        if key:
            for fields in self._inserted.itervalues():
                names = [item.name for item in fields]
                try:
                    pos = names.index(key)
                except ValueError:
                    continue
                if 0 <= pos:
                    if next:
                        pos += 1
                    # Rotate the insertion point to the left end of the
                    # deque, insert, then rotate back.
                    fields.rotate(-pos)
                    fields.extendleft(reversed(new_fields))
                    fields.rotate(pos)
                    return

        # Get next field. Use None if we are at the end.
        if next:
            index = self.input[key].index + 1
            try:
                key = self.input[index].name
            except IndexError:
                key = None

        # Check that field names are not in input
        # NOTE(review): when key is None (insert at the very end) this
        # membership test is delegated to input.__contains__(None) --
        # confirm the hachoir FieldSet accepts that.
        if key not in self.input:
            raise MissingField(self, key)

        # Insert in original input
        self._inserted[key] = deque(new_fields)

    def _getDescription(self):
        return self.input.description
    description = property(_getDescription)

    def _getStream(self):
        # FIXME: This property is maybe a bad idea since address may be differents
        return self.input.stream
    stream = property(_getStream)

    def _getName(self):
        return self.input.name
    name = property(_getName)

    def _getEndian(self):
        return self.input.endian
    endian = property(_getEndian)

    def _getAddress(self):
        # Relative address inside the parent, recomputed by the parent so
        # that deletions/resizes are taken into account.
        if self._parent:
            return self._parent._getFieldAddress(self.name)
        else:
            return 0
    address = property(_getAddress)

    def _getAbsoluteAddress(self):
        # Sum of the relative addresses up to the root field set.
        address = self.address
        current = self._parent
        while current:
            address += current.address
            current = current._parent
        return address
    absolute_address = property(_getAbsoluteAddress)

    def hasValue(self):
        # A field set has no scalar value of its own.
        return False
#        return self._parent.input[self.name].hasValue()

    def _getSize(self):
        if self.is_altered:
            # Recompute: edits may have changed the children sizes.
            return sum(field.size for field in self)
        else:
            return self.input.size
    size = property(_getSize)

    def _getPath(self):
        return self.input.path
    path = property(_getPath)

    def _getOriginalField(self, name):
        assert name in self.input
        return self.input[name]

    def _getFieldInputAddress(self, name):
        """
        Absolute address of a field from the input field set.
        """
        assert name in self.input
        return self.input[name].absolute_address

    def _getFieldAddress(self, name):
        """
        Compute relative address of a field. The operation takes care of
        deleted and resized fields.
        """
        #assert name not in self._deleted
        addr = 0
        for field in self:
            if field.name == name:
                return addr
            addr += field.size
        raise MissingField(self, name)

    def _getItemByPath(self, path):
        # Skip the empty component produced by a leading "/".
        if not path[0]:
            path = path[1:]
        field = self
        for name in path:
            field = field[name]
        return field

    def __contains__(self, name):
        try:
            field = self[name]
            return (field is not None)
        except MissingField:
            return False

    def __getitem__(self, key):
        """
        Create a weak reference to an editable field (EditableField) for the
        field with specified name. If the field is removed later, using the
        editable field will raise a weakref.ReferenceError exception.

        May raise a MissingField error if the field doesn't exist in original
        field set or it has been deleted.
        """
        if "/" in key:
            return self._getItemByPath(key.split("/"))
        if isinstance(key, (int, long)):
            raise EditorError("Integer index are not supported")

        if (key in self._deleted) or (key not in self.input):
            raise MissingField(self, key)
        if key not in self._fields:
            # Build the editable wrapper lazily and cache it.
            field = self.input[key]
            if field.is_field_set:
                self._fields[key] = createEditableFieldSet(self, field)
            else:
                self._fields[key] = createEditableField(self, field)
        return weakref.proxy(self._fields[key])

    def __delitem__(self, name):
        """
        Remove a field from the field set. May raise an MissingField exception
        if the field has already been deleted.
        """
        # Support "a/b/c" paths: delegate the deletion to the child set.
        parts = name.partition('/')
        if parts[2]:
            fieldset = self[parts[0]]
            del fieldset[parts[2]]
            return
        if name in self._deleted:
            raise MissingField(self, name)
        self._deleted.add(name)
        if name in self._fields:
            del self._fields[name]

    def writeInto(self, output):
        """
        Write the content of this field set into the output stream
        (OutputStream).
        """
        if not self.is_altered:
            # Not altered: just copy bits/bytes
            input = self.input
            if input.size % 8:
                output.copyBitsFrom(input.stream,
                    input.absolute_address, input.size, input.endian)
            else:
                output.copyBytesFrom(input.stream,
                    input.absolute_address, input.size//8)
        else:
            # Altered: call writeInto() method of each field
            for field in self:
                field.writeInto(output)

    def _getValue(self):
        raise EditorError('Field set "%s" has no value' % self.path)
    def _setValue(self, value):
        raise EditorError('Field set "%s" value is read only' % self.path)
    value = property(_getValue, _setValue, "Value of field")
| 296 | |||
class EditableFloat(EditableFieldSet):
    """Editable view of a Float32/Float64 field (hachoir stores floats as
    field sets, hence the EditableFieldSet base class)."""

    _value = None  # replacement value, or None while untouched

    def _isAltered(self):
        # Altered as soon as a replacement value has been assigned.
        return self._value is not None
    is_altered = property(_isAltered)

    def writeInto(self, output):
        """Serialize this float: the new value if set, original bytes otherwise."""
        if self._value is None:
            EditableFieldSet.writeInto(self, output)
        else:
            self._write(output)

    def _write(self, output):
        # Pack the replacement value with the struct format of the input field.
        packed = struct.pack(self.input.struct_format, self._value)
        output.writeBytes(packed)

    def _setValue(self, value):
        # Flag the parent so the change propagates upward.
        self.parent._is_altered = True
        self._value = value
    value = property(EditableFieldSet._getValue, _setValue)
| 319 | |||
def createEditableFieldSet(parent, field):
    """Return the editable wrapper for *field*: EditableFloat for exact
    Float32/Float64 instances, a generic EditableFieldSet otherwise."""
    # FIXME: Support Float80
    if field.__class__ in (Float32, Float64):
        return EditableFloat(parent, field)
    return EditableFieldSet(parent, field)
| 327 | |||
class NewFieldSet(EditableFieldSet):
    """Field set created from scratch (no original input field set): it
    only contains inserted fields and is therefore always altered."""

    def __init__(self, parent, name):
        EditableFieldSet.__init__(self, parent, None)
        self._name = name
        self._endian = parent.endian

    def __iter__(self):
        if None in self._inserted:
            return iter(self._inserted[None])
        # Bug fix: the previous code raised StopIteration here. Raising
        # StopIteration from __iter__() does not end a "for" loop cleanly
        # (it propagates to the caller) and is silently swallowed inside
        # generator expressions such as sum(f.size for f in self).
        return iter(())

    def _getName(self):
        return self._name
    name = property(_getName)

    def _getEndian(self):
        return self._endian
    endian = property(_getEndian)

    # A brand new field set always has to be written out.
    is_altered = property(lambda self: True)
| 349 | |||
def createEditor(fieldset):
    """Build a top-level (parentless) editor wrapping *fieldset*."""
    editor = EditableFieldSet(None, fieldset)
    return editor
| 352 | |||
diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py new file mode 100644 index 0000000..0f0427b --- /dev/null +++ b/libmat/hachoir_editor/typed_field.py | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | from hachoir_core.field import ( | ||
| 2 | RawBits, Bit, Bits, PaddingBits, | ||
| 3 | RawBytes, Bytes, PaddingBytes, | ||
| 4 | GenericString, Character, | ||
| 5 | isInteger, isString) | ||
| 6 | from field import FakeField | ||
| 7 | |||
class EditableField(FakeField):
    """
    Abstract base class for editable fields.

    Subclasses implement _computeSize() and _write(). Until a value is
    assigned through the ``value`` property, every attribute is proxied
    to the original (read-only) field via FakeField.
    """

    _is_altered = False  # flipped to True on first assignment to .value

    def __init__(self, parent, name, value=None):
        FakeField.__init__(self, parent, name)
        self._value = value

    def _isAltered(self):
        return self._is_altered
    is_altered = property(_isAltered)

    def hasValue(self):
        """Editable fields always carry a value."""
        return True

    def _computeSize(self):
        # Size in bits of the replacement value; subclass responsibility.
        raise NotImplementedError()

    def _getValue(self):
        return self._value

    def _setValue(self, value):
        self._value = value

    def _propGetValue(self):
        # Fall back to the original field while no value has been set.
        if self._value is None:
            return FakeField._getValue(self)
        return self._getValue()

    def _propSetValue(self, value):
        self._setValue(value)
        self._is_altered = True
    value = property(_propGetValue, _propSetValue)

    def _getSize(self):
        # The size follows the replacement value once one exists.
        if self._value is None:
            return FakeField._getSize(self)
        return self._computeSize()
    size = property(_getSize)

    def _write(self, output):
        # Serialization of the replacement value; subclass responsibility.
        raise NotImplementedError()

    def writeInto(self, output):
        """Write the current (possibly edited) content to *output*."""
        if not self._is_altered:
            return FakeField.writeInto(self, output)
        self._write(output)
| 57 | |||
class EditableFixedField(EditableField):
    """
    Editable field whose size (in bits) is fixed at construction time.
    """

    def __init__(self, parent, name, value=None, size=None):
        EditableField.__init__(self, parent, name, value)
        if size is None:
            # Inherit the size of the original field.
            size = self._parent._getOriginalField(self._name).size
        self._size = size

    def _getSize(self):
        return self._size
    size = property(_getSize)
| 73 | |||
class EditableBits(EditableFixedField):
    """Editable fixed-size bit field."""

    def __init__(self, parent, name, *args):
        if not args:
            size, value = None, None
        elif len(args) == 2:
            size, value = args
            assert isinstance(value, (int, long))
        else:
            raise TypeError(
                "Wrong argument count, EditableBits constructor prototype is: "
                "(parent, name, [size, value])")
        EditableFixedField.__init__(self, parent, name, value, size)
        if args:
            # Assigning through _setValue() validates the range.
            self._setValue(args[1])
            self._is_altered = True

    def _setValue(self, value):
        # The value must fit in self._size bits.
        limit = 1 << self._size
        if not (0 <= value < limit):
            raise ValueError("Invalid value, must be in range %s..%s"
                % (0, limit - 1))
        self._value = value

    def _write(self, output):
        output.writeBits(self._size, self._value, self._parent.endian)
| 100 | |||
class EditableBytes(EditableField):
    """Editable byte string of variable length."""

    def _setValue(self, value):
        if not value:
            raise ValueError(
                "Unable to set empty string to a EditableBytes field")
        self._value = value

    def _computeSize(self):
        # Eight bits per byte.
        return 8 * len(self._value)

    def _write(self, output):
        output.writeBytes(self._value)
| 112 | |||
class EditableString(EditableField):
    """Editable character string.

    Handles hachoir's fixed, suffix-terminated (e.g. C strings) and
    Pascal (length-prefixed) string formats.
    """

    # Maximum payload length of the length-prefixed formats.
    MAX_SIZE = {
        "Pascal8": (1 << 8) - 1,
        "Pascal16": (1 << 16) - 1,
        "Pascal32": (1 << 32) - 1,
    }

    def __init__(self, parent, name, *args, **kw):
        if not args:
            value = None
        elif len(args) == 2:
            value = args[1]
            assert isinstance(value, str)  # TODO: support Unicode
        else:
            raise TypeError(
                "Wrong argument count, EditableString constructor prototype is:"
                "(parent, name, [format, value])")
        EditableField.__init__(self, parent, name, value)
        if len(args) == 2:
            # Brand new string: format and charset given by the caller.
            self._charset = kw.get('charset', None)
            self._format = args[0]
            self._prefix_size = GenericString.PASCAL_FORMATS.get(self._format, 0)
            self._suffix_str = GenericString.staticSuffixStr(
                self._format, self._charset, self._parent.endian)
            self._is_altered = True
        else:
            # Wrapping an existing string: inherit its characteristics.
            original = self._parent._getOriginalField(name)
            self._charset = original.charset
            self._format = original.format
            self._prefix_size = original.content_offset
            self._suffix_str = original.suffix_str

    def _setValue(self, value):
        # Enforce the limit of the length-prefixed formats.
        maximum = self.MAX_SIZE.get(self._format)
        if maximum is not None and maximum < len(value):
            raise ValueError("String is too big")
        self._value = value

    def _computeSize(self):
        # Prefix + payload + suffix, converted to bits.
        nbytes = self._prefix_size + len(self._value) + len(self._suffix_str)
        return nbytes * 8

    def _write(self, output):
        if self._format in GenericString.SUFFIX_FORMAT:
            # Payload followed by its terminator.
            output.writeBytes(self._value)
            output.writeBytes(self._suffix_str)
        elif self._format == "fixed":
            output.writeBytes(self._value)
        else:
            # Length-prefixed (Pascal) string.
            assert self._format in GenericString.PASCAL_FORMATS
            prefix = GenericString.PASCAL_FORMATS[self._format]
            output.writeInteger(len(self._value), False, prefix,
                self._parent.endian)
            output.writeBytes(self._value)
| 168 | |||
class EditableCharacter(EditableFixedField):
    """Editable single-character (8-bit) field.

    Constructor prototype: (parent, name, [value]).
    """

    def __init__(self, parent, name, *args):
        if args:
            # Bug fix: the optional tail of the prototype is a single value
            # and only args[0] is ever read, so exactly one extra argument
            # is expected. The old check compared against 3, which rejected
            # the valid one-argument call.
            if len(args) != 1:
                raise TypeError(
                    "Wrong argument count, EditableCharacter "
                    "constructor prototype is: (parent, name, [value])")
            value = args[0]
            if not isinstance(value, str) or len(value) != 1:
                raise TypeError("EditableCharacter needs a character")
        else:
            value = None
        EditableFixedField.__init__(self, parent, name, value, 8)
        if args:
            self._is_altered = True

    def _setValue(self, value):
        # Only one-character strings are accepted.
        if not isinstance(value, str) or len(value) != 1:
            raise TypeError("EditableCharacter needs a character")
        self._value = value

    def _write(self, output):
        output.writeBytes(self._value)
| 192 | |||
class EditableInteger(EditableFixedField):
    """Editable signed or unsigned integer of 8, 16 or 32 bits."""

    # Accepted (min, max) ranges, indexed by size in bits.
    # NOTE(review): the signed 8-bit lower bound is -(1 << 8) = -256 in the
    # original code, where -(1 << 7) = -128 would be expected -- kept as-is.
    VALID_VALUE_SIGNED = {
        8: (-(1 << 8), (1 << 8) - 1),
        16: (-(1 << 15), (1 << 15) - 1),
        32: (-(1 << 31), (1 << 31) - 1),
    }
    VALID_VALUE_UNSIGNED = {
        8: (0, (1 << 8) - 1),
        16: (0, (1 << 16) - 1),
        32: (0, (1 << 32) - 1)
    }

    def __init__(self, parent, name, *args):
        if not args:
            size, value = None, None
        elif len(args) == 3:
            size, value = args[1], args[2]
            assert isinstance(value, (int, long))
        else:
            raise TypeError(
                "Wrong argument count, EditableInteger constructor prototype is: "
                "(parent, name, [signed, size, value])")
        EditableFixedField.__init__(self, parent, name, value, size)
        if args:
            self._signed = args[0]
            self._is_altered = True
        else:
            self._signed = self._parent._getOriginalField(self._name).signed

    def _setValue(self, value):
        # Pick the range table matching the signedness.
        if self._signed:
            table = self.VALID_VALUE_SIGNED
        else:
            table = self.VALID_VALUE_UNSIGNED
        minval, maxval = table[self._size]
        if not (minval <= value <= maxval):
            raise ValueError("Invalid value, must be in range %s..%s"
                % (minval, maxval))
        self._value = value

    def _write(self, output):
        output.writeInteger(
            self.value, self._signed, self._size // 8, self._parent.endian)
| 238 | |||
def createEditableField(fieldset, field):
    """Pick the editable wrapper class matching *field*'s type and
    instantiate it (FakeField when the type is not editable)."""
    klass = field.__class__
    if isInteger(field):
        editable = EditableInteger
    elif isString(field):
        editable = EditableString
    elif klass in (RawBytes, Bytes, PaddingBytes):
        editable = EditableBytes
    elif klass in (RawBits, Bits, Bit, PaddingBits):
        editable = EditableBits
    elif klass is Character:
        editable = EditableCharacter
    else:
        editable = FakeField
    return editable(fieldset, field.name)
| 253 | |||
diff --git a/libmat/images.py b/libmat/images.py new file mode 100644 index 0000000..67c710f --- /dev/null +++ b/libmat/images.py | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | ''' Takes care about pictures formats | ||
| 2 | |||
| 3 | References: | ||
| 4 | - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm | ||
| 5 | - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html | ||
| 6 | - PNG: http://www.w3.org/TR/PNG-Chunks.html | ||
| 7 | ''' | ||
| 8 | |||
| 9 | import parser | ||
| 10 | |||
| 11 | |||
class JpegStripper(parser.GenericParser):
    ''' Represents a jpeg file.
        Custom Huffman and Quantization tables
        are stripped: they may leak
        some info, and the quality loss is minor.
    '''
    # Fields that must survive the cleaning.
    _HARMLESS = frozenset([
        'start_image',  # start of the image
        'app0',  # JFIF data
        'start_frame',  # specify width, height, number of components
        'start_scan',  # specify which slice of data the top-to-bottom scan contains
        'data',  # actual data
        'end_image'])  # end of the image

    def _should_remove(self, field):
        ''' Return True if the field is compromising
        '''
        name = field.name
        if name in self._HARMLESS:
            return False
        # Custom Quantization and Huffman tables are kept as well.
        if name.startswith(('quantization[', 'huffman[')):
            return False
        return True
| 35 | |||
| 36 | |||
class PngStripper(parser.GenericParser):
    ''' Represents a png file
    '''
    # Fields that must survive the cleaning.
    _HARMLESS = frozenset([
        'id',
        'header',  # PNG header
        'physical',  # the intended pixel size or aspect ratio
        'end'])  # end of the image

    def _should_remove(self, field):
        ''' Return True if the field is compromising
        '''
        name = field.name
        # Keep the whitelisted chunks and the actual image data.
        if name in self._HARMLESS or name.startswith('data['):
            return False
        return True
diff --git a/libmat/mat.py b/libmat/mat.py new file mode 100644 index 0000000..8dfc2dc --- /dev/null +++ b/libmat/mat.py | |||
| @@ -0,0 +1,186 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | |||
| 3 | ''' Metadata anonymisation toolkit library | ||
| 4 | ''' | ||
| 5 | |||
| 6 | import logging | ||
| 7 | import mimetypes | ||
| 8 | import os | ||
| 9 | import subprocess | ||
| 10 | import xml.sax | ||
| 11 | |||
| 12 | import hachoir_core.cmd_line | ||
| 13 | import hachoir_parser | ||
| 14 | |||
| 15 | import libmat.exceptions | ||
| 16 | |||
__version__ = '0.5.2'
__author__ = 'jvoisin'

# Silence: only log critical errors and keep hachoir quiet.
LOGGING_LEVEL = logging.CRITICAL
hachoir_core.config.quiet = True
fname = ''

# Verbose alternative, for debugging (uncomment to enable):
#LOGGING_LEVEL = logging.DEBUG
#hachoir_core.config.quiet = False
#logname = 'report.log'

# An empty/falsy filename makes basicConfig() log to stderr instead of a file.
logging.basicConfig(filename=fname, level=LOGGING_LEVEL)

import strippers  # this is loaded here because we need LOGGING_LEVEL
| 33 | |||
| 34 | |||
def get_logo():
    ''' Return the path to the logo
    '''
    # Probe the known locations in order; implicitly return None when
    # the logo is nowhere to be found.
    candidates = ('./data/mat.png',
                  '/usr/share/pixmaps/mat.png',
                  '/usr/local/share/pixmaps/mat.png')
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
| 43 | return '/usr/local/share/pixmaps/mat.png' | ||
| 44 | |||
| 45 | |||
def get_datadir():
    ''' Return the path to the data directory
    '''
    # Probe the known locations in order (local install first);
    # implicitly return None when none exists.
    for directory in ('./data/', '/usr/local/share/mat/', '/usr/share/mat/'):
        if os.path.isdir(directory):
            return directory
| 55 | |||
| 56 | |||
def list_supported_formats():
    ''' Return a list of all locally supported fileformat.
        It parses that FORMATS file, and removes locally
        non-supported formats.
    '''
    handler = XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(handler)
    formats_path = os.path.join(get_datadir(), 'FORMATS')
    with open(formats_path, 'r') as xmlfile:
        sax_parser.parse(xmlfile)

    # Keep only the formats whose primary mimetype has a local stripper.
    return [item for item in handler.list
            if item['mimetype'].split(',')[0] in strippers.STRIPPERS]
| 75 | |||
| 76 | |||
class XMLParser(xml.sax.handler.ContentHandler):
    ''' Parse the supported format xml, and return a corresponding
        list of dict
    '''
    def __init__(self):
        self.dict = {}  # attributes of the <format> section being parsed
        self.list = []  # one dict per completed <format> section
        self.content, self.key = '', ''
        self.between = False  # True while inside an element

    def startElement(self, name, attrs):
        ''' Called when entering into xml tag
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        ''' Called when exiting a xml tag
        '''
        if name == 'format':  # leaving a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
            return
        # NOTE(review): '\s' is a literal backslash-s here, not a regex
        # class -- presumably a whitespace marker in the data file; confirm.
        self.dict[self.key] = self.content.replace('\s', ' ')
        self.between = False

    def characters(self, characters):
        ''' Concatenate the content between opening and closing tags
        '''
        if self.between:
            self.content += characters
| 110 | |||
| 111 | |||
def secure_remove(filename):
    ''' Securely remove the file

        Returns True on success; raises UnableToWriteFile when the
        permissions cannot be changed, UnableToRemoveFile when even the
        plain removal fails.
    '''
    # I want the file removed, even if it's read-only
    try:
        # Bug fix: the mode used to be the *decimal* integer 220 (0o334);
        # 0o220 (owner + group write) is what was intended.
        os.chmod(filename, 0o220)
    except OSError:
        logging.error('Unable to add write rights to %s' % filename)
        raise libmat.exceptions.UnableToWriteFile

    try:
        # shred(1) overwrites the content before unlinking.
        if not subprocess.call(['shred', '--remove', filename]):
            return True
        else:
            raise OSError
    except OSError:
        logging.error('Unable to securely remove %s' % filename)

    # Fallback: plain, non-secure removal.
    try:
        os.remove(filename)
    except OSError:
        logging.error('Unable to remove %s' % filename)
        raise libmat.exceptions.UnableToRemoveFile

    return True
| 137 | |||
| 138 | |||
def create_class_file(name, backup, **kwargs):
    ''' Return a $FILETYPEStripper() class,
        corresponding to the filetype of the given file.

        Returns None when the file does not exist, is unreadable, is
        empty, cannot be parsed, or has no matching stripper.
    '''
    if not os.path.isfile(name):  # check if the file exists
        logging.error('%s is not a valid file' % name)
        return None

    if not os.access(name, os.R_OK):  # check read permissions
        # (fixed the duplicated word "is is" in this log message)
        logging.error('%s is not readable' % name)
        return None

    if not os.path.getsize(name):
        # check if the file is not empty (hachoir crash on empty files)
        logging.error('%s is empty' % name)
        return None

    try:
        filename = hachoir_core.cmd_line.unicodeFilename(name)
    except TypeError:  # get rid of "decoding Unicode is not supported"
        filename = name

    parser = hachoir_parser.createParser(filename)
    if not parser:
        logging.info('Unable to parse %s' % filename)
        return None

    mime = parser.mime_type

    if mime == 'application/zip':  # some formats are zipped stuff
        # a single guess_type() call instead of two
        guessed = mimetypes.guess_type(name)[0]
        if guessed:
            mime = guessed

    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    is_writable = os.access(name, os.W_OK)

    try:
        stripper_class = strippers.STRIPPERS[mime]
    except KeyError:
        logging.info('Don\'t have stripper for %s format' % mime)
        return None

    return stripper_class(filename, parser, mime, backup, is_writable, **kwargs)
diff --git a/libmat/misc.py b/libmat/misc.py new file mode 100644 index 0000000..450f381 --- /dev/null +++ b/libmat/misc.py | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | ''' Care about misc formats | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import parser | ||
| 5 | |||
| 6 | from bencode import bencode | ||
| 7 | |||
| 8 | |||
class TorrentStripper(parser.GenericParser):
    ''' Represent a torrent file with the help
        of the bencode lib from Petru Paler
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
        # Whitelist of harmless, well-known torrent keys.
        self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces',
            'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash'])

    def __get_key_recursively(self, dictionary):
        ''' Get recursively all keys from a dict and
            its subdicts
        '''
        # Bug fix: the previous implementation returned from inside the
        # first loop iteration, so every key after the first one was
        # ignored and is_clean() could report a dirty file as clean.
        keys = set()
        for key, value in list(dictionary.items()):
            keys.add(key)
            if isinstance(value, dict):
                keys.update(self.__get_key_recursively(value))
        return keys

    def is_clean(self):
        ''' Check if the file is clean from harmful metadata
        '''
        with open(self.filename, 'r') as f:
            decoded = bencode.bdecode(f.read())
        # Clean when every (nested) key belongs to the whitelist.
        return self.fields.issuperset(self.__get_key_recursively(decoded))

    def __get_meta_recursively(self, dictionary):
        ''' Get recursively all harmful metadata
        '''
        d = dict()
        for i, j in list(dictionary.items()):
            if i not in self.fields:
                d[i] = j
            elif isinstance(j, dict):
                # merge the metadata found in the sub-dictionary
                d.update(self.__get_meta_recursively(j))
        return d

    def get_meta(self):
        ''' Return a dict with all the meta of the file
        '''
        with open(self.filename, 'r') as f:
            decoded = bencode.bdecode(f.read())
        return self.__get_meta_recursively(decoded)

    def __remove_all_recursively(self, dictionary):
        ''' Remove recursively all compromizing fields
        '''
        # NOTE(review): this helper is unused (remove_all() filters the top
        # level only) and it delegates to __get_meta_recursively, which
        # *collects* metadata rather than the clean fields -- presumably a
        # copy/paste slip; confirm before relying on it.
        d = dict()
        for i, j in [i for i in list(dictionary.items()) if i in self.fields]:
            if isinstance(j, dict):
                d.update(self.__get_meta_recursively(j))
            else:
                d[i] = j
        return d

    def remove_all(self):
        ''' Remove all comprimizing fields
        '''
        with open(self.filename, 'r') as f:
            decoded = bencode.bdecode(f.read())

        # keep only the whitelisted top-level keys
        cleaned = {i: j for i, j in list(decoded.items()) if i in self.fields}

        with open(self.output, 'w') as f:  # encode the decoded torrent
            f.write(bencode.bencode(cleaned))  # and write it in self.output

        self.do_backup()
        return True
diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py new file mode 100644 index 0000000..403c9a7 --- /dev/null +++ b/libmat/mutagenstripper.py | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | ''' Take care of mutagen-supported formats (audio) | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import parser | ||
| 5 | |||
| 6 | |||
class MutagenStripper(parser.GenericParser):
    ''' Base class for the mutagen-backed audio strippers.
        Subclasses must implement _create_mfile() to set self.mfile.
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(MutagenStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
        self._create_mfile()

    def _create_mfile(self):
        ''' Instantiate the mutagen file object (abstract). '''
        raise NotImplementedError

    def is_clean(self):
        ''' The file is clean when it carries no tag block. '''
        if self.mfile.tags:
            return False
        return True

    def remove_all(self):
        ''' Strip every tag from the file, in place. '''
        if self.backup:
            self.create_backup_copy()
        self.mfile.delete()
        self.mfile.save()
        return True

    def get_meta(self):
        ''' Return the content of the metadata block, if present. '''
        if not self.mfile.tags:
            return {}
        return {key: value for key, value in self.mfile.tags}
diff --git a/libmat/office.py b/libmat/office.py new file mode 100644 index 0000000..0ca1ff1 --- /dev/null +++ b/libmat/office.py | |||
| @@ -0,0 +1,191 @@ | |||
| 1 | ''' Care about office's formats | ||
| 2 | |||
| 3 | ''' | ||
| 4 | |||
| 5 | import logging | ||
| 6 | import os | ||
| 7 | import shutil | ||
| 8 | import tempfile | ||
| 9 | import xml.dom.minidom as minidom | ||
| 10 | import zipfile | ||
| 11 | |||
| 12 | try: | ||
| 13 | import cairo | ||
| 14 | from gi.repository import Poppler | ||
| 15 | except ImportError: | ||
| 16 | logging.info('office.py loaded without PDF support') | ||
| 17 | pass | ||
| 18 | |||
| 19 | import parser | ||
| 20 | import archive | ||
| 21 | |||
| 22 | |||
class OpenDocumentStripper(archive.TerminalZipStripper):
    ''' An open document file is a zip, with xml files into.
        The one that interest us is meta.xml
    '''

    def get_meta(self):
        ''' Return a dict with all the meta of the file by
            trying to read the meta.xml file.
        '''
        metadata = super(OpenDocumentStripper, self).get_meta()
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            content = zipin.read('meta.xml')
            dom1 = minidom.parseString(content)
            elements = dom1.getElementsByTagName('office:meta')
            for i in elements[0].childNodes:
                if i.tagName != 'meta:document-statistic':
                    nodename = ''.join(i.nodeName.split(':')[1:])
                    metadata[nodename] = ''.join([j.data for j in i.childNodes])
                else:
                    # thank you w3c for not providing a nice
                    # method to get all attributes of a node
                    pass
        except KeyError:  # no meta.xml file found
            logging.debug('%s has no opendocument metadata' % self.filename)
        finally:
            # always release the zip handle, whatever happened above
            zipin.close()
        return metadata

    def remove_all(self):
        ''' Removes metadata, including the meta.xml file itself.
        '''
        return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml'])

    def is_clean(self):
        ''' Check if the file is clean from harmful metadatas
        '''
        clean_super = super(OpenDocumentStripper, self).is_clean()
        if clean_super is False:
            return False

        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipin.getinfo('meta.xml')
        except KeyError:  # no meta.xml in the file
            return True
        finally:
            # bugfix: the handle used to leak when meta.xml was absent,
            # since the early return skipped the close() call
            zipin.close()
        return False
| 70 | |||
| 71 | |||
class OpenXmlStripper(archive.TerminalZipStripper):
    ''' Represent an office openxml document, which is like
        an opendocument format, with some tricky stuff added.
        It contains mostly xml, but can have media blobs, crap, ...
        (I don't like this format.)
    '''
    def remove_all(self):
        ''' Remove all metadata (the docProps/ files), keeping .rels.
        '''
        # NOTE(review): ('docProps/') and ('.rels') are plain strings, not
        # 1-tuples; confirm the superclass uses startswith/endswith
        # semantics before "fixing" them into tuples.
        return super(OpenXmlStripper, self).remove_all(
            beginning_blacklist=('docProps/'), whitelist=('.rels'))

    def is_clean(self):
        ''' Check if the file is clean from harmful metadatas.
            This implementation is faster than something like
            "return this.get_meta() == {}".
        '''
        clean_super = super(OpenXmlStripper, self).is_clean()
        if clean_super is False:
            return False

        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            for item in zipin.namelist():
                if item.startswith('docProps/'):
                    # bugfix: this early return used to leak the zip handle
                    return False
        finally:
            zipin.close()
        return True

    def get_meta(self):
        ''' Return a dict with all the meta of the file
        '''
        metadata = super(OpenXmlStripper, self).get_meta()

        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            for item in zipin.namelist():
                if item.startswith('docProps/'):
                    metadata[item] = 'harmful content'
        finally:
            zipin.close()
        return metadata
| 109 | |||
| 110 | |||
class PdfStripper(parser.GenericParser):
    ''' Represent a PDF file
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
        self.uri = 'file://' + os.path.abspath(self.filename)
        self.password = None
        # render at a lower quality (smaller output file) when asked to
        self.pdf_quality = kwargs.get('low_pdf_quality', False)

        # Poppler document properties considered harmful metadata
        self.meta_list = frozenset(['title', 'author', 'subject',
            'keywords', 'creator', 'producer', 'metadata'])

    def is_clean(self):
        ''' Check if the file is clean from harmful metadatas
        '''
        document = Poppler.Document.new_from_file(self.uri, self.password)
        for key in self.meta_list:
            if document.get_property(key):
                return False
        return True

    def remove_all(self):
        ''' Opening the PDF with poppler, then doing a render
            on a cairo pdfsurface for each pages.

            http://cairographics.org/documentation/pycairo/2/

            The use of an intermediate tempfile is necessary because
            python-cairo segfaults on unicode.
            See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457
        '''
        document = Poppler.Document.new_from_file(self.uri, self.password)
        output = None
        try:
            fd, output = tempfile.mkstemp()
            os.close(fd)  # mkstemp() returns an open fd: close it to avoid a leak
            page = document.get_page(0)
            # assume that every pages are the same size
            page_width, page_height = page.get_size()
            surface = cairo.PDFSurface(output, page_width, page_height)
            context = cairo.Context(surface)  # context draws on the surface
            logging.debug('PDF rendering of %s' % self.filename)
            for pagenum in range(document.get_n_pages()):
                page = document.get_page(pagenum)
                context.translate(0, 0)
                if self.pdf_quality:
                    page.render(context)  # render the page on context
                else:
                    page.render_for_printing(context)  # render the page on context
                context.show_page()  # draw context on surface
            surface.finish()
            shutil.move(output, self.output)
        except Exception:  # was a bare except: don't swallow KeyboardInterrupt
            logging.error('Something went wrong when cleaning %s.' % self.filename)
            if output and os.path.exists(output):
                os.remove(output)  # don't leave a half-written tempfile behind
            return False

        try:
            import pdfrw  # For now, poppler cannot write meta, so we must use pdfrw
            logging.debug('Removing %s\'s superficial metadata' % self.filename)
            trailer = pdfrw.PdfReader(self.output)
            trailer.Info.Producer = None
            trailer.Info.Creator = None
            writer = pdfrw.PdfWriter()
            writer.trailer = trailer
            writer.write(self.output)
            self.do_backup()
        except Exception:
            logging.error('Unable to remove all metadata from %s, please install pdfrw' % self.output)
            return False
        return True

    def get_meta(self):
        ''' Return a dict with all the meta of the file
        '''
        document = Poppler.Document.new_from_file(self.uri, self.password)
        metadata = {}
        for key in self.meta_list:
            if document.get_property(key):
                metadata[key] = document.get_property(key)
        return metadata
diff --git a/libmat/parser.py b/libmat/parser.py new file mode 100644 index 0000000..1765da8 --- /dev/null +++ b/libmat/parser.py | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | ''' Parent class of all parser | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import os | ||
| 5 | import shutil | ||
| 6 | import tempfile | ||
| 7 | |||
| 8 | import hachoir_core | ||
| 9 | import hachoir_editor | ||
| 10 | |||
| 11 | import mat | ||
| 12 | |||
# Extensions whose formats carry no metadata at all
NOMETA = frozenset((
    '.bmp',   # "raw" image
    '.rdf',   # text
    '.txt',   # plain text
    '.xml',   # formatted text (XML)
    '.rels',  # openXML formatted text
))

# Sentinel returned by _should_remove(): the field is a container,
# recurse into it instead of removing/keeping it wholesale.
FIELD = object()


class GenericParser(object):
    ''' Parent class of all parsers
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        self.parser = parser
        self.mime = mime
        self.backup = backup  # keep a .bak copy of the original file?
        self.is_writable = is_writable
        self.editor = hachoir_editor.createEditor(parser)
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        self.basename = os.path.basename(filename)
        fd, output = tempfile.mkstemp()
        os.close(fd)  # mkstemp() returns an open fd: close it to avoid a leak
        self.output = hachoir_core.cmd_line.unicodeFilename(output)

    def __del__(self):
        ''' Remove tempfile if it was not used
        '''
        # __init__ may have failed before self.output was set
        output = getattr(self, 'output', None)
        if output and os.path.exists(output):
            mat.secure_remove(output)

    def is_clean(self):
        ''' Check if the file is clean from harmful metadatas
        '''
        for field in self.editor:
            if self._should_remove(field):
                return self._is_clean(self.editor)
        return True

    def _is_clean(self, fieldset):
        ''' Recursive helper for is_clean(): False as soon as one
            compromising field is found anywhere in the tree.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD:
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        ''' Remove all compromising fields
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        ''' Recursive way to handle tree metadatas
        '''
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                if remove is FIELD:
                    self._remove_all(field)
            return True
        except Exception:  # was a bare except: don't swallow KeyboardInterrupt
            return False

    def _remove(self, fieldset, field):
        ''' Delete the given field
        '''
        del fieldset[field]

    def get_meta(self):
        ''' Return a dict with all the meta of the file
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        ''' Recursive way to handle tree metadatas: collect every
            compromising field of the tree into `metadata`.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove:
                try:
                    metadata[field.name] = field.value
                except Exception:
                    metadata[field.name] = 'harmful content'
            if remove is FIELD:
                # bugfix: used to recurse with None instead of the shared
                # dict, crashing as soon as a nested field was recorded
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        ''' Return True if the field is compromising
            abstract method
        '''
        raise NotImplementedError

    def create_backup_copy(self):
        ''' Create a backup copy
        '''
        shutil.copy2(self.filename, self.filename + '.bak')

    def do_backup(self):
        ''' Keep a backup of the file if asked.

            The process of double-renaming is not very elegant,
            but it greatly simplify new strippers implementation.
        '''
        if self.backup:
            shutil.move(self.filename, self.filename + '.bak')
        else:
            mat.secure_remove(self.filename)
        shutil.move(self.output, self.filename)
diff --git a/libmat/strippers.py b/libmat/strippers.py new file mode 100644 index 0000000..aea98da --- /dev/null +++ b/libmat/strippers.py | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | ''' Manage which fileformat can be processed | ||
| 2 | ''' | ||
| 3 | |||
| 4 | import archive | ||
| 5 | import audio | ||
| 6 | import gi | ||
| 7 | import images | ||
| 8 | import logging | ||
| 9 | import mat | ||
| 10 | import misc | ||
| 11 | import office | ||
| 12 | import subprocess | ||
| 13 | |||
# Map of mime type -> stripper class; extended below according to
# which optional dependencies are actually available.
STRIPPERS = {
    'application/x-tar': archive.TarStripper,
    'application/x-bzip2': archive.Bzip2Stripper,
    'application/x-gzip': archive.GzipStripper,
    'application/zip': archive.ZipStripper,
    'audio/mpeg': audio.MpegAudioStripper,
    'application/x-bittorrent': misc.TorrentStripper,
    'application/opendocument': office.OpenDocumentStripper,
    'application/officeopenxml': office.OpenXmlStripper,
}

logging.basicConfig(level=mat.LOGGING_LEVEL)

# PDF support needs Poppler (via gi), python-cairo and python-pdfrw
pdfSupport = True
try:
    from gi.repository import Poppler
except ImportError:
    logging.info('Unable to import Poppler: no PDF support')
    pdfSupport = False

try:
    import cairo
except ImportError:
    logging.info('Unable to import python-cairo: no PDF support')
    pdfSupport = False

try:
    import pdfrw
except ImportError:
    logging.info('Unable to import python-pdfrw: no PDF support')
    pdfSupport = False

if pdfSupport:
    STRIPPERS['application/x-pdf'] = office.PdfStripper
    STRIPPERS['application/pdf'] = office.PdfStripper


# audio format support with mutagen-python
# (audio/mpeg is already registered above, hachoir-based)
try:
    import mutagen
    STRIPPERS['audio/x-flac'] = audio.FlacStripper
    STRIPPERS['audio/vorbis'] = audio.OggStripper
except ImportError:
    logging.info('Unable to import python-mutagen: limited audio format support')

# images support: prefer exiftool, fall back on hachoir
try:
    subprocess.check_output(['exiftool', '-ver'])
    import exiftool
    STRIPPERS['image/jpeg'] = exiftool.JpegStripper
    STRIPPERS['image/png'] = exiftool.PngStripper
except (OSError, subprocess.CalledProcessError):
    # exiftool not installed, or installed but exiting non-zero:
    # use hachoir instead
    logging.info('Unable to find exiftool: limited images support')
    STRIPPERS['image/jpeg'] = images.JpegStripper
    STRIPPERS['image/png'] = images.PngStripper
