diff options
Diffstat (limited to 'libmat')
| -rw-r--r-- | libmat/__init__.py | 1 | ||||
| -rw-r--r-- | libmat/archive.py | 128 | ||||
| -rw-r--r-- | libmat/audio.py | 28 | ||||
| -rw-r--r-- | libmat/bencode/__init__.py | 1 | ||||
| -rw-r--r-- | libmat/bencode/bencode.py | 33 | ||||
| -rw-r--r-- | libmat/exceptions.py | 12 | ||||
| -rw-r--r-- | libmat/exiftool.py | 39 | ||||
| -rw-r--r-- | libmat/hachoir_editor/typed_field.py | 47 | ||||
| -rw-r--r-- | libmat/images.py | 20 | ||||
| -rw-r--r-- | libmat/mat.py | 55 | ||||
| -rw-r--r-- | libmat/misc.py | 39 | ||||
| -rw-r--r-- | libmat/mutagenstripper.py | 8 | ||||
| -rw-r--r-- | libmat/office.py | 55 | ||||
| -rw-r--r-- | libmat/parser.py | 48 | ||||
| -rw-r--r-- | libmat/strippers.py | 4 |
15 files changed, 275 insertions, 243 deletions
diff --git a/libmat/__init__.py b/libmat/__init__.py index 8b13789..e69de29 100644 --- a/libmat/__init__.py +++ b/libmat/__init__.py | |||
| @@ -1 +0,0 @@ | |||
| 1 | |||
diff --git a/libmat/archive.py b/libmat/archive.py index d483dcc..4c62dc8 100644 --- a/libmat/archive.py +++ b/libmat/archive.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Take care of archives formats | 1 | """ Take care of archives formats |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | import datetime | 4 | import datetime |
| 5 | import logging | 5 | import logging |
| @@ -16,23 +16,24 @@ import parser | |||
| 16 | # Zip files do not support dates older than 01/01/1980 | 16 | # Zip files do not support dates older than 01/01/1980 |
| 17 | ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) | 17 | ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) |
| 18 | ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) | 18 | ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) |
| 19 | - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds() | 19 | - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds() |
| 20 | 20 | ||
| 21 | 21 | ||
| 22 | class GenericArchiveStripper(parser.GenericParser): | 22 | class GenericArchiveStripper(parser.GenericParser): |
| 23 | ''' Represent a generic archive | 23 | """ Represent a generic archive |
| 24 | ''' | 24 | """ |
| 25 | |||
| 25 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 26 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 26 | super(GenericArchiveStripper, self).__init__(filename, | 27 | super(GenericArchiveStripper, self).__init__(filename, |
| 27 | parser, mime, backup, is_writable, **kwargs) | 28 | parser, mime, backup, is_writable, **kwargs) |
| 28 | self.compression = '' | 29 | self.compression = '' |
| 29 | self.add2archive = kwargs['add2archive'] | 30 | self.add2archive = kwargs['add2archive'] |
| 30 | self.tempdir = tempfile.mkdtemp() | 31 | self.tempdir = tempfile.mkdtemp() |
| 31 | 32 | ||
| 32 | def __del__(self): | 33 | def __del__(self): |
| 33 | ''' Remove the files inside the temp dir, | 34 | """ Remove the files inside the temp dir, |
| 34 | then remove the temp dir | 35 | then remove the temp dir |
| 35 | ''' | 36 | """ |
| 36 | for root, dirs, files in os.walk(self.tempdir): | 37 | for root, dirs, files in os.walk(self.tempdir): |
| 37 | for item in files: | 38 | for item in files: |
| 38 | path_file = os.path.join(root, item) | 39 | path_file = os.path.join(root, item) |
| @@ -40,28 +41,30 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 40 | shutil.rmtree(self.tempdir) | 41 | shutil.rmtree(self.tempdir) |
| 41 | 42 | ||
| 42 | def is_clean(self, list_unsupported=False): | 43 | def is_clean(self, list_unsupported=False): |
| 43 | ''' Virtual method to check for harmul metadata | 44 | """ Virtual method to check for harmul metadata |
| 44 | ''' | 45 | """ |
| 45 | raise NotImplementedError | 46 | raise NotImplementedError |
| 46 | 47 | ||
| 47 | def list_unsupported(self): | 48 | def list_unsupported(self): |
| 48 | ''' Get a list of every non-supported files present in the archive | 49 | """ Get a list of every non-supported files present in the archive |
| 49 | ''' | 50 | """ |
| 50 | return self.is_clean(list_unsupported=True) | 51 | return self.is_clean(list_unsupported=True) |
| 51 | 52 | ||
| 52 | def remove_all(self): | 53 | def remove_all(self): |
| 53 | ''' Virtual method to remove all metadata | 54 | """ Virtual method to remove all metadata |
| 54 | ''' | 55 | """ |
| 55 | raise NotImplementedError | 56 | raise NotImplementedError |
| 56 | 57 | ||
| 57 | 58 | ||
| 58 | class ZipStripper(GenericArchiveStripper): | 59 | class ZipStripper(GenericArchiveStripper): |
| 59 | ''' Represent a zip file | 60 | """ Represent a zip file |
| 60 | ''' | 61 | """ |
| 61 | def __is_zipfile_clean(self, fileinfo): | 62 | |
| 62 | ''' Check if a ZipInfo object is clean of metadata added | 63 | @staticmethod |
| 64 | def __is_zipfile_clean(fileinfo): | ||
| 65 | """ Check if a ZipInfo object is clean of metadata added | ||
| 63 | by zip itself, independently of the corresponding file metadata | 66 | by zip itself, independently of the corresponding file metadata |
| 64 | ''' | 67 | """ |
| 65 | if fileinfo.comment != '': | 68 | if fileinfo.comment != '': |
| 66 | return False | 69 | return False |
| 67 | elif fileinfo.date_time != ZIP_EPOCH: | 70 | elif fileinfo.date_time != ZIP_EPOCH: |
| @@ -71,11 +74,11 @@ class ZipStripper(GenericArchiveStripper): | |||
| 71 | return True | 74 | return True |
| 72 | 75 | ||
| 73 | def is_clean(self, list_unsupported=False): | 76 | def is_clean(self, list_unsupported=False): |
| 74 | ''' Check if the given file is clean from harmful metadata | 77 | """ Check if the given file is clean from harmful metadata |
| 75 | When list_unsupported is True, the method returns a list | 78 | When list_unsupported is True, the method returns a list |
| 76 | of all non-supported/archives files contained in the | 79 | of all non-supported/archives files contained in the |
| 77 | archive. | 80 | archive. |
| 78 | ''' | 81 | """ |
| 79 | ret_list = [] | 82 | ret_list = [] |
| 80 | zipin = zipfile.ZipFile(self.filename, 'r') | 83 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 81 | if zipin.comment != '' and not list_unsupported: | 84 | if zipin.comment != '' and not list_unsupported: |
| @@ -86,7 +89,7 @@ class ZipStripper(GenericArchiveStripper): | |||
| 86 | path = os.path.join(self.tempdir, item.filename) | 89 | path = os.path.join(self.tempdir, item.filename) |
| 87 | if not self.__is_zipfile_clean(item) and not list_unsupported: | 90 | if not self.__is_zipfile_clean(item) and not list_unsupported: |
| 88 | logging.debug('%s from %s has compromising zipinfo' % | 91 | logging.debug('%s from %s has compromising zipinfo' % |
| 89 | (item.filename, self.filename)) | 92 | (item.filename, self.filename)) |
| 90 | return False | 93 | return False |
| 91 | if os.path.isfile(path): | 94 | if os.path.isfile(path): |
| 92 | cfile = mat.create_class_file(path, False, add2archive=self.add2archive) | 95 | cfile = mat.create_class_file(path, False, add2archive=self.add2archive) |
| @@ -97,7 +100,7 @@ class ZipStripper(GenericArchiveStripper): | |||
| 97 | return False | 100 | return False |
| 98 | else: | 101 | else: |
| 99 | logging.info('%s\'s fileformat is not supported or harmless.' | 102 | logging.info('%s\'s fileformat is not supported or harmless.' |
| 100 | % item.filename) | 103 | % item.filename) |
| 101 | basename, ext = os.path.splitext(path) | 104 | basename, ext = os.path.splitext(path) |
| 102 | if os.path.basename(item.filename) not in ('mimetype', '.rels'): | 105 | if os.path.basename(item.filename) not in ('mimetype', '.rels'): |
| 103 | if ext not in parser.NOMETA: | 106 | if ext not in parser.NOMETA: |
| @@ -110,7 +113,7 @@ class ZipStripper(GenericArchiveStripper): | |||
| 110 | return True | 113 | return True |
| 111 | 114 | ||
| 112 | def get_meta(self): | 115 | def get_meta(self): |
| 113 | ''' Return all the metadata of a zip archive''' | 116 | """ Return all the metadata of a zip archive""" |
| 114 | zipin = zipfile.ZipFile(self.filename, 'r') | 117 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 115 | metadata = {} | 118 | metadata = {} |
| 116 | if zipin.comment != '': | 119 | if zipin.comment != '': |
| @@ -129,13 +132,14 @@ class ZipStripper(GenericArchiveStripper): | |||
| 129 | metadata[item.filename] = str(cfile_meta) | 132 | metadata[item.filename] = str(cfile_meta) |
| 130 | else: | 133 | else: |
| 131 | logging.info('%s\'s fileformat is not supported or harmless' | 134 | logging.info('%s\'s fileformat is not supported or harmless' |
| 132 | % item.filename) | 135 | % item.filename) |
| 133 | zipin.close() | 136 | zipin.close() |
| 134 | return metadata | 137 | return metadata |
| 135 | 138 | ||
| 136 | def __get_zipinfo_meta(self, zipinfo): | 139 | @staticmethod |
| 137 | ''' Return all the metadata of a ZipInfo | 140 | def __get_zipinfo_meta(zipinfo): |
| 138 | ''' | 141 | """ Return all the metadata of a ZipInfo |
| 142 | """ | ||
| 139 | metadata = {} | 143 | metadata = {} |
| 140 | if zipinfo.comment != '': | 144 | if zipinfo.comment != '': |
| 141 | metadata['comment'] = zipinfo.comment | 145 | metadata['comment'] = zipinfo.comment |
| @@ -145,13 +149,19 @@ class ZipStripper(GenericArchiveStripper): | |||
| 145 | metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown" | 149 | metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown" |
| 146 | return metadata | 150 | return metadata |
| 147 | 151 | ||
| 148 | def remove_all(self, whitelist=[], beginning_blacklist=[], ending_blacklist=[]): | 152 | def remove_all(self, whitelist=None, beginning_blacklist=None, ending_blacklist=None): |
| 149 | ''' Remove all metadata from a zip archive, even thoses | 153 | """ Remove all metadata from a zip archive, even thoses |
| 150 | added by Python's zipfile itself. It will not add | 154 | added by Python's zipfile itself. It will not add |
| 151 | files starting with "begining_blacklist", or ending with | 155 | files starting with "begining_blacklist", or ending with |
| 152 | "ending_blacklist". This method also add files present in | 156 | "ending_blacklist". This method also add files present in |
| 153 | whitelist to the archive. | 157 | whitelist to the archive. |
| 154 | ''' | 158 | """ |
| 159 | if not ending_blacklist: | ||
| 160 | ending_blacklist = [] | ||
| 161 | if not beginning_blacklist: | ||
| 162 | beginning_blacklist = [] | ||
| 163 | if not whitelist: | ||
| 164 | whitelist = [] | ||
| 155 | zipin = zipfile.ZipFile(self.filename, 'r') | 165 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 156 | zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) | 166 | zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) |
| 157 | for item in zipin.infolist(): | 167 | for item in zipin.infolist(): |
| @@ -166,7 +176,7 @@ class ZipStripper(GenericArchiveStripper): | |||
| 166 | if cfile is not None: | 176 | if cfile is not None: |
| 167 | # Handle read-only files inside archive | 177 | # Handle read-only files inside archive |
| 168 | old_stat = os.stat(path).st_mode | 178 | old_stat = os.stat(path).st_mode |
| 169 | os.chmod(path, old_stat|stat.S_IWUSR) | 179 | os.chmod(path, old_stat | stat.S_IWUSR) |
| 170 | cfile.remove_all() | 180 | cfile.remove_all() |
| 171 | os.chmod(path, old_stat) | 181 | os.chmod(path, old_stat) |
| 172 | logging.debug('Processing %s from %s' % (item.filename, self.filename)) | 182 | logging.debug('Processing %s from %s' % (item.filename, self.filename)) |
| @@ -186,11 +196,12 @@ class ZipStripper(GenericArchiveStripper): | |||
| 186 | 196 | ||
| 187 | 197 | ||
| 188 | class TarStripper(GenericArchiveStripper): | 198 | class TarStripper(GenericArchiveStripper): |
| 189 | ''' Represent a tarfile archive | 199 | """ Represent a tarfile archive |
| 190 | ''' | 200 | """ |
| 201 | |||
| 191 | def _remove(self, current_file): | 202 | def _remove(self, current_file): |
| 192 | ''' Remove the meta added by tarfile itself to the file | 203 | """ Remove the meta added by tarfile itself to the file |
| 193 | ''' | 204 | """ |
| 194 | current_file.mtime = 0 | 205 | current_file.mtime = 0 |
| 195 | current_file.uid = 0 | 206 | current_file.uid = 0 |
| 196 | current_file.gid = 0 | 207 | current_file.gid = 0 |
| @@ -198,11 +209,13 @@ class TarStripper(GenericArchiveStripper): | |||
| 198 | current_file.gname = '' | 209 | current_file.gname = '' |
| 199 | return current_file | 210 | return current_file |
| 200 | 211 | ||
| 201 | def remove_all(self, whitelist=[]): | 212 | def remove_all(self, whitelist=None): |
| 202 | ''' Remove all harmful metadata from the tarfile. | 213 | """ Remove all harmful metadata from the tarfile. |
| 203 | The method will also add every files matching | 214 | The method will also add every files matching |
| 204 | whitelist in the produced archive. | 215 | whitelist in the produced archive. |
| 205 | ''' | 216 | """ |
| 217 | if not whitelist: | ||
| 218 | whitelist = [] | ||
| 206 | tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8') | 219 | tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8') |
| 207 | tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8') | 220 | tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8') |
| 208 | for item in tarin.getmembers(): | 221 | for item in tarin.getmembers(): |
| @@ -213,14 +226,14 @@ class TarStripper(GenericArchiveStripper): | |||
| 213 | if cfile is not None: | 226 | if cfile is not None: |
| 214 | # Handle read-only files inside archive | 227 | # Handle read-only files inside archive |
| 215 | old_stat = os.stat(path).st_mode | 228 | old_stat = os.stat(path).st_mode |
| 216 | os.chmod(path, old_stat|stat.S_IWUSR) | 229 | os.chmod(path, old_stat | stat.S_IWUSR) |
| 217 | cfile.remove_all() | 230 | cfile.remove_all() |
| 218 | os.chmod(path, old_stat) | 231 | os.chmod(path, old_stat) |
| 219 | elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA: | 232 | elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA: |
| 220 | logging.debug('%s\' format is either not supported or harmless' % item.name) | 233 | logging.debug('%s\' format is either not supported or harmless' % item.name) |
| 221 | elif item.name in whitelist: | 234 | elif item.name in whitelist: |
| 222 | logging.debug('%s is not supported, but MAT was told to add it anyway.' | 235 | logging.debug('%s is not supported, but MAT was told to add it anyway.' |
| 223 | % item.name) | 236 | % item.name) |
| 224 | else: # Don't add the file to the archive | 237 | else: # Don't add the file to the archive |
| 225 | logging.debug('%s will not be added' % item.name) | 238 | logging.debug('%s will not be added' % item.name) |
| 226 | continue | 239 | continue |
| @@ -230,9 +243,10 @@ class TarStripper(GenericArchiveStripper): | |||
| 230 | self.do_backup() | 243 | self.do_backup() |
| 231 | return True | 244 | return True |
| 232 | 245 | ||
| 233 | def is_file_clean(self, current_file): | 246 | @staticmethod |
| 234 | ''' Check metadatas added by tarfile | 247 | def is_file_clean(current_file): |
| 235 | ''' | 248 | """ Check metadatas added by tarfile |
| 249 | """ | ||
| 236 | if current_file.mtime != 0: | 250 | if current_file.mtime != 0: |
| 237 | return False | 251 | return False |
| 238 | elif current_file.uid != 0: | 252 | elif current_file.uid != 0: |
| @@ -246,17 +260,17 @@ class TarStripper(GenericArchiveStripper): | |||
| 246 | return True | 260 | return True |
| 247 | 261 | ||
| 248 | def is_clean(self, list_unsupported=False): | 262 | def is_clean(self, list_unsupported=False): |
| 249 | ''' Check if the file is clean from harmful metadatas | 263 | """ Check if the file is clean from harmful metadatas |
| 250 | When list_unsupported is True, the method returns a list | 264 | When list_unsupported is True, the method returns a list |
| 251 | of all non-supported/archives files contained in the | 265 | of all non-supported/archives files contained in the |
| 252 | archive. | 266 | archive. |
| 253 | ''' | 267 | """ |
| 254 | ret_list = [] | 268 | ret_list = [] |
| 255 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 269 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 256 | for item in tarin.getmembers(): | 270 | for item in tarin.getmembers(): |
| 257 | if not self.is_file_clean(item) and not list_unsupported: | 271 | if not self.is_file_clean(item) and not list_unsupported: |
| 258 | logging.debug('%s from %s has compromising tarinfo' % | 272 | logging.debug('%s from %s has compromising tarinfo' % |
| 259 | (item.name, self.filename)) | 273 | (item.name, self.filename)) |
| 260 | return False | 274 | return False |
| 261 | tarin.extract(item, self.tempdir) | 275 | tarin.extract(item, self.tempdir) |
| 262 | path = os.path.join(self.tempdir, item.name) | 276 | path = os.path.join(self.tempdir, item.name) |
| @@ -265,7 +279,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 265 | if cfile is not None: | 279 | if cfile is not None: |
| 266 | if not cfile.is_clean(): | 280 | if not cfile.is_clean(): |
| 267 | logging.debug('%s from %s has metadata' % | 281 | logging.debug('%s from %s has metadata' % |
| 268 | (item.name.decode("utf8"), self.filename)) | 282 | (item.name.decode("utf8"), self.filename)) |
| 269 | if not list_unsupported: | 283 | if not list_unsupported: |
| 270 | return False | 284 | return False |
| 271 | # Nested archives are treated like unsupported files | 285 | # Nested archives are treated like unsupported files |
| @@ -283,8 +297,8 @@ class TarStripper(GenericArchiveStripper): | |||
| 283 | return True | 297 | return True |
| 284 | 298 | ||
| 285 | def get_meta(self): | 299 | def get_meta(self): |
| 286 | ''' Return a dict with all the meta of the tarfile | 300 | """ Return a dict with all the meta of the tarfile |
| 287 | ''' | 301 | """ |
| 288 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 302 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 289 | metadata = {} | 303 | metadata = {} |
| 290 | for item in tarin.getmembers(): | 304 | for item in tarin.getmembers(): |
| @@ -312,24 +326,26 @@ class TarStripper(GenericArchiveStripper): | |||
| 312 | 326 | ||
| 313 | 327 | ||
| 314 | class TerminalZipStripper(ZipStripper): | 328 | class TerminalZipStripper(ZipStripper): |
| 315 | ''' Represent a terminal level archive. | 329 | """ Represent a terminal level archive. |
| 316 | This type of archive can not contain nested archives. | 330 | This type of archive can not contain nested archives. |
| 317 | It is used for formats like docx, which are basically | 331 | It is used for formats like docx, which are basically |
| 318 | ziped xml. | 332 | ziped xml. |
| 319 | ''' | 333 | """ |
| 320 | 334 | ||
| 321 | 335 | ||
| 322 | class GzipStripper(TarStripper): | 336 | class GzipStripper(TarStripper): |
| 323 | ''' Represent a tar.gz archive | 337 | """ Represent a tar.gz archive |
| 324 | ''' | 338 | """ |
| 339 | |||
| 325 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 340 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 326 | super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 341 | super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 327 | self.compression = ':gz' | 342 | self.compression = ':gz' |
| 328 | 343 | ||
| 329 | 344 | ||
| 330 | class Bzip2Stripper(TarStripper): | 345 | class Bzip2Stripper(TarStripper): |
| 331 | ''' Represent a tar.bz2 archive | 346 | """ Represent a tar.bz2 archive |
| 332 | ''' | 347 | """ |
| 348 | |||
| 333 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 349 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 334 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 350 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 335 | self.compression = ':bz2' | 351 | self.compression = ':bz2' |
diff --git a/libmat/audio.py b/libmat/audio.py index dae9d75..2747dc1 100644 --- a/libmat/audio.py +++ b/libmat/audio.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Care about audio fileformat | 1 | """ Care about audio fileformat |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | try: | 4 | try: |
| 5 | from mutagen.flac import FLAC | 5 | from mutagen.flac import FLAC |
| @@ -12,41 +12,41 @@ import mutagenstripper | |||
| 12 | 12 | ||
| 13 | 13 | ||
| 14 | class MpegAudioStripper(parser.GenericParser): | 14 | class MpegAudioStripper(parser.GenericParser): |
| 15 | ''' Represent mpeg audio file (mp3, ...) | 15 | """ Represent mpeg audio file (mp3, ...) |
| 16 | ''' | 16 | """ |
| 17 | def _should_remove(self, field): | 17 | def _should_remove(self, field): |
| 18 | return field.name in ("id3v1", "id3v2") | 18 | return field.name in ("id3v1", "id3v2") |
| 19 | 19 | ||
| 20 | 20 | ||
| 21 | class OggStripper(mutagenstripper.MutagenStripper): | 21 | class OggStripper(mutagenstripper.MutagenStripper): |
| 22 | ''' Represent an ogg vorbis file | 22 | """ Represent an ogg vorbis file |
| 23 | ''' | 23 | """ |
| 24 | def _create_mfile(self): | 24 | def _create_mfile(self): |
| 25 | self.mfile = OggVorbis(self.filename) | 25 | self.mfile = OggVorbis(self.filename) |
| 26 | 26 | ||
| 27 | 27 | ||
| 28 | class FlacStripper(mutagenstripper.MutagenStripper): | 28 | class FlacStripper(mutagenstripper.MutagenStripper): |
| 29 | ''' Represent a Flac audio file | 29 | """ Represent a Flac audio file |
| 30 | ''' | 30 | """ |
| 31 | def _create_mfile(self): | 31 | def _create_mfile(self): |
| 32 | self.mfile = FLAC(self.filename) | 32 | self.mfile = FLAC(self.filename) |
| 33 | 33 | ||
| 34 | def remove_all(self): | 34 | def remove_all(self): |
| 35 | ''' Remove the "metadata" block from the file | 35 | """ Remove the "metadata" block from the file |
| 36 | ''' | 36 | """ |
| 37 | super(FlacStripper, self).remove_all() | 37 | super(FlacStripper, self).remove_all() |
| 38 | self.mfile.clear_pictures() | 38 | self.mfile.clear_pictures() |
| 39 | self.mfile.save() | 39 | self.mfile.save() |
| 40 | return True | 40 | return True |
| 41 | 41 | ||
| 42 | def is_clean(self): | 42 | def is_clean(self): |
| 43 | ''' Check if the "metadata" block is present in the file | 43 | """ Check if the "metadata" block is present in the file |
| 44 | ''' | 44 | """ |
| 45 | return super(FlacStripper, self).is_clean() and not self.mfile.pictures | 45 | return super(FlacStripper, self).is_clean() and not self.mfile.pictures |
| 46 | 46 | ||
| 47 | def get_meta(self): | 47 | def get_meta(self): |
| 48 | ''' Return the content of the metadata block if present | 48 | """ Return the content of the metadata block if present |
| 49 | ''' | 49 | """ |
| 50 | metadata = super(FlacStripper, self).get_meta() | 50 | metadata = super(FlacStripper, self).get_meta() |
| 51 | if self.mfile.pictures: | 51 | if self.mfile.pictures: |
| 52 | metadata['picture:'] = 'yes' | 52 | metadata['picture:'] = 'yes' |
diff --git a/libmat/bencode/__init__.py b/libmat/bencode/__init__.py index 8b13789..e69de29 100644 --- a/libmat/bencode/__init__.py +++ b/libmat/bencode/__init__.py | |||
| @@ -1 +0,0 @@ | |||
| 1 | |||
diff --git a/libmat/bencode/bencode.py b/libmat/bencode/bencode.py index a0cc99a..a7967fc 100644 --- a/libmat/bencode/bencode.py +++ b/libmat/bencode/bencode.py | |||
| @@ -21,18 +21,18 @@ | |||
| 21 | # THE SOFTWARE. | 21 | # THE SOFTWARE. |
| 22 | # | 22 | # |
| 23 | 23 | ||
| 24 | ''' | 24 | """ |
| 25 | A quick (and also nice) lib to bencode/bdecode torrent files | 25 | A quick (and also nice) lib to bencode/bdecode torrent files |
| 26 | ''' | 26 | """ |
| 27 | 27 | ||
| 28 | 28 | ||
| 29 | class BTFailure(Exception): | 29 | class BTFailure(Exception): |
| 30 | '''Custom Exception''' | 30 | """Custom Exception""" |
| 31 | pass | 31 | pass |
| 32 | 32 | ||
| 33 | 33 | ||
| 34 | class Bencached(object): | 34 | class Bencached(object): |
| 35 | '''Custom type : cached string''' | 35 | """Custom type : cached string""" |
| 36 | __slots__ = ['bencoded'] | 36 | __slots__ = ['bencoded'] |
| 37 | 37 | ||
| 38 | def __init__(self, string): | 38 | def __init__(self, string): |
| @@ -40,10 +40,10 @@ class Bencached(object): | |||
| 40 | 40 | ||
| 41 | 41 | ||
| 42 | def decode_int(x, f): | 42 | def decode_int(x, f): |
| 43 | '''decode an int''' | 43 | """decode an int""" |
| 44 | f += 1 | 44 | f += 1 |
| 45 | newf = x.index('e', f) | 45 | newf = x.index('e', f) |
| 46 | if x[f:f+1] == '-0': | 46 | if x[f:f + 1] == '-0': |
| 47 | raise ValueError | 47 | raise ValueError |
| 48 | elif x[f] == '0' and newf != f + 1: | 48 | elif x[f] == '0' and newf != f + 1: |
| 49 | raise ValueError | 49 | raise ValueError |
| @@ -51,7 +51,7 @@ def decode_int(x, f): | |||
| 51 | 51 | ||
| 52 | 52 | ||
| 53 | def decode_string(x, f): | 53 | def decode_string(x, f): |
| 54 | '''decode a string''' | 54 | """decode a string""" |
| 55 | colon = x.index(':', f) | 55 | colon = x.index(':', f) |
| 56 | if x[f] == '0' and colon != f + 1: | 56 | if x[f] == '0' and colon != f + 1: |
| 57 | raise ValueError | 57 | raise ValueError |
| @@ -61,7 +61,7 @@ def decode_string(x, f): | |||
| 61 | 61 | ||
| 62 | 62 | ||
| 63 | def decode_list(x, f): | 63 | def decode_list(x, f): |
| 64 | '''decode a list''' | 64 | """decode a list""" |
| 65 | result = [] | 65 | result = [] |
| 66 | f += 1 | 66 | f += 1 |
| 67 | while x[f] != 'e': | 67 | while x[f] != 'e': |
| @@ -71,7 +71,7 @@ def decode_list(x, f): | |||
| 71 | 71 | ||
| 72 | 72 | ||
| 73 | def decode_dict(x, f): | 73 | def decode_dict(x, f): |
| 74 | '''decode a dict''' | 74 | """decode a dict""" |
| 75 | result = {} | 75 | result = {} |
| 76 | f += 1 | 76 | f += 1 |
| 77 | while x[f] != 'e': | 77 | while x[f] != 'e': |
| @@ -81,24 +81,24 @@ def decode_dict(x, f): | |||
| 81 | 81 | ||
| 82 | 82 | ||
| 83 | def encode_bool(x, r): | 83 | def encode_bool(x, r): |
| 84 | '''bencode a boolean''' | 84 | """bencode a boolean""" |
| 85 | encode_int(1 if r else 0, r) | 85 | encode_int(1 if r else 0, r) |
| 86 | 86 | ||
| 87 | 87 | ||
| 88 | def encode_int(x, r): | 88 | def encode_int(x, r): |
| 89 | '''bencode an integer/float''' | 89 | """bencode an integer/float""" |
| 90 | r.extend(('i', str(x), 'e')) | 90 | r.extend(('i', str(x), 'e')) |
| 91 | 91 | ||
| 92 | 92 | ||
| 93 | def encode_list(x, r): | 93 | def encode_list(x, r): |
| 94 | '''bencode a list/tuple''' | 94 | """bencode a list/tuple""" |
| 95 | r.append('l') | 95 | r.append('l') |
| 96 | [ENCODE_FUNC[type(item)](item, r) for item in x] | 96 | [ENCODE_FUNC[type(item)](item, r) for item in x] |
| 97 | r.append('e') | 97 | r.append('e') |
| 98 | 98 | ||
| 99 | 99 | ||
| 100 | def encode_dict(x, result): | 100 | def encode_dict(x, result): |
| 101 | '''bencode a dict''' | 101 | """bencode a dict""" |
| 102 | result.append('d') | 102 | result.append('d') |
| 103 | ilist = list(x.items()) | 103 | ilist = list(x.items()) |
| 104 | ilist.sort() | 104 | ilist.sort() |
| @@ -108,12 +108,11 @@ def encode_dict(x, result): | |||
| 108 | result.append('e') | 108 | result.append('e') |
| 109 | 109 | ||
| 110 | 110 | ||
| 111 | DECODE_FUNC = {str(x):decode_string for x in range(9)} | 111 | DECODE_FUNC = {str(x): decode_string for x in range(9)} |
| 112 | DECODE_FUNC['l'] = decode_list | 112 | DECODE_FUNC['l'] = decode_list |
| 113 | DECODE_FUNC['d'] = decode_dict | 113 | DECODE_FUNC['d'] = decode_dict |
| 114 | DECODE_FUNC['i'] = decode_int | 114 | DECODE_FUNC['i'] = decode_int |
| 115 | 115 | ||
| 116 | |||
| 117 | ENCODE_FUNC = {} | 116 | ENCODE_FUNC = {} |
| 118 | ENCODE_FUNC[Bencached] = lambda x, r: r.append(x.bencoded) | 117 | ENCODE_FUNC[Bencached] = lambda x, r: r.append(x.bencoded) |
| 119 | ENCODE_FUNC[int] = encode_int | 118 | ENCODE_FUNC[int] = encode_int |
| @@ -126,14 +125,14 @@ ENCODE_FUNC[bool] = encode_bool | |||
| 126 | 125 | ||
| 127 | 126 | ||
| 128 | def bencode(string): | 127 | def bencode(string): |
| 129 | '''bencode $string''' | 128 | """bencode $string""" |
| 130 | table = [] | 129 | table = [] |
| 131 | ENCODE_FUNC[type(string)](string, table) | 130 | ENCODE_FUNC[type(string)](string, table) |
| 132 | return ''.join(table) | 131 | return ''.join(table) |
| 133 | 132 | ||
| 134 | 133 | ||
| 135 | def bdecode(string): | 134 | def bdecode(string): |
| 136 | '''decode $string''' | 135 | """decode $string""" |
| 137 | try: | 136 | try: |
| 138 | result, lenght = DECODE_FUNC[string[0]](string, 0) | 137 | result, lenght = DECODE_FUNC[string[0]](string, 0) |
| 139 | except (IndexError, KeyError, ValueError): | 138 | except (IndexError, KeyError, ValueError): |
diff --git a/libmat/exceptions.py b/libmat/exceptions.py index 47da15c..e71c398 100644 --- a/libmat/exceptions.py +++ b/libmat/exceptions.py | |||
| @@ -1,14 +1,14 @@ | |||
| 1 | ''' Base exceptions for MAT | 1 | """ Base exceptions for MAT |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | 4 | ||
| 5 | class UnableToRemoveFile(Exception): | 5 | class UnableToRemoveFile(Exception): |
| 6 | '''This exception is raised when a file could not be removed | 6 | """This exception is raised when a file could not be removed |
| 7 | ''' | 7 | """ |
| 8 | pass | 8 | pass |
| 9 | 9 | ||
| 10 | class UnableToWriteFile(Exception): | 10 | class UnableToWriteFile(Exception): |
| 11 | '''This exception is raised when a file | 11 | """This exception is raised when a file |
| 12 | can could not be chmod +w | 12 | can could not be chmod +w |
| 13 | ''' | 13 | """ |
| 14 | pass | 14 | pass |
diff --git a/libmat/exiftool.py b/libmat/exiftool.py index aa6849d..0e1fefd 100644 --- a/libmat/exiftool.py +++ b/libmat/exiftool.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Care about images with help of the amazing (perl) library Exiftool. | 1 | """ Care about images with help of the amazing (perl) library Exiftool. |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | import subprocess | 4 | import subprocess |
| 5 | 5 | ||
| @@ -7,25 +7,24 @@ import parser | |||
| 7 | 7 | ||
| 8 | 8 | ||
| 9 | class ExiftoolStripper(parser.GenericParser): | 9 | class ExiftoolStripper(parser.GenericParser): |
| 10 | ''' A generic stripper class using exiftool as backend | 10 | """ A generic stripper class using exiftool as backend |
| 11 | ''' | 11 | """ |
| 12 | 12 | ||
| 13 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 13 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 14 | super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 14 | super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 15 | self.allowed = set(['ExifTool Version Number', 'File Name', 'Directory', | 15 | self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', |
| 16 | 'File Size', 'File Modification Date/Time', 'File Access Date/Time', 'File Permissions', | 16 | 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type', |
| 17 | 'File Type', 'File Type Extension', 'MIME Type', 'Image Width', 'Image Height', | 17 | 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'} |
| 18 | 'Image Size', 'File Inode Change Date/Time', 'Megapixels']) | ||
| 19 | self._set_allowed() | 18 | self._set_allowed() |
| 20 | 19 | ||
| 21 | def _set_allowed(self): | 20 | def _set_allowed(self): |
| 22 | ''' Virtual method. Set the allowed/harmless list of metadata | 21 | """ Virtual method. Set the allowed/harmless list of metadata |
| 23 | ''' | 22 | """ |
| 24 | raise NotImplementedError | 23 | raise NotImplementedError |
| 25 | 24 | ||
| 26 | def remove_all(self): | 25 | def remove_all(self): |
| 27 | ''' Remove all metadata with help of exiftool | 26 | """ Remove all metadata with help of exiftool |
| 28 | ''' | 27 | """ |
| 29 | try: | 28 | try: |
| 30 | if self.backup: | 29 | if self.backup: |
| 31 | self.create_backup_copy() | 30 | self.create_backup_copy() |
| @@ -38,16 +37,16 @@ class ExiftoolStripper(parser.GenericParser): | |||
| 38 | return False | 37 | return False |
| 39 | 38 | ||
| 40 | def is_clean(self): | 39 | def is_clean(self): |
| 41 | ''' Check if the file is clean with the help of exiftool | 40 | """ Check if the file is clean with the help of exiftool |
| 42 | ''' | 41 | """ |
| 43 | return not self.get_meta() | 42 | return not self.get_meta() |
| 44 | 43 | ||
| 45 | def get_meta(self): | 44 | def get_meta(self): |
| 46 | ''' Return every harmful meta with help of exiftool. | 45 | """ Return every harmful meta with help of exiftool. |
| 47 | Exiftool output looks like this: | 46 | Exiftool output looks like this: |
| 48 | field name : value | 47 | field name : value |
| 49 | field name : value | 48 | field name : value |
| 50 | ''' | 49 | """ |
| 51 | output = subprocess.Popen(['exiftool', self.filename], | 50 | output = subprocess.Popen(['exiftool', self.filename], |
| 52 | stdout=subprocess.PIPE).communicate()[0] | 51 | stdout=subprocess.PIPE).communicate()[0] |
| 53 | meta = {} | 52 | meta = {} |
| @@ -59,9 +58,9 @@ class ExiftoolStripper(parser.GenericParser): | |||
| 59 | 58 | ||
| 60 | 59 | ||
| 61 | class JpegStripper(ExiftoolStripper): | 60 | class JpegStripper(ExiftoolStripper): |
| 62 | ''' Care about jpeg files with help | 61 | """ Care about jpeg files with help |
| 63 | of exiftool | 62 | of exiftool |
| 64 | ''' | 63 | """ |
| 65 | def _set_allowed(self): | 64 | def _set_allowed(self): |
| 66 | self.allowed.update(['JFIF Version', 'Resolution Unit', | 65 | self.allowed.update(['JFIF Version', 'Resolution Unit', |
| 67 | 'X Resolution', 'Y Resolution', 'Encoding Process', | 66 | 'X Resolution', 'Y Resolution', 'Encoding Process', |
| @@ -69,9 +68,9 @@ class JpegStripper(ExiftoolStripper): | |||
| 69 | 68 | ||
| 70 | 69 | ||
| 71 | class PngStripper(ExiftoolStripper): | 70 | class PngStripper(ExiftoolStripper): |
| 72 | ''' Care about png files with help | 71 | """ Care about png files with help |
| 73 | of exiftool | 72 | of exiftool |
| 74 | ''' | 73 | """ |
| 75 | def _set_allowed(self): | 74 | def _set_allowed(self): |
| 76 | self.allowed.update(['Bit Depth', 'Color Type', | 75 | self.allowed.update(['Bit Depth', 'Color Type', |
| 77 | 'Compression', 'Filter', 'Interlace', 'Palette', | 76 | 'Compression', 'Filter', 'Interlace', 'Palette', |
diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py index 0f0427b..606d39b 100644 --- a/libmat/hachoir_editor/typed_field.py +++ b/libmat/hachoir_editor/typed_field.py | |||
| @@ -5,18 +5,21 @@ from hachoir_core.field import ( | |||
| 5 | isInteger, isString) | 5 | isInteger, isString) |
| 6 | from field import FakeField | 6 | from field import FakeField |
| 7 | 7 | ||
| 8 | |||
| 8 | class EditableField(FakeField): | 9 | class EditableField(FakeField): |
| 9 | """ | 10 | """ |
| 10 | Pure virtual class used to write editable field class. | 11 | Pure virtual class used to write editable field class. |
| 11 | """ | 12 | """ |
| 12 | 13 | ||
| 13 | _is_altered = False | 14 | _is_altered = False |
| 15 | |||
| 14 | def __init__(self, parent, name, value=None): | 16 | def __init__(self, parent, name, value=None): |
| 15 | FakeField.__init__(self, parent, name) | 17 | FakeField.__init__(self, parent, name) |
| 16 | self._value = value | 18 | self._value = value |
| 17 | 19 | ||
| 18 | def _isAltered(self): | 20 | def _isAltered(self): |
| 19 | return self._is_altered | 21 | return self._is_altered |
| 22 | |||
| 20 | is_altered = property(_isAltered) | 23 | is_altered = property(_isAltered) |
| 21 | 24 | ||
| 22 | def hasValue(self): | 25 | def hasValue(self): |
| @@ -24,8 +27,10 @@ class EditableField(FakeField): | |||
| 24 | 27 | ||
| 25 | def _computeSize(self): | 28 | def _computeSize(self): |
| 26 | raise NotImplementedError() | 29 | raise NotImplementedError() |
| 30 | |||
| 27 | def _getValue(self): | 31 | def _getValue(self): |
| 28 | return self._value | 32 | return self._value |
| 33 | |||
| 29 | def _setValue(self, value): | 34 | def _setValue(self, value): |
| 30 | self._value = value | 35 | self._value = value |
| 31 | 36 | ||
| @@ -34,9 +39,11 @@ class EditableField(FakeField): | |||
| 34 | return self._getValue() | 39 | return self._getValue() |
| 35 | else: | 40 | else: |
| 36 | return FakeField._getValue(self) | 41 | return FakeField._getValue(self) |
| 42 | |||
| 37 | def _propSetValue(self, value): | 43 | def _propSetValue(self, value): |
| 38 | self._setValue(value) | 44 | self._setValue(value) |
| 39 | self._is_altered = True | 45 | self._is_altered = True |
| 46 | |||
| 40 | value = property(_propGetValue, _propSetValue) | 47 | value = property(_propGetValue, _propSetValue) |
| 41 | 48 | ||
| 42 | def _getSize(self): | 49 | def _getSize(self): |
| @@ -44,6 +51,7 @@ class EditableField(FakeField): | |||
| 44 | return self._computeSize() | 51 | return self._computeSize() |
| 45 | else: | 52 | else: |
| 46 | return FakeField._getSize(self) | 53 | return FakeField._getSize(self) |
| 54 | |||
| 47 | size = property(_getSize) | 55 | size = property(_getSize) |
| 48 | 56 | ||
| 49 | def _write(self, output): | 57 | def _write(self, output): |
| @@ -55,6 +63,7 @@ class EditableField(FakeField): | |||
| 55 | else: | 63 | else: |
| 56 | return FakeField.writeInto(self, output) | 64 | return FakeField.writeInto(self, output) |
| 57 | 65 | ||
| 66 | |||
| 58 | class EditableFixedField(EditableField): | 67 | class EditableFixedField(EditableField): |
| 59 | """ | 68 | """ |
| 60 | Editable field with fixed size. | 69 | Editable field with fixed size. |
| @@ -69,8 +78,10 @@ class EditableFixedField(EditableField): | |||
| 69 | 78 | ||
| 70 | def _getSize(self): | 79 | def _getSize(self): |
| 71 | return self._size | 80 | return self._size |
| 81 | |||
| 72 | size = property(_getSize) | 82 | size = property(_getSize) |
| 73 | 83 | ||
| 84 | |||
| 74 | class EditableBits(EditableFixedField): | 85 | class EditableBits(EditableFixedField): |
| 75 | def __init__(self, parent, name, *args): | 86 | def __init__(self, parent, name, *args): |
| 76 | if args: | 87 | if args: |
| @@ -90,14 +101,15 @@ class EditableBits(EditableFixedField): | |||
| 90 | self._is_altered = True | 101 | self._is_altered = True |
| 91 | 102 | ||
| 92 | def _setValue(self, value): | 103 | def _setValue(self, value): |
| 93 | if not(0 <= value < (1 << self._size)): | 104 | if not (0 <= value < (1 << self._size)): |
| 94 | raise ValueError("Invalid value, must be in range %s..%s" | 105 | raise ValueError("Invalid value, must be in range %s..%s" |
| 95 | % (0, (1 << self._size) - 1)) | 106 | % (0, (1 << self._size) - 1)) |
| 96 | self._value = value | 107 | self._value = value |
| 97 | 108 | ||
| 98 | def _write(self, output): | 109 | def _write(self, output): |
| 99 | output.writeBits(self._size, self._value, self._parent.endian) | 110 | output.writeBits(self._size, self._value, self._parent.endian) |
| 100 | 111 | ||
| 112 | |||
| 101 | class EditableBytes(EditableField): | 113 | class EditableBytes(EditableField): |
| 102 | def _setValue(self, value): | 114 | def _setValue(self, value): |
| 103 | if not value: raise ValueError( | 115 | if not value: raise ValueError( |
| @@ -110,11 +122,12 @@ class EditableBytes(EditableField): | |||
| 110 | def _write(self, output): | 122 | def _write(self, output): |
| 111 | output.writeBytes(self._value) | 123 | output.writeBytes(self._value) |
| 112 | 124 | ||
| 125 | |||
| 113 | class EditableString(EditableField): | 126 | class EditableString(EditableField): |
| 114 | MAX_SIZE = { | 127 | MAX_SIZE = { |
| 115 | "Pascal8": (1 << 8)-1, | 128 | "Pascal8": (1 << 8) - 1, |
| 116 | "Pascal16": (1 << 16)-1, | 129 | "Pascal16": (1 << 16) - 1, |
| 117 | "Pascal32": (1 << 32)-1, | 130 | "Pascal32": (1 << 32) - 1, |
| 118 | } | 131 | } |
| 119 | 132 | ||
| 120 | def __init__(self, parent, name, *args, **kw): | 133 | def __init__(self, parent, name, *args, **kw): |
| @@ -152,7 +165,7 @@ class EditableString(EditableField): | |||
| 152 | self._value = value | 165 | self._value = value |
| 153 | 166 | ||
| 154 | def _computeSize(self): | 167 | def _computeSize(self): |
| 155 | return (self._prefix_size + len(self._value) + len(self._suffix_str))*8 | 168 | return (self._prefix_size + len(self._value) + len(self._suffix_str)) * 8 |
| 156 | 169 | ||
| 157 | def _write(self, output): | 170 | def _write(self, output): |
| 158 | if self._format in GenericString.SUFFIX_FORMAT: | 171 | if self._format in GenericString.SUFFIX_FORMAT: |
| @@ -166,6 +179,7 @@ class EditableString(EditableField): | |||
| 166 | output.writeInteger(len(self._value), False, size, self._parent.endian) | 179 | output.writeInteger(len(self._value), False, size, self._parent.endian) |
| 167 | output.writeBytes(self._value) | 180 | output.writeBytes(self._value) |
| 168 | 181 | ||
| 182 | |||
| 169 | class EditableCharacter(EditableFixedField): | 183 | class EditableCharacter(EditableFixedField): |
| 170 | def __init__(self, parent, name, *args): | 184 | def __init__(self, parent, name, *args): |
| 171 | if args: | 185 | if args: |
| @@ -190,16 +204,17 @@ class EditableCharacter(EditableFixedField): | |||
| 190 | def _write(self, output): | 204 | def _write(self, output): |
| 191 | output.writeBytes(self._value) | 205 | output.writeBytes(self._value) |
| 192 | 206 | ||
| 207 | |||
| 193 | class EditableInteger(EditableFixedField): | 208 | class EditableInteger(EditableFixedField): |
| 194 | VALID_VALUE_SIGNED = { | 209 | VALID_VALUE_SIGNED = { |
| 195 | 8: (-(1 << 8), (1 << 8)-1), | 210 | 8: (-(1 << 8), (1 << 8) - 1), |
| 196 | 16: (-(1 << 15), (1 << 15)-1), | 211 | 16: (-(1 << 15), (1 << 15) - 1), |
| 197 | 32: (-(1 << 31), (1 << 31)-1), | 212 | 32: (-(1 << 31), (1 << 31) - 1), |
| 198 | } | 213 | } |
| 199 | VALID_VALUE_UNSIGNED = { | 214 | VALID_VALUE_UNSIGNED = { |
| 200 | 8: (0, (1 << 8)-1), | 215 | 8: (0, (1 << 8) - 1), |
| 201 | 16: (0, (1 << 16)-1), | 216 | 16: (0, (1 << 16) - 1), |
| 202 | 32: (0, (1 << 32)-1) | 217 | 32: (0, (1 << 32) - 1) |
| 203 | } | 218 | } |
| 204 | 219 | ||
| 205 | def __init__(self, parent, name, *args): | 220 | def __init__(self, parent, name, *args): |
| @@ -227,14 +242,15 @@ class EditableInteger(EditableFixedField): | |||
| 227 | else: | 242 | else: |
| 228 | valid = self.VALID_VALUE_UNSIGNED | 243 | valid = self.VALID_VALUE_UNSIGNED |
| 229 | minval, maxval = valid[self._size] | 244 | minval, maxval = valid[self._size] |
| 230 | if not(minval <= value <= maxval): | 245 | if not (minval <= value <= maxval): |
| 231 | raise ValueError("Invalid value, must be in range %s..%s" | 246 | raise ValueError("Invalid value, must be in range %s..%s" |
| 232 | % (minval, maxval)) | 247 | % (minval, maxval)) |
| 233 | self._value = value | 248 | self._value = value |
| 234 | 249 | ||
| 235 | def _write(self, output): | 250 | def _write(self, output): |
| 236 | output.writeInteger( | 251 | output.writeInteger( |
| 237 | self.value, self._signed, self._size//8, self._parent.endian) | 252 | self.value, self._signed, self._size // 8, self._parent.endian) |
| 253 | |||
| 238 | 254 | ||
| 239 | def createEditableField(fieldset, field): | 255 | def createEditableField(fieldset, field): |
| 240 | if isInteger(field): | 256 | if isInteger(field): |
| @@ -250,4 +266,3 @@ def createEditableField(fieldset, field): | |||
| 250 | else: | 266 | else: |
| 251 | cls = FakeField | 267 | cls = FakeField |
| 252 | return cls(fieldset, field.name) | 268 | return cls(fieldset, field.name) |
| 253 | |||
diff --git a/libmat/images.py b/libmat/images.py index 67c710f..0c4f3e0 100644 --- a/libmat/images.py +++ b/libmat/images.py | |||
| @@ -1,23 +1,23 @@ | |||
| 1 | ''' Takes care about pictures formats | 1 | """ Takes care about pictures formats |
| 2 | 2 | ||
| 3 | References: | 3 | References: |
| 4 | - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm | 4 | - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm |
| 5 | - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html | 5 | - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html |
| 6 | - PNG: http://www.w3.org/TR/PNG-Chunks.html | 6 | - PNG: http://www.w3.org/TR/PNG-Chunks.html |
| 7 | ''' | 7 | """ |
| 8 | 8 | ||
| 9 | import parser | 9 | import parser |
| 10 | 10 | ||
| 11 | 11 | ||
| 12 | class JpegStripper(parser.GenericParser): | 12 | class JpegStripper(parser.GenericParser): |
| 13 | ''' Represents a jpeg file. | 13 | """ Represents a jpeg file. |
| 14 | Custom Huffman and Quantization tables | 14 | Custom Huffman and Quantization tables |
| 15 | are stripped: they may leak | 15 | are stripped: they may leak |
| 16 | some info, and the quality loss is minor. | 16 | some info, and the quality loss is minor. |
| 17 | ''' | 17 | """ |
| 18 | def _should_remove(self, field): | 18 | def _should_remove(self, field): |
| 19 | ''' Return True if the field is compromising | 19 | """ Return True if the field is compromising |
| 20 | ''' | 20 | """ |
| 21 | field_list = frozenset([ | 21 | field_list = frozenset([ |
| 22 | 'start_image', # start of the image | 22 | 'start_image', # start of the image |
| 23 | 'app0', # JFIF data | 23 | 'app0', # JFIF data |
| @@ -35,11 +35,11 @@ class JpegStripper(parser.GenericParser): | |||
| 35 | 35 | ||
| 36 | 36 | ||
| 37 | class PngStripper(parser.GenericParser): | 37 | class PngStripper(parser.GenericParser): |
| 38 | ''' Represents a png file | 38 | """ Represents a png file |
| 39 | ''' | 39 | """ |
| 40 | def _should_remove(self, field): | 40 | def _should_remove(self, field): |
| 41 | ''' Return True if the field is compromising | 41 | """ Return True if the field is compromising |
| 42 | ''' | 42 | """ |
| 43 | field_list = frozenset([ | 43 | field_list = frozenset([ |
| 44 | 'id', | 44 | 'id', |
| 45 | 'header', # PNG header | 45 | 'header', # PNG header |
diff --git a/libmat/mat.py b/libmat/mat.py index 6e56d54..954b9a3 100644 --- a/libmat/mat.py +++ b/libmat/mat.py | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | 2 | ||
| 3 | ''' Metadata anonymisation toolkit library | 3 | """ Metadata anonymisation toolkit library |
| 4 | ''' | 4 | """ |
| 5 | 5 | ||
| 6 | import logging | 6 | import logging |
| 7 | import mimetypes | 7 | import mimetypes |
| @@ -18,15 +18,15 @@ import libmat.exceptions | |||
| 18 | __version__ = '0.5.3' | 18 | __version__ = '0.5.3' |
| 19 | __author__ = 'jvoisin' | 19 | __author__ = 'jvoisin' |
| 20 | 20 | ||
| 21 | #Silence | 21 | # Silence |
| 22 | LOGGING_LEVEL = logging.CRITICAL | 22 | LOGGING_LEVEL = logging.CRITICAL |
| 23 | hachoir_core.config.quiet = True | 23 | hachoir_core.config.quiet = True |
| 24 | fname = '' | 24 | fname = '' |
| 25 | 25 | ||
| 26 | #Verbose | 26 | # Verbose |
| 27 | #LOGGING_LEVEL = logging.DEBUG | 27 | # LOGGING_LEVEL = logging.DEBUG |
| 28 | #hachoir_core.config.quiet = False | 28 | # hachoir_core.config.quiet = False |
| 29 | #logname = 'report.log' | 29 | # logname = 'report.log' |
| 30 | 30 | ||
| 31 | logging.basicConfig(filename=fname, level=LOGGING_LEVEL) | 31 | logging.basicConfig(filename=fname, level=LOGGING_LEVEL) |
| 32 | 32 | ||
| @@ -34,10 +34,10 @@ import strippers # this is loaded here because we need LOGGING_LEVEL | |||
| 34 | 34 | ||
| 35 | 35 | ||
| 36 | def get_logo(): | 36 | def get_logo(): |
| 37 | ''' Return the path to the logo | 37 | """ Return the path to the logo |
| 38 | ''' | 38 | """ |
| 39 | if os.path.isfile(os.path.join(os.path.curdir, 'data/mat.png')): | 39 | if os.path.isfile(os.path.join(os.path.curdir, 'data/mat.png')): |
| 40 | return os.path.join(os.path.curdir,'data/mat.png') | 40 | return os.path.join(os.path.curdir, 'data/mat.png') |
| 41 | elif os.path.isfile('/usr/share/pixmaps/mat.png'): | 41 | elif os.path.isfile('/usr/share/pixmaps/mat.png'): |
| 42 | return '/usr/share/pixmaps/mat.png' | 42 | return '/usr/share/pixmaps/mat.png' |
| 43 | elif os.path.isfile('/usr/local/share/pixmaps/mat.png'): | 43 | elif os.path.isfile('/usr/local/share/pixmaps/mat.png'): |
| @@ -45,8 +45,8 @@ def get_logo(): | |||
| 45 | 45 | ||
| 46 | 46 | ||
| 47 | def get_datafile_path(filename): | 47 | def get_datafile_path(filename): |
| 48 | ''' Return the path to the given ressource | 48 | """ Return the path to the given ressource |
| 49 | ''' | 49 | """ |
| 50 | if os.path.isfile(os.path.join(os.path.curdir, 'data', filename)): | 50 | if os.path.isfile(os.path.join(os.path.curdir, 'data', filename)): |
| 51 | return os.path.join(os.path.curdir, 'data', filename) | 51 | return os.path.join(os.path.curdir, 'data', filename) |
| 52 | elif os.path.isfile(os.path.join('/usr/local/share/mat/', filename)): | 52 | elif os.path.isfile(os.path.join('/usr/local/share/mat/', filename)): |
| @@ -56,10 +56,10 @@ def get_datafile_path(filename): | |||
| 56 | 56 | ||
| 57 | 57 | ||
| 58 | def list_supported_formats(): | 58 | def list_supported_formats(): |
| 59 | ''' Return a list of all locally supported fileformat. | 59 | """ Return a list of all locally supported fileformat. |
| 60 | It parses that FORMATS file, and removes locally | 60 | It parses that FORMATS file, and removes locally |
| 61 | non-supported formats. | 61 | non-supported formats. |
| 62 | ''' | 62 | """ |
| 63 | handler = XMLParser() | 63 | handler = XMLParser() |
| 64 | parser = xml.sax.make_parser() | 64 | parser = xml.sax.make_parser() |
| 65 | parser.setContentHandler(handler) | 65 | parser.setContentHandler(handler) |
| @@ -76,9 +76,10 @@ def list_supported_formats(): | |||
| 76 | 76 | ||
| 77 | 77 | ||
| 78 | class XMLParser(xml.sax.handler.ContentHandler): | 78 | class XMLParser(xml.sax.handler.ContentHandler): |
| 79 | ''' Parse the supported format xml, and return a corresponding | 79 | """ Parse the supported format xml, and return a corresponding |
| 80 | list of dict | 80 | list of dict |
| 81 | ''' | 81 | """ |
| 82 | |||
| 82 | def __init__(self): | 83 | def __init__(self): |
| 83 | self.dict = {} | 84 | self.dict = {} |
| 84 | self.list = [] | 85 | self.list = [] |
| @@ -86,15 +87,15 @@ class XMLParser(xml.sax.handler.ContentHandler): | |||
| 86 | self.between = False | 87 | self.between = False |
| 87 | 88 | ||
| 88 | def startElement(self, name, attrs): | 89 | def startElement(self, name, attrs): |
| 89 | ''' Called when entering into xml tag | 90 | """ Called when entering into xml tag |
| 90 | ''' | 91 | """ |
| 91 | self.between = True | 92 | self.between = True |
| 92 | self.key = name | 93 | self.key = name |
| 93 | self.content = '' | 94 | self.content = '' |
| 94 | 95 | ||
| 95 | def endElement(self, name): | 96 | def endElement(self, name): |
| 96 | ''' Called when exiting a xml tag | 97 | """ Called when exiting a xml tag |
| 97 | ''' | 98 | """ |
| 98 | if name == 'format': # leaving a fileformat section | 99 | if name == 'format': # leaving a fileformat section |
| 99 | self.list.append(self.dict.copy()) | 100 | self.list.append(self.dict.copy()) |
| 100 | self.dict.clear() | 101 | self.dict.clear() |
| @@ -104,15 +105,15 @@ class XMLParser(xml.sax.handler.ContentHandler): | |||
| 104 | self.between = False | 105 | self.between = False |
| 105 | 106 | ||
| 106 | def characters(self, characters): | 107 | def characters(self, characters): |
| 107 | ''' Concatenate the content between opening and closing tags | 108 | """ Concatenate the content between opening and closing tags |
| 108 | ''' | 109 | """ |
| 109 | if self.between: | 110 | if self.between: |
| 110 | self.content += characters | 111 | self.content += characters |
| 111 | 112 | ||
| 112 | 113 | ||
| 113 | def secure_remove(filename): | 114 | def secure_remove(filename): |
| 114 | ''' Securely remove the file | 115 | """ Securely remove the file |
| 115 | ''' | 116 | """ |
| 116 | # I want the file removed, even if it's ro | 117 | # I want the file removed, even if it's ro |
| 117 | try: | 118 | try: |
| 118 | os.chmod(filename, 220) | 119 | os.chmod(filename, 220) |
| @@ -141,9 +142,9 @@ def secure_remove(filename): | |||
| 141 | 142 | ||
| 142 | 143 | ||
| 143 | def create_class_file(name, backup, **kwargs): | 144 | def create_class_file(name, backup, **kwargs): |
| 144 | ''' Return a $FILETYPEStripper() class, | 145 | """ Return a $FILETYPEStripper() class, |
| 145 | corresponding to the filetype of the given file | 146 | corresponding to the filetype of the given file |
| 146 | ''' | 147 | """ |
| 147 | if not os.path.isfile(name): # check if the file exists | 148 | if not os.path.isfile(name): # check if the file exists |
| 148 | logging.error('%s is not a valid file' % name) | 149 | logging.error('%s is not a valid file' % name) |
| 149 | return None | 150 | return None |
| @@ -153,7 +154,7 @@ def create_class_file(name, backup, **kwargs): | |||
| 153 | return None | 154 | return None |
| 154 | 155 | ||
| 155 | if not os.path.getsize(name): | 156 | if not os.path.getsize(name): |
| 156 | #check if the file is not empty (hachoir crash on empty files) | 157 | # check if the file is not empty (hachoir crash on empty files) |
| 157 | logging.error('%s is empty' % name) | 158 | logging.error('%s is empty' % name) |
| 158 | return None | 159 | return None |
| 159 | 160 | ||
diff --git a/libmat/misc.py b/libmat/misc.py index 450f381..b1a551c 100644 --- a/libmat/misc.py +++ b/libmat/misc.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Care about misc formats | 1 | """ Care about misc formats |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | import parser | 4 | import parser |
| 5 | 5 | ||
| @@ -7,33 +7,34 @@ from bencode import bencode | |||
| 7 | 7 | ||
| 8 | 8 | ||
| 9 | class TorrentStripper(parser.GenericParser): | 9 | class TorrentStripper(parser.GenericParser): |
| 10 | ''' Represent a torrent file with the help | 10 | """ Represent a torrent file with the help |
| 11 | of the bencode lib from Petru Paler | 11 | of the bencode lib from Petru Paler |
| 12 | ''' | 12 | """ |
| 13 | |||
| 13 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 14 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 14 | super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 15 | super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 15 | self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', | 16 | self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', |
| 16 | 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) | 17 | 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) |
| 17 | 18 | ||
| 18 | def __get_key_recursively(self, dictionary): | 19 | def __get_key_recursively(self, dictionary): |
| 19 | ''' Get recursively all keys from a dict and | 20 | """ Get recursively all keys from a dict and |
| 20 | its subdicts | 21 | its subdicts |
| 21 | ''' | 22 | """ |
| 22 | for i, j in list(dictionary.items()): | 23 | for i, j in list(dictionary.items()): |
| 23 | if isinstance(j, dict): | 24 | if isinstance(j, dict): |
| 24 | return set([i]).union(self.__get_key_recursively(j)) | 25 | return {i}.union(self.__get_key_recursively(j)) |
| 25 | return set([i]) | 26 | return {i} |
| 26 | 27 | ||
| 27 | def is_clean(self): | 28 | def is_clean(self): |
| 28 | ''' Check if the file is clean from harmful metadata | 29 | """ Check if the file is clean from harmful metadata |
| 29 | ''' | 30 | """ |
| 30 | with open(self.filename, 'r') as f: | 31 | with open(self.filename, 'r') as f: |
| 31 | decoded = bencode.bdecode(f.read()) | 32 | decoded = bencode.bdecode(f.read()) |
| 32 | return self.fields.issuperset(self.__get_key_recursively(decoded)) | 33 | return self.fields.issuperset(self.__get_key_recursively(decoded)) |
| 33 | 34 | ||
| 34 | def __get_meta_recursively(self, dictionary): | 35 | def __get_meta_recursively(self, dictionary): |
| 35 | ''' Get recursively all harmful metadata | 36 | """ Get recursively all harmful metadata |
| 36 | ''' | 37 | """ |
| 37 | d = dict() | 38 | d = dict() |
| 38 | for i, j in list(dictionary.items()): | 39 | for i, j in list(dictionary.items()): |
| 39 | if i not in self.fields: | 40 | if i not in self.fields: |
| @@ -43,15 +44,15 @@ class TorrentStripper(parser.GenericParser): | |||
| 43 | return d | 44 | return d |
| 44 | 45 | ||
| 45 | def get_meta(self): | 46 | def get_meta(self): |
| 46 | ''' Return a dict with all the meta of the file | 47 | """ Return a dict with all the meta of the file |
| 47 | ''' | 48 | """ |
| 48 | with open(self.filename, 'r') as f: | 49 | with open(self.filename, 'r') as f: |
| 49 | decoded = bencode.bdecode(f.read()) | 50 | decoded = bencode.bdecode(f.read()) |
| 50 | return self.__get_meta_recursively(decoded) | 51 | return self.__get_meta_recursively(decoded) |
| 51 | 52 | ||
| 52 | def __remove_all_recursively(self, dictionary): | 53 | def __remove_all_recursively(self, dictionary): |
| 53 | ''' Remove recursively all compromizing fields | 54 | """ Remove recursively all compromizing fields |
| 54 | ''' | 55 | """ |
| 55 | d = dict() | 56 | d = dict() |
| 56 | for i, j in [i for i in list(dictionary.items()) if i in self.fields]: | 57 | for i, j in [i for i in list(dictionary.items()) if i in self.fields]: |
| 57 | if isinstance(j, dict): | 58 | if isinstance(j, dict): |
| @@ -61,8 +62,8 @@ class TorrentStripper(parser.GenericParser): | |||
| 61 | return d | 62 | return d |
| 62 | 63 | ||
| 63 | def remove_all(self): | 64 | def remove_all(self): |
| 64 | ''' Remove all comprimizing fields | 65 | """ Remove all comprimizing fields |
| 65 | ''' | 66 | """ |
| 66 | decoded = '' | 67 | decoded = '' |
| 67 | with open(self.filename, 'r') as f: | 68 | with open(self.filename, 'r') as f: |
| 68 | decoded = bencode.bdecode(f.read()) | 69 | decoded = bencode.bdecode(f.read()) |
diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py index 403c9a7..be89178 100644 --- a/libmat/mutagenstripper.py +++ b/libmat/mutagenstripper.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Take care of mutagen-supported formats (audio) | 1 | """ Take care of mutagen-supported formats (audio) |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | import parser | 4 | import parser |
| 5 | 5 | ||
| @@ -23,9 +23,9 @@ class MutagenStripper(parser.GenericParser): | |||
| 23 | return True | 23 | return True |
| 24 | 24 | ||
| 25 | def get_meta(self): | 25 | def get_meta(self): |
| 26 | ''' | 26 | """ |
| 27 | Return the content of the metadata block is present | 27 | Return the content of the metadata block is present |
| 28 | ''' | 28 | """ |
| 29 | metadata = {} | 29 | metadata = {} |
| 30 | if self.mfile.tags: | 30 | if self.mfile.tags: |
| 31 | for key, value in self.mfile.tags: | 31 | for key, value in self.mfile.tags: |
diff --git a/libmat/office.py b/libmat/office.py index d020c46..bd4bd97 100644 --- a/libmat/office.py +++ b/libmat/office.py | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | ''' Care about office's formats | 1 | """ Care about office's formats |
| 2 | 2 | ||
| 3 | ''' | 3 | """ |
| 4 | 4 | ||
| 5 | import logging | 5 | import logging |
| 6 | import os | 6 | import os |
| @@ -21,14 +21,14 @@ import archive | |||
| 21 | 21 | ||
| 22 | 22 | ||
| 23 | class OpenDocumentStripper(archive.TerminalZipStripper): | 23 | class OpenDocumentStripper(archive.TerminalZipStripper): |
| 24 | ''' An open document file is a zip, with xml file into. | 24 | """ An open document file is a zip, with xml file into. |
| 25 | The one that interest us is meta.xml | 25 | The one that interest us is meta.xml |
| 26 | ''' | 26 | """ |
| 27 | 27 | ||
| 28 | def get_meta(self): | 28 | def get_meta(self): |
| 29 | ''' Return a dict with all the meta of the file by | 29 | """ Return a dict with all the meta of the file by |
| 30 | trying to read the meta.xml file. | 30 | trying to read the meta.xml file. |
| 31 | ''' | 31 | """ |
| 32 | metadata = super(OpenDocumentStripper, self).get_meta() | 32 | metadata = super(OpenDocumentStripper, self).get_meta() |
| 33 | zipin = zipfile.ZipFile(self.filename, 'r') | 33 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 34 | try: | 34 | try: |
| @@ -49,13 +49,13 @@ class OpenDocumentStripper(archive.TerminalZipStripper): | |||
| 49 | return metadata | 49 | return metadata |
| 50 | 50 | ||
| 51 | def remove_all(self): | 51 | def remove_all(self): |
| 52 | ''' Removes metadata | 52 | """ Removes metadata |
| 53 | ''' | 53 | """ |
| 54 | return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml']) | 54 | return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml']) |
| 55 | 55 | ||
| 56 | def is_clean(self): | 56 | def is_clean(self): |
| 57 | ''' Check if the file is clean from harmful metadatas | 57 | """ Check if the file is clean from harmful metadatas |
| 58 | ''' | 58 | """ |
| 59 | clean_super = super(OpenDocumentStripper, self).is_clean() | 59 | clean_super = super(OpenDocumentStripper, self).is_clean() |
| 60 | if clean_super is False: | 60 | if clean_super is False: |
| 61 | return False | 61 | return False |
| @@ -70,20 +70,21 @@ class OpenDocumentStripper(archive.TerminalZipStripper): | |||
| 70 | 70 | ||
| 71 | 71 | ||
| 72 | class OpenXmlStripper(archive.TerminalZipStripper): | 72 | class OpenXmlStripper(archive.TerminalZipStripper): |
| 73 | ''' Represent an office openxml document, which is like | 73 | """ Represent an office openxml document, which is like |
| 74 | an opendocument format, with some tricky stuff added. | 74 | an opendocument format, with some tricky stuff added. |
| 75 | It contains mostly xml, but can have media blobs, crap, ... | 75 | It contains mostly xml, but can have media blobs, crap, ... |
| 76 | (I don't like this format.) | 76 | (I don't like this format.) |
| 77 | ''' | 77 | """ |
| 78 | |||
| 78 | def remove_all(self): | 79 | def remove_all(self): |
| 79 | return super(OpenXmlStripper, self).remove_all( | 80 | return super(OpenXmlStripper, self).remove_all( |
| 80 | beginning_blacklist=('docProps/'), whitelist=('.rels')) | 81 | beginning_blacklist='docProps/', whitelist='.rels') |
| 81 | 82 | ||
| 82 | def is_clean(self): | 83 | def is_clean(self): |
| 83 | ''' Check if the file is clean from harmful metadatas. | 84 | """ Check if the file is clean from harmful metadatas. |
| 84 | This implementation is faster than something like | 85 | This implementation is faster than something like |
| 85 | "return this.get_meta() == {}". | 86 | "return this.get_meta() == {}". |
| 86 | ''' | 87 | """ |
| 87 | clean_super = super(OpenXmlStripper, self).is_clean() | 88 | clean_super = super(OpenXmlStripper, self).is_clean() |
| 88 | if clean_super is False: | 89 | if clean_super is False: |
| 89 | return False | 90 | return False |
| @@ -96,8 +97,8 @@ class OpenXmlStripper(archive.TerminalZipStripper): | |||
| 96 | return True | 97 | return True |
| 97 | 98 | ||
| 98 | def get_meta(self): | 99 | def get_meta(self): |
| 99 | ''' Return a dict with all the meta of the file | 100 | """ Return a dict with all the meta of the file |
| 100 | ''' | 101 | """ |
| 101 | metadata = super(OpenXmlStripper, self).get_meta() | 102 | metadata = super(OpenXmlStripper, self).get_meta() |
| 102 | 103 | ||
| 103 | zipin = zipfile.ZipFile(self.filename, 'r') | 104 | zipin = zipfile.ZipFile(self.filename, 'r') |
| @@ -109,8 +110,9 @@ class OpenXmlStripper(archive.TerminalZipStripper): | |||
| 109 | 110 | ||
| 110 | 111 | ||
| 111 | class PdfStripper(parser.GenericParser): | 112 | class PdfStripper(parser.GenericParser): |
| 112 | ''' Represent a PDF file | 113 | """ Represent a PDF file |
| 113 | ''' | 114 | """ |
| 115 | |||
| 114 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 116 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 115 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 117 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 116 | self.uri = 'file://' + os.path.abspath(self.filename) | 118 | self.uri = 'file://' + os.path.abspath(self.filename) |
| @@ -121,16 +123,16 @@ class PdfStripper(parser.GenericParser): | |||
| 121 | self.pdf_quality = False | 123 | self.pdf_quality = False |
| 122 | 124 | ||
| 123 | self.meta_list = frozenset(['title', 'author', 'subject', | 125 | self.meta_list = frozenset(['title', 'author', 'subject', |
| 124 | 'keywords', 'creator', 'producer', 'metadata']) | 126 | 'keywords', 'creator', 'producer', 'metadata']) |
| 125 | 127 | ||
| 126 | def is_clean(self): | 128 | def is_clean(self): |
| 127 | ''' Check if the file is clean from harmful metadatas | 129 | """ Check if the file is clean from harmful metadatas |
| 128 | ''' | 130 | """ |
| 129 | document = Poppler.Document.new_from_file(self.uri, self.password) | 131 | document = Poppler.Document.new_from_file(self.uri, self.password) |
| 130 | return not any(document.get_property(key) for key in self.meta_list) | 132 | return not any(document.get_property(key) for key in self.meta_list) |
| 131 | 133 | ||
| 132 | def remove_all(self): | 134 | def remove_all(self): |
| 133 | ''' Opening the PDF with poppler, then doing a render | 135 | """ Opening the PDF with poppler, then doing a render |
| 134 | on a cairo pdfsurface for each pages. | 136 | on a cairo pdfsurface for each pages. |
| 135 | 137 | ||
| 136 | http://cairographics.org/documentation/pycairo/2/ | 138 | http://cairographics.org/documentation/pycairo/2/ |
| @@ -138,7 +140,7 @@ class PdfStripper(parser.GenericParser): | |||
| 138 | The use of an intermediate tempfile is necessary because | 140 | The use of an intermediate tempfile is necessary because |
| 139 | python-cairo segfaults on unicode. | 141 | python-cairo segfaults on unicode. |
| 140 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 | 142 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 |
| 141 | ''' | 143 | """ |
| 142 | document = Poppler.Document.new_from_file(self.uri, self.password) | 144 | document = Poppler.Document.new_from_file(self.uri, self.password) |
| 143 | try: | 145 | try: |
| 144 | output = tempfile.mkstemp()[1] | 146 | output = tempfile.mkstemp()[1] |
| @@ -169,6 +171,7 @@ class PdfStripper(parser.GenericParser): | |||
| 169 | 171 | ||
| 170 | try: | 172 | try: |
| 171 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw | 173 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw |
| 174 | |||
| 172 | logging.debug('Removing %s\'s superficial metadata' % self.filename) | 175 | logging.debug('Removing %s\'s superficial metadata' % self.filename) |
| 173 | trailer = pdfrw.PdfReader(self.output) | 176 | trailer = pdfrw.PdfReader(self.output) |
| 174 | trailer.Info.Producer = None | 177 | trailer.Info.Producer = None |
| @@ -183,8 +186,8 @@ class PdfStripper(parser.GenericParser): | |||
| 183 | return True | 186 | return True |
| 184 | 187 | ||
| 185 | def get_meta(self): | 188 | def get_meta(self): |
| 186 | ''' Return a dict with all the meta of the file | 189 | """ Return a dict with all the meta of the file |
| 187 | ''' | 190 | """ |
| 188 | document = Poppler.Document.new_from_file(self.uri, self.password) | 191 | document = Poppler.Document.new_from_file(self.uri, self.password) |
| 189 | metadata = {} | 192 | metadata = {} |
| 190 | for key in self.meta_list: | 193 | for key in self.meta_list: |
diff --git a/libmat/parser.py b/libmat/parser.py index 1765da8..eed3140 100644 --- a/libmat/parser.py +++ b/libmat/parser.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Parent class of all parser | 1 | """ Parent class of all parser |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | import os | 4 | import os |
| 5 | import shutil | 5 | import shutil |
| @@ -22,8 +22,8 @@ FIELD = object() | |||
| 22 | 22 | ||
| 23 | 23 | ||
| 24 | class GenericParser(object): | 24 | class GenericParser(object): |
| 25 | ''' Parent class of all parsers | 25 | """ Parent class of all parsers |
| 26 | ''' | 26 | """ |
| 27 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 27 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 28 | self.filename = '' | 28 | self.filename = '' |
| 29 | self.parser = parser | 29 | self.parser = parser |
| @@ -40,15 +40,15 @@ class GenericParser(object): | |||
| 40 | self.output = hachoir_core.cmd_line.unicodeFilename(output) | 40 | self.output = hachoir_core.cmd_line.unicodeFilename(output) |
| 41 | 41 | ||
| 42 | def __del__(self): | 42 | def __del__(self): |
| 43 | ''' Remove tempfile if it was not used | 43 | """ Remove tempfile if it was not used |
| 44 | ''' | 44 | """ |
| 45 | if os.path.exists(self.output): | 45 | if os.path.exists(self.output): |
| 46 | mat.secure_remove(self.output) | 46 | mat.secure_remove(self.output) |
| 47 | 47 | ||
| 48 | def is_clean(self): | 48 | def is_clean(self): |
| 49 | ''' | 49 | """ |
| 50 | Check if the file is clean from harmful metadatas | 50 | Check if the file is clean from harmful metadatas |
| 51 | ''' | 51 | """ |
| 52 | for field in self.editor: | 52 | for field in self.editor: |
| 53 | if self._should_remove(field): | 53 | if self._should_remove(field): |
| 54 | return self._is_clean(self.editor) | 54 | return self._is_clean(self.editor) |
| @@ -65,16 +65,16 @@ class GenericParser(object): | |||
| 65 | return True | 65 | return True |
| 66 | 66 | ||
| 67 | def remove_all(self): | 67 | def remove_all(self): |
| 68 | ''' Remove all compromising fields | 68 | """ Remove all compromising fields |
| 69 | ''' | 69 | """ |
| 70 | state = self._remove_all(self.editor) | 70 | state = self._remove_all(self.editor) |
| 71 | hachoir_core.field.writeIntoFile(self.editor, self.output) | 71 | hachoir_core.field.writeIntoFile(self.editor, self.output) |
| 72 | self.do_backup() | 72 | self.do_backup() |
| 73 | return state | 73 | return state |
| 74 | 74 | ||
| 75 | def _remove_all(self, fieldset): | 75 | def _remove_all(self, fieldset): |
| 76 | ''' Recursive way to handle tree metadatas | 76 | """ Recursive way to handle tree metadatas |
| 77 | ''' | 77 | """ |
| 78 | try: | 78 | try: |
| 79 | for field in fieldset: | 79 | for field in fieldset: |
| 80 | remove = self._should_remove(field) | 80 | remove = self._should_remove(field) |
| @@ -87,20 +87,20 @@ class GenericParser(object): | |||
| 87 | return False | 87 | return False |
| 88 | 88 | ||
| 89 | def _remove(self, fieldset, field): | 89 | def _remove(self, fieldset, field): |
| 90 | ''' Delete the given field | 90 | """ Delete the given field |
| 91 | ''' | 91 | """ |
| 92 | del fieldset[field] | 92 | del fieldset[field] |
| 93 | 93 | ||
| 94 | def get_meta(self): | 94 | def get_meta(self): |
| 95 | ''' Return a dict with all the meta of the file | 95 | """ Return a dict with all the meta of the file |
| 96 | ''' | 96 | """ |
| 97 | metadata = {} | 97 | metadata = {} |
| 98 | self._get_meta(self.editor, metadata) | 98 | self._get_meta(self.editor, metadata) |
| 99 | return metadata | 99 | return metadata |
| 100 | 100 | ||
| 101 | def _get_meta(self, fieldset, metadata): | 101 | def _get_meta(self, fieldset, metadata): |
| 102 | ''' Recursive way to handle tree metadatas | 102 | """ Recursive way to handle tree metadatas |
| 103 | ''' | 103 | """ |
| 104 | for field in fieldset: | 104 | for field in fieldset: |
| 105 | remove = self._should_remove(field) | 105 | remove = self._should_remove(field) |
| 106 | if remove: | 106 | if remove: |
| @@ -112,22 +112,22 @@ class GenericParser(object): | |||
| 112 | self._get_meta(field, None) | 112 | self._get_meta(field, None) |
| 113 | 113 | ||
| 114 | def _should_remove(self, key): | 114 | def _should_remove(self, key): |
| 115 | ''' Return True if the field is compromising | 115 | """ Return True if the field is compromising |
| 116 | abstract method | 116 | abstract method |
| 117 | ''' | 117 | """ |
| 118 | raise NotImplementedError | 118 | raise NotImplementedError |
| 119 | 119 | ||
| 120 | def create_backup_copy(self): | 120 | def create_backup_copy(self): |
| 121 | ''' Create a backup copy | 121 | """ Create a backup copy |
| 122 | ''' | 122 | """ |
| 123 | shutil.copy2(self.filename, self.filename + '.bak') | 123 | shutil.copy2(self.filename, self.filename + '.bak') |
| 124 | 124 | ||
| 125 | def do_backup(self): | 125 | def do_backup(self): |
| 126 | ''' Keep a backup of the file if asked. | 126 | """ Keep a backup of the file if asked. |
| 127 | 127 | ||
| 128 | The process of double-renaming is not very elegant, | 128 | The process of double-renaming is not very elegant, |
| 129 | but it greatly simplify new strippers implementation. | 129 | but it greatly simplify new strippers implementation. |
| 130 | ''' | 130 | """ |
| 131 | if self.backup: | 131 | if self.backup: |
| 132 | shutil.move(self.filename, self.filename + '.bak') | 132 | shutil.move(self.filename, self.filename + '.bak') |
| 133 | else: | 133 | else: |
diff --git a/libmat/strippers.py b/libmat/strippers.py index d873a39..008442e 100644 --- a/libmat/strippers.py +++ b/libmat/strippers.py | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ''' Manage which fileformat can be processed | 1 | """ Manage which fileformat can be processed |
| 2 | ''' | 2 | """ |
| 3 | 3 | ||
| 4 | import archive | 4 | import archive |
| 5 | import audio | 5 | import audio |
