From 6ba3e3f20d7d52895bc44f9fc35b068cfce47133 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 25 Jul 2015 17:14:23 +0200 Subject: _MASSIVE_ pep8 revamp Thank you so much PyCharm --- libmat/__init__.py | 1 - libmat/archive.py | 128 ++++++++++++++++++++--------------- libmat/audio.py | 28 ++++---- libmat/bencode/__init__.py | 1 - libmat/bencode/bencode.py | 33 +++++---- libmat/exceptions.py | 12 ++-- libmat/exiftool.py | 39 ++++++----- libmat/hachoir_editor/typed_field.py | 47 ++++++++----- libmat/images.py | 20 +++--- libmat/mat.py | 55 +++++++-------- libmat/misc.py | 39 +++++------ libmat/mutagenstripper.py | 8 +-- libmat/office.py | 55 ++++++++------- libmat/parser.py | 48 ++++++------- libmat/strippers.py | 4 +- 15 files changed, 275 insertions(+), 243 deletions(-) (limited to 'libmat') diff --git a/libmat/__init__.py b/libmat/__init__.py index 8b13789..e69de29 100644 --- a/libmat/__init__.py +++ b/libmat/__init__.py @@ -1 +0,0 @@ - diff --git a/libmat/archive.py b/libmat/archive.py index d483dcc..4c62dc8 100644 --- a/libmat/archive.py +++ b/libmat/archive.py @@ -1,5 +1,5 @@ -''' Take care of archives formats -''' +""" Take care of archives formats +""" import datetime import logging @@ -16,23 +16,24 @@ import parser # Zip files do not support dates older than 01/01/1980 ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) - - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds() + - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds() class GenericArchiveStripper(parser.GenericParser): - ''' Represent a generic archive - ''' + """ Represent a generic archive + """ + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(GenericArchiveStripper, self).__init__(filename, - parser, mime, backup, is_writable, **kwargs) + parser, mime, backup, is_writable, **kwargs) self.compression = '' self.add2archive = kwargs['add2archive'] self.tempdir = tempfile.mkdtemp() def __del__(self): - ''' Remove the files inside the temp dir, + """ Remove the files inside the temp dir, then remove the temp dir - ''' + """ for root, dirs, files in os.walk(self.tempdir): for item in files: path_file = os.path.join(root, item) @@ -40,28 +41,30 @@ class GenericArchiveStripper(parser.GenericParser): shutil.rmtree(self.tempdir) def is_clean(self, list_unsupported=False): - ''' Virtual method to check for harmul metadata - ''' + """ Virtual method to check for harmul metadata + """ raise NotImplementedError def list_unsupported(self): - ''' Get a list of every non-supported files present in the archive - ''' + """ Get a list of every non-supported files present in the archive + """ return self.is_clean(list_unsupported=True) def remove_all(self): - ''' Virtual method to remove all metadata - ''' + """ Virtual method to remove all metadata + """ raise NotImplementedError class ZipStripper(GenericArchiveStripper): - ''' Represent a zip file - ''' - def __is_zipfile_clean(self, fileinfo): - ''' Check if a ZipInfo object is clean of metadata added + """ Represent a zip file + """ + + @staticmethod + def __is_zipfile_clean(fileinfo): + """ Check if a ZipInfo object is clean of metadata added by zip itself, independently of the corresponding file metadata - ''' + """ if fileinfo.comment != '': return False elif fileinfo.date_time != ZIP_EPOCH: @@ -71,11 +74,11 @@ class ZipStripper(GenericArchiveStripper): return True def is_clean(self, list_unsupported=False): - ''' Check if the given file is clean from harmful metadata + """ Check if the given file is clean from harmful metadata When list_unsupported is True, the method returns a list of all non-supported/archives files contained in the archive. - ''' + """ ret_list = [] zipin = zipfile.ZipFile(self.filename, 'r') if zipin.comment != '' and not list_unsupported: @@ -86,7 +89,7 @@ class ZipStripper(GenericArchiveStripper): path = os.path.join(self.tempdir, item.filename) if not self.__is_zipfile_clean(item) and not list_unsupported: logging.debug('%s from %s has compromising zipinfo' % - (item.filename, self.filename)) + (item.filename, self.filename)) return False if os.path.isfile(path): cfile = mat.create_class_file(path, False, add2archive=self.add2archive) @@ -97,7 +100,7 @@ class ZipStripper(GenericArchiveStripper): return False else: logging.info('%s\'s fileformat is not supported or harmless.' - % item.filename) + % item.filename) basename, ext = os.path.splitext(path) if os.path.basename(item.filename) not in ('mimetype', '.rels'): if ext not in parser.NOMETA: @@ -110,7 +113,7 @@ class ZipStripper(GenericArchiveStripper): return True def get_meta(self): - ''' Return all the metadata of a zip archive''' + """ Return all the metadata of a zip archive""" zipin = zipfile.ZipFile(self.filename, 'r') metadata = {} if zipin.comment != '': @@ -129,13 +132,14 @@ class ZipStripper(GenericArchiveStripper): metadata[item.filename] = str(cfile_meta) else: logging.info('%s\'s fileformat is not supported or harmless' - % item.filename) + % item.filename) zipin.close() return metadata - def __get_zipinfo_meta(self, zipinfo): - ''' Return all the metadata of a ZipInfo - ''' + @staticmethod + def __get_zipinfo_meta(zipinfo): + """ Return all the metadata of a ZipInfo + """ metadata = {} if zipinfo.comment != '': metadata['comment'] = zipinfo.comment @@ -145,13 +149,19 @@ class ZipStripper(GenericArchiveStripper): metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown" return metadata - def remove_all(self, whitelist=[], beginning_blacklist=[], ending_blacklist=[]): - ''' Remove all metadata from a zip archive, even thoses + def remove_all(self, whitelist=None, beginning_blacklist=None, ending_blacklist=None): + """ Remove all metadata from a zip archive, even thoses added by Python's zipfile itself. It will not add files starting with "begining_blacklist", or ending with "ending_blacklist". This method also add files present in whitelist to the archive. - ''' + """ + if not ending_blacklist: + ending_blacklist = [] + if not beginning_blacklist: + beginning_blacklist = [] + if not whitelist: + whitelist = [] zipin = zipfile.ZipFile(self.filename, 'r') zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) for item in zipin.infolist(): @@ -166,7 +176,7 @@ class ZipStripper(GenericArchiveStripper): if cfile is not None: # Handle read-only files inside archive old_stat = os.stat(path).st_mode - os.chmod(path, old_stat|stat.S_IWUSR) + os.chmod(path, old_stat | stat.S_IWUSR) cfile.remove_all() os.chmod(path, old_stat) logging.debug('Processing %s from %s' % (item.filename, self.filename)) @@ -186,11 +196,12 @@ class ZipStripper(GenericArchiveStripper): class TarStripper(GenericArchiveStripper): - ''' Represent a tarfile archive - ''' + """ Represent a tarfile archive + """ + def _remove(self, current_file): - ''' Remove the meta added by tarfile itself to the file - ''' + """ Remove the meta added by tarfile itself to the file + """ current_file.mtime = 0 current_file.uid = 0 current_file.gid = 0 @@ -198,11 +209,13 @@ class TarStripper(GenericArchiveStripper): current_file.gname = '' return current_file - def remove_all(self, whitelist=[]): - ''' Remove all harmful metadata from the tarfile. + def remove_all(self, whitelist=None): + """ Remove all harmful metadata from the tarfile. The method will also add every files matching whitelist in the produced archive. - ''' + """ + if not whitelist: + whitelist = [] tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8') tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8') for item in tarin.getmembers(): @@ -213,14 +226,14 @@ class TarStripper(GenericArchiveStripper): if cfile is not None: # Handle read-only files inside archive old_stat = os.stat(path).st_mode - os.chmod(path, old_stat|stat.S_IWUSR) + os.chmod(path, old_stat | stat.S_IWUSR) cfile.remove_all() os.chmod(path, old_stat) elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA: logging.debug('%s\' format is either not supported or harmless' % item.name) elif item.name in whitelist: logging.debug('%s is not supported, but MAT was told to add it anyway.' - % item.name) + % item.name) else: # Don't add the file to the archive logging.debug('%s will not be added' % item.name) continue @@ -230,9 +243,10 @@ class TarStripper(GenericArchiveStripper): self.do_backup() return True - def is_file_clean(self, current_file): - ''' Check metadatas added by tarfile - ''' + @staticmethod + def is_file_clean(current_file): + """ Check metadatas added by tarfile + """ if current_file.mtime != 0: return False elif current_file.uid != 0: @@ -246,17 +260,17 @@ class TarStripper(GenericArchiveStripper): return True def is_clean(self, list_unsupported=False): - ''' Check if the file is clean from harmful metadatas + """ Check if the file is clean from harmful metadatas When list_unsupported is True, the method returns a list of all non-supported/archives files contained in the archive. - ''' + """ ret_list = [] tarin = tarfile.open(self.filename, 'r' + self.compression) for item in tarin.getmembers(): if not self.is_file_clean(item) and not list_unsupported: logging.debug('%s from %s has compromising tarinfo' % - (item.name, self.filename)) + (item.name, self.filename)) return False tarin.extract(item, self.tempdir) path = os.path.join(self.tempdir, item.name) @@ -265,7 +279,7 @@ class TarStripper(GenericArchiveStripper): if cfile is not None: if not cfile.is_clean(): logging.debug('%s from %s has metadata' % - (item.name.decode("utf8"), self.filename)) + (item.name.decode("utf8"), self.filename)) if not list_unsupported: return False # Nested archives are treated like unsupported files @@ -283,8 +297,8 @@ class TarStripper(GenericArchiveStripper): return True def get_meta(self): - ''' Return a dict with all the meta of the tarfile - ''' + """ Return a dict with all the meta of the tarfile + """ tarin = tarfile.open(self.filename, 'r' + self.compression) metadata = {} for item in tarin.getmembers(): @@ -312,24 +326,26 @@ class TarStripper(GenericArchiveStripper): class TerminalZipStripper(ZipStripper): - ''' Represent a terminal level archive. + """ Represent a terminal level archive. This type of archive can not contain nested archives. It is used for formats like docx, which are basically ziped xml. - ''' + """ class GzipStripper(TarStripper): - ''' Represent a tar.gz archive - ''' + """ Represent a tar.gz archive + """ + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) self.compression = ':gz' class Bzip2Stripper(TarStripper): - ''' Represent a tar.bz2 archive - ''' + """ Represent a tar.bz2 archive + """ + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) self.compression = ':bz2' diff --git a/libmat/audio.py b/libmat/audio.py index dae9d75..2747dc1 100644 --- a/libmat/audio.py +++ b/libmat/audio.py @@ -1,5 +1,5 @@ -''' Care about audio fileformat -''' +""" Care about audio fileformat +""" try: from mutagen.flac import FLAC @@ -12,41 +12,41 @@ import mutagenstripper class MpegAudioStripper(parser.GenericParser): - ''' Represent mpeg audio file (mp3, ...) - ''' + """ Represent mpeg audio file (mp3, ...) + """ def _should_remove(self, field): return field.name in ("id3v1", "id3v2") class OggStripper(mutagenstripper.MutagenStripper): - ''' Represent an ogg vorbis file - ''' + """ Represent an ogg vorbis file + """ def _create_mfile(self): self.mfile = OggVorbis(self.filename) class FlacStripper(mutagenstripper.MutagenStripper): - ''' Represent a Flac audio file - ''' + """ Represent a Flac audio file + """ def _create_mfile(self): self.mfile = FLAC(self.filename) def remove_all(self): - ''' Remove the "metadata" block from the file - ''' + """ Remove the "metadata" block from the file + """ super(FlacStripper, self).remove_all() self.mfile.clear_pictures() self.mfile.save() return True def is_clean(self): - ''' Check if the "metadata" block is present in the file - ''' + """ Check if the "metadata" block is present in the file + """ return super(FlacStripper, self).is_clean() and not self.mfile.pictures def get_meta(self): - ''' Return the content of the metadata block if present - ''' + """ Return the content of the metadata block if present + """ metadata = super(FlacStripper, self).get_meta() if self.mfile.pictures: metadata['picture:'] = 'yes' diff --git a/libmat/bencode/__init__.py b/libmat/bencode/__init__.py index 8b13789..e69de29 100644 --- a/libmat/bencode/__init__.py +++ b/libmat/bencode/__init__.py @@ -1 +0,0 @@ - diff --git a/libmat/bencode/bencode.py b/libmat/bencode/bencode.py index a0cc99a..a7967fc 100644 --- a/libmat/bencode/bencode.py +++ b/libmat/bencode/bencode.py @@ -21,18 +21,18 @@ # THE SOFTWARE. # -''' +""" A quick (and also nice) lib to bencode/bdecode torrent files -''' +""" class BTFailure(Exception): - '''Custom Exception''' + """Custom Exception""" pass class Bencached(object): - '''Custom type : cached string''' + """Custom type : cached string""" __slots__ = ['bencoded'] def __init__(self, string): @@ -40,10 +40,10 @@ class Bencached(object): def decode_int(x, f): - '''decode an int''' + """decode an int""" f += 1 newf = x.index('e', f) - if x[f:f+1] == '-0': + if x[f:f + 1] == '-0': raise ValueError elif x[f] == '0' and newf != f + 1: raise ValueError @@ -51,7 +51,7 @@ def decode_int(x, f): def decode_string(x, f): - '''decode a string''' + """decode a string""" colon = x.index(':', f) if x[f] == '0' and colon != f + 1: raise ValueError @@ -61,7 +61,7 @@ def decode_string(x, f): def decode_list(x, f): - '''decode a list''' + """decode a list""" result = [] f += 1 while x[f] != 'e': @@ -71,7 +71,7 @@ def decode_list(x, f): def decode_dict(x, f): - '''decode a dict''' + """decode a dict""" result = {} f += 1 while x[f] != 'e': @@ -81,24 +81,24 @@ def decode_dict(x, f): def encode_bool(x, r): - '''bencode a boolean''' + """bencode a boolean""" encode_int(1 if r else 0, r) def encode_int(x, r): - '''bencode an integer/float''' + """bencode an integer/float""" r.extend(('i', str(x), 'e')) def encode_list(x, r): - '''bencode a list/tuple''' + """bencode a list/tuple""" r.append('l') [ENCODE_FUNC[type(item)](item, r) for item in x] r.append('e') def encode_dict(x, result): - '''bencode a dict''' + """bencode a dict""" result.append('d') ilist = list(x.items()) ilist.sort() @@ -108,12 +108,11 @@ def encode_dict(x, result): result.append('e') -DECODE_FUNC = {str(x):decode_string for x in range(9)} +DECODE_FUNC = {str(x): decode_string for x in range(9)} DECODE_FUNC['l'] = decode_list DECODE_FUNC['d'] = decode_dict DECODE_FUNC['i'] = decode_int - ENCODE_FUNC = {} ENCODE_FUNC[Bencached] = lambda x, r: r.append(x.bencoded) ENCODE_FUNC[int] = encode_int @@ -126,14 +125,14 @@ ENCODE_FUNC[bool] = encode_bool def bencode(string): - '''bencode $string''' + """bencode $string""" table = [] ENCODE_FUNC[type(string)](string, table) return ''.join(table) def bdecode(string): - '''decode $string''' + """decode $string""" try: result, lenght = DECODE_FUNC[string[0]](string, 0) except (IndexError, KeyError, ValueError): diff --git a/libmat/exceptions.py b/libmat/exceptions.py index 47da15c..e71c398 100644 --- a/libmat/exceptions.py +++ b/libmat/exceptions.py @@ -1,14 +1,14 @@ -''' Base exceptions for MAT -''' +""" Base exceptions for MAT +""" class UnableToRemoveFile(Exception): - '''This exception is raised when a file could not be removed - ''' + """This exception is raised when a file could not be removed + """ pass class UnableToWriteFile(Exception): - '''This exception is raised when a file + """This exception is raised when a file can could not be chmod +w - ''' + """ pass diff --git a/libmat/exiftool.py b/libmat/exiftool.py index aa6849d..0e1fefd 100644 --- a/libmat/exiftool.py +++ b/libmat/exiftool.py @@ -1,5 +1,5 @@ -''' Care about images with help of the amazing (perl) library Exiftool. -''' +""" Care about images with help of the amazing (perl) library Exiftool. +""" import subprocess @@ -7,25 +7,24 @@ import parser class ExiftoolStripper(parser.GenericParser): - ''' A generic stripper class using exiftool as backend - ''' + """ A generic stripper class using exiftool as backend + """ def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.allowed = set(['ExifTool Version Number', 'File Name', 'Directory', - 'File Size', 'File Modification Date/Time', 'File Access Date/Time', 'File Permissions', - 'File Type', 'File Type Extension', 'MIME Type', 'Image Width', 'Image Height', - 'Image Size', 'File Inode Change Date/Time', 'Megapixels']) + self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', + 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type', + 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'} self._set_allowed() def _set_allowed(self): - ''' Virtual method. Set the allowed/harmless list of metadata - ''' + """ Virtual method. Set the allowed/harmless list of metadata + """ raise NotImplementedError def remove_all(self): - ''' Remove all metadata with help of exiftool - ''' + """ Remove all metadata with help of exiftool + """ try: if self.backup: self.create_backup_copy() @@ -38,16 +37,16 @@ class ExiftoolStripper(parser.GenericParser): return False def is_clean(self): - ''' Check if the file is clean with the help of exiftool - ''' + """ Check if the file is clean with the help of exiftool + """ return not self.get_meta() def get_meta(self): - ''' Return every harmful meta with help of exiftool. + """ Return every harmful meta with help of exiftool. Exiftool output looks like this: field name : value field name : value - ''' + """ output = subprocess.Popen(['exiftool', self.filename], stdout=subprocess.PIPE).communicate()[0] meta = {} @@ -59,9 +58,9 @@ class ExiftoolStripper(parser.GenericParser): class JpegStripper(ExiftoolStripper): - ''' Care about jpeg files with help + """ Care about jpeg files with help of exiftool - ''' + """ def _set_allowed(self): self.allowed.update(['JFIF Version', 'Resolution Unit', 'X Resolution', 'Y Resolution', 'Encoding Process', @@ -69,9 +68,9 @@ class JpegStripper(ExiftoolStripper): class PngStripper(ExiftoolStripper): - ''' Care about png files with help + """ Care about png files with help of exiftool - ''' + """ def _set_allowed(self): self.allowed.update(['Bit Depth', 'Color Type', 'Compression', 'Filter', 'Interlace', 'Palette', diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py index 0f0427b..606d39b 100644 --- a/libmat/hachoir_editor/typed_field.py +++ b/libmat/hachoir_editor/typed_field.py @@ -5,18 +5,21 @@ from hachoir_core.field import ( isInteger, isString) from field import FakeField + class EditableField(FakeField): """ Pure virtual class used to write editable field class. """ _is_altered = False + def __init__(self, parent, name, value=None): FakeField.__init__(self, parent, name) self._value = value def _isAltered(self): return self._is_altered + is_altered = property(_isAltered) def hasValue(self): @@ -24,8 +27,10 @@ class EditableField(FakeField): def _computeSize(self): raise NotImplementedError() + def _getValue(self): return self._value + def _setValue(self, value): self._value = value @@ -34,9 +39,11 @@ class EditableField(FakeField): return self._getValue() else: return FakeField._getValue(self) + def _propSetValue(self, value): self._setValue(value) self._is_altered = True + value = property(_propGetValue, _propSetValue) def _getSize(self): @@ -44,6 +51,7 @@ class EditableField(FakeField): return self._computeSize() else: return FakeField._getSize(self) + size = property(_getSize) def _write(self, output): @@ -55,6 +63,7 @@ class EditableField(FakeField): else: return FakeField.writeInto(self, output) + class EditableFixedField(EditableField): """ Editable field with fixed size. @@ -69,8 +78,10 @@ class EditableFixedField(EditableField): def _getSize(self): return self._size + size = property(_getSize) + class EditableBits(EditableFixedField): def __init__(self, parent, name, *args): if args: @@ -90,14 +101,15 @@ class EditableBits(EditableFixedField): self._is_altered = True def _setValue(self, value): - if not(0 <= value < (1 << self._size)): + if not (0 <= value < (1 << self._size)): raise ValueError("Invalid value, must be in range %s..%s" - % (0, (1 << self._size) - 1)) + % (0, (1 << self._size) - 1)) self._value = value def _write(self, output): output.writeBits(self._size, self._value, self._parent.endian) + class EditableBytes(EditableField): def _setValue(self, value): if not value: raise ValueError( @@ -110,11 +122,12 @@ class EditableBytes(EditableField): def _write(self, output): output.writeBytes(self._value) + class EditableString(EditableField): MAX_SIZE = { - "Pascal8": (1 << 8)-1, - "Pascal16": (1 << 16)-1, - "Pascal32": (1 << 32)-1, + "Pascal8": (1 << 8) - 1, + "Pascal16": (1 << 16) - 1, + "Pascal32": (1 << 32) - 1, } def __init__(self, parent, name, *args, **kw): @@ -152,7 +165,7 @@ class EditableString(EditableField): self._value = value def _computeSize(self): - return (self._prefix_size + len(self._value) + len(self._suffix_str))*8 + return (self._prefix_size + len(self._value) + len(self._suffix_str)) * 8 def _write(self, output): if self._format in GenericString.SUFFIX_FORMAT: @@ -166,6 +179,7 @@ class EditableString(EditableField): output.writeInteger(len(self._value), False, size, self._parent.endian) output.writeBytes(self._value) + class EditableCharacter(EditableFixedField): def __init__(self, parent, name, *args): if args: @@ -190,16 +204,17 @@ class EditableCharacter(EditableFixedField): def _write(self, output): output.writeBytes(self._value) + class EditableInteger(EditableFixedField): VALID_VALUE_SIGNED = { - 8: (-(1 << 8), (1 << 8)-1), - 16: (-(1 << 15), (1 << 15)-1), - 32: (-(1 << 31), (1 << 31)-1), + 8: (-(1 << 8), (1 << 8) - 1), + 16: (-(1 << 15), (1 << 15) - 1), + 32: (-(1 << 31), (1 << 31) - 1), } VALID_VALUE_UNSIGNED = { - 8: (0, (1 << 8)-1), - 16: (0, (1 << 16)-1), - 32: (0, (1 << 32)-1) + 8: (0, (1 << 8) - 1), + 16: (0, (1 << 16) - 1), + 32: (0, (1 << 32) - 1) } def __init__(self, parent, name, *args): @@ -227,14 +242,15 @@ class EditableInteger(EditableFixedField): else: valid = self.VALID_VALUE_UNSIGNED minval, maxval = valid[self._size] - if not(minval <= value <= maxval): + if not (minval <= value <= maxval): raise ValueError("Invalid value, must be in range %s..%s" - % (minval, maxval)) + % (minval, maxval)) self._value = value def _write(self, output): output.writeInteger( - self.value, self._signed, self._size//8, self._parent.endian) + self.value, self._signed, self._size // 8, self._parent.endian) + def createEditableField(fieldset, field): if isInteger(field): @@ -250,4 +266,3 @@ def createEditableField(fieldset, field): else: cls = FakeField return cls(fieldset, field.name) - diff --git a/libmat/images.py b/libmat/images.py index 67c710f..0c4f3e0 100644 --- a/libmat/images.py +++ b/libmat/images.py @@ -1,23 +1,23 @@ -''' Takes care about pictures formats +""" Takes care about pictures formats References: - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html - PNG: http://www.w3.org/TR/PNG-Chunks.html -''' +""" import parser class JpegStripper(parser.GenericParser): - ''' Represents a jpeg file. + """ Represents a jpeg file. Custom Huffman and Quantization tables are stripped: they may leak some info, and the quality loss is minor. - ''' + """ def _should_remove(self, field): - ''' Return True if the field is compromising - ''' + """ Return True if the field is compromising + """ field_list = frozenset([ 'start_image', # start of the image 'app0', # JFIF data @@ -35,11 +35,11 @@ class JpegStripper(parser.GenericParser): class PngStripper(parser.GenericParser): - ''' Represents a png file - ''' + """ Represents a png file + """ def _should_remove(self, field): - ''' Return True if the field is compromising - ''' + """ Return True if the field is compromising + """ field_list = frozenset([ 'id', 'header', # PNG header diff --git a/libmat/mat.py b/libmat/mat.py index 6e56d54..954b9a3 100644 --- a/libmat/mat.py +++ b/libmat/mat.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -''' Metadata anonymisation toolkit library -''' +""" Metadata anonymisation toolkit library +""" import logging import mimetypes @@ -18,15 +18,15 @@ import libmat.exceptions __version__ = '0.5.3' __author__ = 'jvoisin' -#Silence +# Silence LOGGING_LEVEL = logging.CRITICAL hachoir_core.config.quiet = True fname = '' -#Verbose -#LOGGING_LEVEL = logging.DEBUG -#hachoir_core.config.quiet = False -#logname = 'report.log' +# Verbose +# LOGGING_LEVEL = logging.DEBUG +# hachoir_core.config.quiet = False +# logname = 'report.log' logging.basicConfig(filename=fname, level=LOGGING_LEVEL) @@ -34,10 +34,10 @@ import strippers # this is loaded here because we need LOGGING_LEVEL def get_logo(): - ''' Return the path to the logo - ''' + """ Return the path to the logo + """ if os.path.isfile(os.path.join(os.path.curdir, 'data/mat.png')): - return os.path.join(os.path.curdir,'data/mat.png') + return os.path.join(os.path.curdir, 'data/mat.png') elif os.path.isfile('/usr/share/pixmaps/mat.png'): return '/usr/share/pixmaps/mat.png' elif os.path.isfile('/usr/local/share/pixmaps/mat.png'): @@ -45,8 +45,8 @@ def get_logo(): def get_datafile_path(filename): - ''' Return the path to the given ressource - ''' + """ Return the path to the given ressource + """ if os.path.isfile(os.path.join(os.path.curdir, 'data', filename)): return os.path.join(os.path.curdir, 'data', filename) elif os.path.isfile(os.path.join('/usr/local/share/mat/', filename)): @@ -56,10 +56,10 @@ def get_datafile_path(filename): def list_supported_formats(): - ''' Return a list of all locally supported fileformat. + """ Return a list of all locally supported fileformat. It parses that FORMATS file, and removes locally non-supported formats. - ''' + """ handler = XMLParser() parser = xml.sax.make_parser() parser.setContentHandler(handler) @@ -76,9 +76,10 @@ def list_supported_formats(): class XMLParser(xml.sax.handler.ContentHandler): - ''' Parse the supported format xml, and return a corresponding + """ Parse the supported format xml, and return a corresponding list of dict - ''' + """ + def __init__(self): self.dict = {} self.list = [] @@ -86,15 +87,15 @@ class XMLParser(xml.sax.handler.ContentHandler): self.between = False def startElement(self, name, attrs): - ''' Called when entering into xml tag - ''' + """ Called when entering into xml tag + """ self.between = True self.key = name self.content = '' def endElement(self, name): - ''' Called when exiting a xml tag - ''' + """ Called when exiting a xml tag + """ if name == 'format': # leaving a fileformat section self.list.append(self.dict.copy()) self.dict.clear() @@ -104,15 +105,15 @@ class XMLParser(xml.sax.handler.ContentHandler): self.between = False def characters(self, characters): - ''' Concatenate the content between opening and closing tags - ''' + """ Concatenate the content between opening and closing tags + """ if self.between: self.content += characters def secure_remove(filename): - ''' Securely remove the file - ''' + """ Securely remove the file + """ # I want the file removed, even if it's ro try: os.chmod(filename, 220) @@ -141,9 +142,9 @@ def secure_remove(filename): def create_class_file(name, backup, **kwargs): - ''' Return a $FILETYPEStripper() class, + """ Return a $FILETYPEStripper() class, corresponding to the filetype of the given file - ''' + """ if not os.path.isfile(name): # check if the file exists logging.error('%s is not a valid file' % name) return None @@ -153,7 +154,7 @@ def create_class_file(name, backup, **kwargs): return None if not os.path.getsize(name): - #check if the file is not empty (hachoir crash on empty files) + # check if the file is not empty (hachoir crash on empty files) logging.error('%s is empty' % name) return None diff --git a/libmat/misc.py b/libmat/misc.py index 450f381..b1a551c 100644 --- a/libmat/misc.py +++ b/libmat/misc.py @@ -1,5 +1,5 @@ -''' Care about misc formats -''' +""" Care about misc formats +""" import parser @@ -7,33 +7,34 @@ from bencode import bencode class TorrentStripper(parser.GenericParser): - ''' Represent a torrent file with the help + """ Represent a torrent file with the help of the bencode lib from Petru Paler - ''' + """ + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', - 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) + 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) def __get_key_recursively(self, dictionary): - ''' Get recursively all keys from a dict and + """ Get recursively all keys from a dict and its subdicts - ''' + """ for i, j in list(dictionary.items()): if isinstance(j, dict): - return set([i]).union(self.__get_key_recursively(j)) - return set([i]) + return {i}.union(self.__get_key_recursively(j)) + return {i} def is_clean(self): - ''' Check if the file is clean from harmful metadata - ''' + """ Check if the file is clean from harmful metadata + """ with open(self.filename, 'r') as f: decoded = bencode.bdecode(f.read()) return self.fields.issuperset(self.__get_key_recursively(decoded)) def __get_meta_recursively(self, dictionary): - ''' Get recursively all harmful metadata - ''' + """ Get recursively all harmful metadata + """ d = dict() for i, j in list(dictionary.items()): if i not in self.fields: @@ -43,15 +44,15 @@ class TorrentStripper(parser.GenericParser): return d def get_meta(self): - ''' Return a dict with all the meta of the file - ''' + """ Return a dict with all the meta of the file + """ with open(self.filename, 'r') as f: decoded = bencode.bdecode(f.read()) return self.__get_meta_recursively(decoded) def __remove_all_recursively(self, dictionary): - ''' Remove recursively all compromizing fields - ''' + """ Remove recursively all compromizing fields + """ d = dict() for i, j in [i for i in list(dictionary.items()) if i in self.fields]: if isinstance(j, dict): @@ -61,8 +62,8 @@ class TorrentStripper(parser.GenericParser): return d def remove_all(self): - ''' Remove all comprimizing fields - ''' + """ Remove all comprimizing fields + """ decoded = '' with open(self.filename, 'r') as f: decoded = bencode.bdecode(f.read()) diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py index 403c9a7..be89178 100644 --- a/libmat/mutagenstripper.py +++ b/libmat/mutagenstripper.py @@ -1,5 +1,5 @@ -''' Take care of mutagen-supported formats (audio) -''' +""" Take care of mutagen-supported formats (audio) +""" import parser @@ -23,9 +23,9 @@ class MutagenStripper(parser.GenericParser): return True def get_meta(self): - ''' + """ Return the content of the metadata block is present - ''' + """ metadata = {} if self.mfile.tags: for key, value in self.mfile.tags: diff --git a/libmat/office.py b/libmat/office.py index d020c46..bd4bd97 100644 --- a/libmat/office.py +++ b/libmat/office.py @@ -1,6 +1,6 @@ -''' Care about office's formats +""" Care about office's formats -''' +""" import logging import os @@ -21,14 +21,14 @@ import archive class OpenDocumentStripper(archive.TerminalZipStripper): - ''' An open document file is a zip, with xml file into. + """ An open document file is a zip, with xml file into. The one that interest us is meta.xml - ''' + """ def get_meta(self): - ''' Return a dict with all the meta of the file by + """ Return a dict with all the meta of the file by trying to read the meta.xml file. - ''' + """ metadata = super(OpenDocumentStripper, self).get_meta() zipin = zipfile.ZipFile(self.filename, 'r') try: @@ -49,13 +49,13 @@ class OpenDocumentStripper(archive.TerminalZipStripper): return metadata def remove_all(self): - ''' Removes metadata - ''' + """ Removes metadata + """ return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml']) def is_clean(self): - ''' Check if the file is clean from harmful metadatas - ''' + """ Check if the file is clean from harmful metadatas + """ clean_super = super(OpenDocumentStripper, self).is_clean() if clean_super is False: return False @@ -70,20 +70,21 @@ class OpenDocumentStripper(archive.TerminalZipStripper): class OpenXmlStripper(archive.TerminalZipStripper): - ''' Represent an office openxml document, which is like + """ Represent an office openxml document, which is like an opendocument format, with some tricky stuff added. It contains mostly xml, but can have media blobs, crap, ... (I don't like this format.) - ''' + """ + def remove_all(self): return super(OpenXmlStripper, self).remove_all( - beginning_blacklist=('docProps/'), whitelist=('.rels')) + beginning_blacklist='docProps/', whitelist='.rels') def is_clean(self): - ''' Check if the file is clean from harmful metadatas. + """ Check if the file is clean from harmful metadatas. This implementation is faster than something like "return this.get_meta() == {}". - ''' + """ clean_super = super(OpenXmlStripper, self).is_clean() if clean_super is False: return False @@ -96,8 +97,8 @@ class OpenXmlStripper(archive.TerminalZipStripper): return True def get_meta(self): - ''' Return a dict with all the meta of the file - ''' + """ Return a dict with all the meta of the file + """ metadata = super(OpenXmlStripper, self).get_meta() zipin = zipfile.ZipFile(self.filename, 'r') @@ -109,8 +110,9 @@ class OpenXmlStripper(archive.TerminalZipStripper): class PdfStripper(parser.GenericParser): - ''' Represent a PDF file - ''' + """ Represent a PDF file + """ + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) self.uri = 'file://' + os.path.abspath(self.filename) @@ -121,16 +123,16 @@ class PdfStripper(parser.GenericParser): self.pdf_quality = False self.meta_list = frozenset(['title', 'author', 'subject', - 'keywords', 'creator', 'producer', 'metadata']) + 'keywords', 'creator', 'producer', 'metadata']) def is_clean(self): - ''' Check if the file is clean from harmful metadatas - ''' + """ Check if the file is clean from harmful metadatas + """ document = Poppler.Document.new_from_file(self.uri, self.password) return not any(document.get_property(key) for key in self.meta_list) def remove_all(self): - ''' Opening the PDF with poppler, then doing a render + """ Opening the PDF with poppler, then doing a render on a cairo pdfsurface for each pages. http://cairographics.org/documentation/pycairo/2/ @@ -138,7 +140,7 @@ class PdfStripper(parser.GenericParser): The use of an intermediate tempfile is necessary because python-cairo segfaults on unicode. See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 - ''' + """ document = Poppler.Document.new_from_file(self.uri, self.password) try: output = tempfile.mkstemp()[1] @@ -169,6 +171,7 @@ class PdfStripper(parser.GenericParser): try: import pdfrw # For now, poppler cannot write meta, so we must use pdfrw + logging.debug('Removing %s\'s superficial metadata' % self.filename) trailer = pdfrw.PdfReader(self.output) trailer.Info.Producer = None @@ -183,8 +186,8 @@ class PdfStripper(parser.GenericParser): return True def get_meta(self): - ''' Return a dict with all the meta of the file - ''' + """ Return a dict with all the meta of the file + """ document = Poppler.Document.new_from_file(self.uri, self.password) metadata = {} for key in self.meta_list: diff --git a/libmat/parser.py b/libmat/parser.py index 1765da8..eed3140 100644 --- a/libmat/parser.py +++ b/libmat/parser.py @@ -1,5 +1,5 @@ -''' Parent class of all parser -''' +""" Parent class of all parser +""" import os import shutil @@ -22,8 +22,8 @@ FIELD = object() class GenericParser(object): - ''' Parent class of all parsers - ''' + """ Parent class of all parsers + """ def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): self.filename = '' self.parser = parser @@ -40,15 +40,15 @@ class GenericParser(object): self.output = hachoir_core.cmd_line.unicodeFilename(output) def __del__(self): - ''' Remove tempfile if it was not used - ''' + """ Remove tempfile if it was not used + """ if os.path.exists(self.output): mat.secure_remove(self.output) def is_clean(self): - ''' + """ Check if the file is clean from harmful metadatas - ''' + """ for field in self.editor: if self._should_remove(field): return self._is_clean(self.editor) @@ -65,16 +65,16 @@ class GenericParser(object): return True def remove_all(self): - ''' Remove all compromising fields - ''' + """ Remove all compromising fields + """ state = self._remove_all(self.editor) hachoir_core.field.writeIntoFile(self.editor, self.output) self.do_backup() return state def _remove_all(self, fieldset): - ''' Recursive way to handle tree metadatas - ''' + """ Recursive way to handle tree metadatas + """ try: for field in fieldset: remove = self._should_remove(field) @@ -87,20 +87,20 @@ class GenericParser(object): return False def _remove(self, fieldset, field): - ''' Delete the given field - ''' + """ Delete the given field + """ del fieldset[field] def get_meta(self): - ''' Return a dict with all the meta of the file - ''' + """ Return a dict with all the meta of the file + """ metadata = {} self._get_meta(self.editor, metadata) return metadata def _get_meta(self, fieldset, metadata): - ''' Recursive way to handle tree metadatas - ''' + """ Recursive way to handle tree metadatas + """ for field in fieldset: remove = self._should_remove(field) if remove: @@ -112,22 +112,22 @@ class GenericParser(object): self._get_meta(field, None) def _should_remove(self, key): - ''' Return True if the field is compromising + """ Return True if the field is compromising abstract method - ''' + """ raise NotImplementedError def create_backup_copy(self): - ''' Create a backup copy - ''' + """ Create a backup copy + """ shutil.copy2(self.filename, self.filename + '.bak') def do_backup(self): - ''' Keep a backup of the file if asked. + """ Keep a backup of the file if asked. The process of double-renaming is not very elegant, but it greatly simplify new strippers implementation. - ''' + """ if self.backup: shutil.move(self.filename, self.filename + '.bak') else: diff --git a/libmat/strippers.py b/libmat/strippers.py index d873a39..008442e 100644 --- a/libmat/strippers.py +++ b/libmat/strippers.py @@ -1,5 +1,5 @@ -''' Manage which fileformat can be processed -''' +""" Manage which fileformat can be processed +""" import archive import audio -- cgit v1.3