diff options
| author | jvoisin | 2018-05-16 22:36:59 +0200 |
|---|---|---|
| committer | jvoisin | 2018-05-16 22:36:59 +0200 |
| commit | fa7d18784ca18c5c70bbb36cde9d5915843456c3 (patch) | |
| tree | 3c7b36ea63ecf05ba6fc777c4f3a3a74fa60a450 | |
| parent | 0354c3b7e3a455cce130b6226926436a21128641 (diff) | |
Do a pylint pass
| -rwxr-xr-x | main.py | 20 | ||||
| -rw-r--r-- | src/__init__.py | 3 | ||||
| -rw-r--r-- | src/audio.py | 2 | ||||
| -rw-r--r-- | src/harmless.py | 1 | ||||
| -rw-r--r-- | src/images.py | 59 | ||||
| -rw-r--r-- | src/office.py | 27 | ||||
| -rw-r--r-- | src/parser_factory.py | 2 | ||||
| -rw-r--r-- | src/pdf.py | 12 | ||||
| -rw-r--r-- | src/torrent.py | 42 |
9 files changed, 89 insertions, 79 deletions
diff --git a/main.py b/main.py --- a/main.py +++ b/main.py | |||
| @@ -12,7 +12,7 @@ from src import parser_factory, unsupported_extensions | |||
| 12 | 12 | ||
| 13 | __version__ = '0.1.0' | 13 | __version__ = '0.1.0' |
| 14 | 14 | ||
| 15 | def __check_file(filename:str, mode:int = os.R_OK) -> bool: | 15 | def __check_file(filename: str, mode: int = os.R_OK) -> bool: |
| 16 | if not os.path.isfile(filename): | 16 | if not os.path.isfile(filename): |
| 17 | print("[-] %s is not a regular file." % filename) | 17 | print("[-] %s is not a regular file." % filename) |
| 18 | return False | 18 | return False |
| @@ -26,9 +26,9 @@ def create_arg_parser(): | |||
| 26 | parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') | 26 | parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') |
| 27 | parser.add_argument('files', nargs='*') | 27 | parser.add_argument('files', nargs='*') |
| 28 | parser.add_argument('-v', '--version', action='version', | 28 | parser.add_argument('-v', '--version', action='version', |
| 29 | version='MAT2 %s' % __version__) | 29 | version='MAT2 %s' % __version__) |
| 30 | parser.add_argument('-l', '--list', action='store_true', | 30 | parser.add_argument('-l', '--list', action='store_true', |
| 31 | help='list all supported fileformats') | 31 | help='list all supported fileformats') |
| 32 | 32 | ||
| 33 | info = parser.add_mutually_exclusive_group() | 33 | info = parser.add_mutually_exclusive_group() |
| 34 | info.add_argument('-c', '--check', action='store_true', | 34 | info.add_argument('-c', '--check', action='store_true', |
| @@ -40,7 +40,7 @@ def create_arg_parser(): | |||
| 40 | return parser | 40 | return parser |
| 41 | 41 | ||
| 42 | 42 | ||
| 43 | def show_meta(filename:str): | 43 | def show_meta(filename: str): |
| 44 | if not __check_file(filename): | 44 | if not __check_file(filename): |
| 45 | return | 45 | return |
| 46 | 46 | ||
| @@ -48,18 +48,18 @@ def show_meta(filename:str): | |||
| 48 | if p is None: | 48 | if p is None: |
| 49 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) | 49 | print("[-] %s's format (%s) is not supported" % (filename, mtype)) |
| 50 | return | 50 | return |
| 51 | |||
| 51 | print("[+] Metadata for %s:" % filename) | 52 | print("[+] Metadata for %s:" % filename) |
| 52 | for k,v in p.get_meta().items(): | 53 | for k, v in p.get_meta().items(): |
| 53 | try: # FIXME this is ugly. | 54 | try: # FIXME this is ugly. |
| 54 | print(" %s: %s" % (k, v)) | 55 | print(" %s: %s" % (k, v)) |
| 55 | except UnicodeEncodeError: | 56 | except UnicodeEncodeError: |
| 56 | print(" %s: harmful content" % k) | 57 | print(" %s: harmful content" % k) |
| 57 | 58 | ||
| 58 | 59 | def clean_meta(params: Tuple[str, bool]) -> bool: | |
| 59 | def clean_meta(params:Tuple[str, bool]) -> bool: | ||
| 60 | filename, is_lightweigth = params | 60 | filename, is_lightweigth = params |
| 61 | if not __check_file(filename, os.R_OK|os.W_OK): | 61 | if not __check_file(filename, os.R_OK|os.W_OK): |
| 62 | return | 62 | return False |
| 63 | 63 | ||
| 64 | p, mtype = parser_factory.get_parser(filename) | 64 | p, mtype = parser_factory.get_parser(filename) |
| 65 | if p is None: | 65 | if p is None: |
| @@ -102,12 +102,12 @@ def main(): | |||
| 102 | if not args.list: | 102 | if not args.list: |
| 103 | return arg_parser.print_help() | 103 | return arg_parser.print_help() |
| 104 | show_parsers() | 104 | show_parsers() |
| 105 | return | 105 | return 0 |
| 106 | 106 | ||
| 107 | elif args.show: | 107 | elif args.show: |
| 108 | for f in __get_files_recursively(args.files): | 108 | for f in __get_files_recursively(args.files): |
| 109 | show_meta(f) | 109 | show_meta(f) |
| 110 | return | 110 | return 0 |
| 111 | 111 | ||
| 112 | else: | 112 | else: |
| 113 | p = multiprocessing.Pool() | 113 | p = multiprocessing.Pool() |
diff --git a/src/__init__.py b/src/__init__.py index 3f5c478..07d3036 100644 --- a/src/__init__.py +++ b/src/__init__.py | |||
| @@ -2,4 +2,5 @@ | |||
| 2 | 2 | ||
| 3 | # A set of extension that aren't supported, despite matching a supported mimetype | 3 | # A set of extension that aren't supported, despite matching a supported mimetype |
| 4 | unsupported_extensions = set(['bat', 'c', 'h', 'ksh', 'pl', 'txt', 'asc', | 4 | unsupported_extensions = set(['bat', 'c', 'h', 'ksh', 'pl', 'txt', 'asc', |
| 5 | 'text', 'pot', 'brf', 'srt', 'rdf', 'wsdl', 'xpdl', 'xsl', 'xsd']) | 5 | 'text', 'pot', 'brf', 'srt', 'rdf', 'wsdl', |
| 6 | 'xpdl', 'xsl', 'xsd']) | ||
diff --git a/src/audio.py b/src/audio.py index 4a385b2..3a6aa79 100644 --- a/src/audio.py +++ b/src/audio.py | |||
| @@ -9,7 +9,7 @@ class MutagenParser(abstract.AbstractParser): | |||
| 9 | def get_meta(self): | 9 | def get_meta(self): |
| 10 | f = mutagen.File(self.filename) | 10 | f = mutagen.File(self.filename) |
| 11 | if f.tags: | 11 | if f.tags: |
| 12 | return {k:', '.join(v) for k,v in f.tags.items()} | 12 | return {k:', '.join(v) for k, v in f.tags.items()} |
| 13 | return {} | 13 | return {} |
| 14 | 14 | ||
| 15 | def remove_all(self): | 15 | def remove_all(self): |
diff --git a/src/harmless.py b/src/harmless.py index fbc2897..aa00582 100644 --- a/src/harmless.py +++ b/src/harmless.py | |||
| @@ -6,6 +6,7 @@ class HarmlessParser(abstract.AbstractParser): | |||
| 6 | mimetypes = {'application/xml', 'text/plain'} | 6 | mimetypes = {'application/xml', 'text/plain'} |
| 7 | 7 | ||
| 8 | def __init__(self, filename: str): | 8 | def __init__(self, filename: str): |
| 9 | super().__init__(filename) | ||
| 9 | self.filename = filename | 10 | self.filename = filename |
| 10 | self.output_filename = filename | 11 | self.output_filename = filename |
| 11 | 12 | ||
diff --git a/src/images.py b/src/images.py index 6cc3dfe..c84952a 100644 --- a/src/images.py +++ b/src/images.py | |||
| @@ -14,11 +14,12 @@ from . import abstract | |||
| 14 | class PNGParser(abstract.AbstractParser): | 14 | class PNGParser(abstract.AbstractParser): |
| 15 | mimetypes = {'image/png', } | 15 | mimetypes = {'image/png', } |
| 16 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', | 16 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', |
| 17 | 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', | 17 | 'Directory', 'FileSize', 'FileModifyDate', |
| 18 | "FileInodeChangeDate", 'FilePermissions', 'FileType', | 18 | 'FileAccessDate', 'FileInodeChangeDate', |
| 19 | 'FileTypeExtension', 'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType', | 19 | 'FilePermissions', 'FileType', 'FileTypeExtension', |
| 20 | 'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize', | 20 | 'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType', |
| 21 | 'Megapixels', 'ImageHeight'} | 21 | 'Compression', 'Filter', 'Interlace', 'BackgroundColor', |
| 22 | 'ImageSize', 'Megapixels', 'ImageHeight'} | ||
| 22 | 23 | ||
| 23 | def __init__(self, filename): | 24 | def __init__(self, filename): |
| 24 | super().__init__(filename) | 25 | super().__init__(filename) |
| @@ -63,36 +64,38 @@ class GdkPixbufAbstractParser(abstract.AbstractParser): | |||
| 63 | class JPGParser(GdkPixbufAbstractParser): | 64 | class JPGParser(GdkPixbufAbstractParser): |
| 64 | mimetypes = {'image/jpeg'} | 65 | mimetypes = {'image/jpeg'} |
| 65 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', | 66 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', |
| 66 | 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', | 67 | 'Directory', 'FileSize', 'FileModifyDate', |
| 67 | "FileInodeChangeDate", 'FilePermissions', 'FileType', | 68 | 'FileAccessDate', "FileInodeChangeDate", |
| 68 | 'FileTypeExtension', 'MIMEType', 'ImageWidth', | 69 | 'FilePermissions', 'FileType', 'FileTypeExtension', |
| 69 | 'ImageSize', 'BitsPerSample', 'ColorComponents', 'EncodingProcess', | 70 | 'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample', |
| 70 | 'JFIFVersion', 'ResolutionUnit', 'XResolution', 'YCbCrSubSampling', | 71 | 'ColorComponents', 'EncodingProcess', 'JFIFVersion', |
| 71 | 'YResolution', 'Megapixels', 'ImageHeight'} | 72 | 'ResolutionUnit', 'XResolution', 'YCbCrSubSampling', |
| 73 | 'YResolution', 'Megapixels', 'ImageHeight'} | ||
| 72 | 74 | ||
| 73 | 75 | ||
| 74 | class TiffParser(GdkPixbufAbstractParser): | 76 | class TiffParser(GdkPixbufAbstractParser): |
| 75 | mimetypes = {'image/tiff'} | 77 | mimetypes = {'image/tiff'} |
| 76 | meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', | 78 | meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', |
| 77 | 'FillOrder', 'PhotometricInterpretation', 'PlanarConfiguration', | 79 | 'FillOrder', 'PhotometricInterpretation', |
| 78 | 'RowsPerStrip', 'SamplesPerPixel', 'StripByteCounts', | 80 | 'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel', |
| 79 | 'StripOffsets', 'BitsPerSample', 'Directory', 'ExifToolVersion', | 81 | 'StripByteCounts', 'StripOffsets', 'BitsPerSample', |
| 80 | 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', | 82 | 'Directory', 'ExifToolVersion', 'FileAccessDate', |
| 81 | 'FileName', 'FilePermissions', 'FileSize', 'FileType', | 83 | 'FileInodeChangeDate', 'FileModifyDate', 'FileName', |
| 82 | 'FileTypeExtension', 'ImageHeight', 'ImageSize', 'ImageWidth', | 84 | 'FilePermissions', 'FileSize', 'FileType', |
| 83 | 'MIMEType', 'Megapixels', 'SourceFile'} | 85 | 'FileTypeExtension', 'ImageHeight', 'ImageSize', |
| 86 | 'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'} | ||
| 84 | 87 | ||
| 85 | 88 | ||
| 86 | class BMPParser(GdkPixbufAbstractParser): | 89 | class BMPParser(GdkPixbufAbstractParser): |
| 87 | mimetypes = {'image/x-ms-bmp'} | 90 | mimetypes = {'image/x-ms-bmp'} |
| 88 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', | 91 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', |
| 89 | 'FileSize', 'FileModifyDate', 'FileAccessDate', | 92 | 'FileSize', 'FileModifyDate', 'FileAccessDate', |
| 90 | 'FileInodeChangeDate', 'FilePermissions', 'FileType', | 93 | 'FileInodeChangeDate', 'FilePermissions', 'FileType', |
| 91 | 'FileTypeExtension', 'MIMEType', 'BMPVersion', 'ImageWidth', | 94 | 'FileTypeExtension', 'MIMEType', 'BMPVersion', |
| 92 | 'ImageHeight', 'Planes', 'BitDepth', 'Compression', 'ImageLength', | 95 | 'ImageWidth', 'ImageHeight', 'Planes', 'BitDepth', |
| 93 | 'PixelsPerMeterX', 'PixelsPerMeterY', 'NumColors', | 96 | 'Compression', 'ImageLength', 'PixelsPerMeterX', |
| 94 | 'NumImportantColors', 'RedMask', 'GreenMask', 'BlueMask', | 97 | 'PixelsPerMeterY', 'NumColors', 'NumImportantColors', |
| 95 | 'AlphaMask', 'ColorSpace', 'RedEndpoint', 'GreenEndpoint', | 98 | 'RedMask', 'GreenMask', 'BlueMask', 'AlphaMask', |
| 96 | 'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue', 'ImageSize', | 99 | 'ColorSpace', 'RedEndpoint', 'GreenEndpoint', |
| 97 | 'Megapixels'} | 100 | 'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue', |
| 98 | 101 | 'ImageSize', 'Megapixels'} | |
diff --git a/src/office.py b/src/office.py index da6168e..749fc7d 100644 --- a/src/office.py +++ b/src/office.py | |||
| @@ -9,14 +9,14 @@ from . import abstract, parser_factory | |||
| 9 | 9 | ||
| 10 | 10 | ||
| 11 | class ArchiveBasedAbstractParser(abstract.AbstractParser): | 11 | class ArchiveBasedAbstractParser(abstract.AbstractParser): |
| 12 | def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo: | 12 | def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: |
| 13 | zipinfo.compress_type = zipfile.ZIP_DEFLATED | 13 | zipinfo.compress_type = zipfile.ZIP_DEFLATED |
| 14 | zipinfo.create_system = 3 # Linux | 14 | zipinfo.create_system = 3 # Linux |
| 15 | zipinfo.comment = b'' | 15 | zipinfo.comment = b'' |
| 16 | zipinfo.date_time = (1980, 1, 1, 0, 0, 0) | 16 | zipinfo.date_time = (1980, 1, 1, 0, 0, 0) |
| 17 | return zipinfo | 17 | return zipinfo |
| 18 | 18 | ||
| 19 | def _get_zipinfo_meta(self, zipinfo:zipfile.ZipInfo) -> dict: | 19 | def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> dict: |
| 20 | metadata = {} | 20 | metadata = {} |
| 21 | if zipinfo.create_system == 3: | 21 | if zipinfo.create_system == 3: |
| 22 | #metadata['create_system'] = 'Linux' | 22 | #metadata['create_system'] = 'Linux' |
| @@ -35,7 +35,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 35 | return metadata | 35 | return metadata |
| 36 | 36 | ||
| 37 | 37 | ||
| 38 | def _clean_internal_file(self, item:zipfile.ZipInfo, temp_folder:str, zin:zipfile.ZipFile, zout:zipfile.ZipFile): | 38 | def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str, |
| 39 | zin: zipfile.ZipFile, zout: zipfile.ZipFile): | ||
| 39 | zin.extract(member=item, path=temp_folder) | 40 | zin.extract(member=item, path=temp_folder) |
| 40 | tmp_parser, mtype = parser_factory.get_parser(os.path.join(temp_folder, item.filename)) | 41 | tmp_parser, mtype = parser_factory.get_parser(os.path.join(temp_folder, item.filename)) |
| 41 | if not tmp_parser: | 42 | if not tmp_parser: |
| @@ -50,9 +51,9 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 50 | 51 | ||
| 51 | class MSOfficeParser(ArchiveBasedAbstractParser): | 52 | class MSOfficeParser(ArchiveBasedAbstractParser): |
| 52 | mimetypes = { | 53 | mimetypes = { |
| 53 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | 54 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', |
| 54 | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', | 55 | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', |
| 55 | 'application/vnd.openxmlformats-officedocument.presentationml.presentation' | 56 | 'application/vnd.openxmlformats-officedocument.presentationml.presentation' |
| 56 | } | 57 | } |
| 57 | files_to_keep = {'_rels/.rels', 'word/_rels/document.xml.rels'} | 58 | files_to_keep = {'_rels/.rels', 'word/_rels/document.xml.rels'} |
| 58 | 59 | ||
| @@ -103,13 +104,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser): | |||
| 103 | 104 | ||
| 104 | class LibreOfficeParser(ArchiveBasedAbstractParser): | 105 | class LibreOfficeParser(ArchiveBasedAbstractParser): |
| 105 | mimetypes = { | 106 | mimetypes = { |
| 106 | 'application/vnd.oasis.opendocument.text', | 107 | 'application/vnd.oasis.opendocument.text', |
| 107 | 'application/vnd.oasis.opendocument.spreadsheet', | 108 | 'application/vnd.oasis.opendocument.spreadsheet', |
| 108 | 'application/vnd.oasis.opendocument.presentation', | 109 | 'application/vnd.oasis.opendocument.presentation', |
| 109 | 'application/vnd.oasis.opendocument.graphics', | 110 | 'application/vnd.oasis.opendocument.graphics', |
| 110 | 'application/vnd.oasis.opendocument.chart', | 111 | 'application/vnd.oasis.opendocument.chart', |
| 111 | 'application/vnd.oasis.opendocument.formula', | 112 | 'application/vnd.oasis.opendocument.formula', |
| 112 | 'application/vnd.oasis.opendocument.image', | 113 | 'application/vnd.oasis.opendocument.image', |
| 113 | } | 114 | } |
| 114 | 115 | ||
| 115 | def get_meta(self): | 116 | def get_meta(self): |
diff --git a/src/parser_factory.py b/src/parser_factory.py index 2c30659..48616b0 100644 --- a/src/parser_factory.py +++ b/src/parser_factory.py | |||
| @@ -2,10 +2,10 @@ import os | |||
| 2 | import mimetypes | 2 | import mimetypes |
| 3 | import importlib | 3 | import importlib |
| 4 | import pkgutil | 4 | import pkgutil |
| 5 | from typing import TypeVar | ||
| 5 | 6 | ||
| 6 | from . import abstract, unsupported_extensions | 7 | from . import abstract, unsupported_extensions |
| 7 | 8 | ||
| 8 | from typing import TypeVar | ||
| 9 | 9 | ||
| 10 | T = TypeVar('T', bound='abstract.AbstractParser') | 10 | T = TypeVar('T', bound='abstract.AbstractParser') |
| 11 | 11 | ||
diff --git a/src/pdf.py b/src/pdf.py --- a/src/pdf.py +++ b/src/pdf.py | |||
| @@ -21,8 +21,8 @@ logging.basicConfig(level=logging.DEBUG) | |||
| 21 | class PDFParser(abstract.AbstractParser): | 21 | class PDFParser(abstract.AbstractParser): |
| 22 | mimetypes = {'application/pdf', } | 22 | mimetypes = {'application/pdf', } |
| 23 | meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords', | 23 | meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords', |
| 24 | 'metadata', 'mod-date', 'producer', 'subject', 'title', | 24 | 'metadata', 'mod-date', 'producer', 'subject', 'title', |
| 25 | 'viewer-preferences'} | 25 | 'viewer-preferences'} |
| 26 | 26 | ||
| 27 | def __init__(self, filename): | 27 | def __init__(self, filename): |
| 28 | super().__init__(filename) | 28 | super().__init__(filename) |
| @@ -103,7 +103,8 @@ class PDFParser(abstract.AbstractParser): | |||
| 103 | 103 | ||
| 104 | return True | 104 | return True |
| 105 | 105 | ||
| 106 | def __remove_superficial_meta(self, in_file:str, out_file: str) -> bool: | 106 | @staticmethod |
| 107 | def __remove_superficial_meta(in_file: str, out_file: str) -> bool: | ||
| 107 | document = Poppler.Document.new_from_file('file://' + in_file) | 108 | document = Poppler.Document.new_from_file('file://' + in_file) |
| 108 | document.set_producer('') | 109 | document.set_producer('') |
| 109 | document.set_creator('') | 110 | document.set_creator('') |
| @@ -112,7 +113,8 @@ class PDFParser(abstract.AbstractParser): | |||
| 112 | return True | 113 | return True |
| 113 | 114 | ||
| 114 | 115 | ||
| 115 | def __parse_metadata_field(self, data:str) -> dict: | 116 | @staticmethod |
| 117 | def __parse_metadata_field(data: str) -> dict: | ||
| 116 | metadata = {} | 118 | metadata = {} |
| 117 | for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I): | 119 | for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I): |
| 118 | metadata[key] = value | 120 | metadata[key] = value |
| @@ -128,6 +130,6 @@ class PDFParser(abstract.AbstractParser): | |||
| 128 | if document.get_property(key): | 130 | if document.get_property(key): |
| 129 | metadata[key] = document.get_property(key) | 131 | metadata[key] = document.get_property(key) |
| 130 | if 'metadata' in metadata: | 132 | if 'metadata' in metadata: |
| 131 | parsed_meta = self.__parse_metadata_field(metadata['metadata']) | 133 | parsed_meta = self.__parse_metadata_field(metadata['metadata']) |
| 132 | return {**metadata, **parsed_meta} | 134 | return {**metadata, **parsed_meta} |
| 133 | return metadata | 135 | return metadata |
diff --git a/src/torrent.py b/src/torrent.py index bdf83ce..cb4b5e3 100644 --- a/src/torrent.py +++ b/src/torrent.py | |||
| @@ -11,7 +11,7 @@ class TorrentParser(abstract.AbstractParser): | |||
| 11 | d = _BencodeHandler().bdecode(f.read()) | 11 | d = _BencodeHandler().bdecode(f.read()) |
| 12 | if d is None: | 12 | if d is None: |
| 13 | return {'Unknown meta': 'Unable to parse torrent file "%s".' % self.filename} | 13 | return {'Unknown meta': 'Unable to parse torrent file "%s".' % self.filename} |
| 14 | for k,v in d.items(): | 14 | for k, v in d.items(): |
| 15 | if k not in self.whitelist: | 15 | if k not in self.whitelist: |
| 16 | metadata[k.decode('utf-8')] = v | 16 | metadata[k.decode('utf-8')] = v |
| 17 | return metadata | 17 | return metadata |
| @@ -23,7 +23,7 @@ class TorrentParser(abstract.AbstractParser): | |||
| 23 | d = _BencodeHandler().bdecode(f.read()) | 23 | d = _BencodeHandler().bdecode(f.read()) |
| 24 | if d is None: | 24 | if d is None: |
| 25 | return False | 25 | return False |
| 26 | for k,v in d.items(): | 26 | for k, v in d.items(): |
| 27 | if k in self.whitelist: | 27 | if k in self.whitelist: |
| 28 | cleaned[k] = v | 28 | cleaned[k] = v |
| 29 | with open(self.output_filename, 'wb') as f: | 29 | with open(self.output_filename, 'wb') as f: |
| @@ -39,21 +39,22 @@ class _BencodeHandler(object): | |||
| 39 | """ | 39 | """ |
| 40 | def __init__(self): | 40 | def __init__(self): |
| 41 | self.__decode_func = { | 41 | self.__decode_func = { |
| 42 | ord('d'): self.__decode_dict, | 42 | ord('d'): self.__decode_dict, |
| 43 | ord('i'): self.__decode_int, | 43 | ord('i'): self.__decode_int, |
| 44 | ord('l'): self.__decode_list, | 44 | ord('l'): self.__decode_list, |
| 45 | } | 45 | } |
| 46 | for i in range(0, 10): | 46 | for i in range(0, 10): |
| 47 | self.__decode_func[ord(str(i))] = self.__decode_string | 47 | self.__decode_func[ord(str(i))] = self.__decode_string |
| 48 | 48 | ||
| 49 | self.__encode_func = { | 49 | self.__encode_func = { |
| 50 | bytes: self.__encode_string, | 50 | bytes: self.__encode_string, |
| 51 | dict: self.__encode_dict, | 51 | dict: self.__encode_dict, |
| 52 | int: self.__encode_int, | 52 | int: self.__encode_int, |
| 53 | list: self.__encode_list, | 53 | list: self.__encode_list, |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | def __decode_int(self, s:str) -> (int, str): | 56 | @staticmethod |
| 57 | def __decode_int(s: str) -> (int, str): | ||
| 57 | s = s[1:] | 58 | s = s[1:] |
| 58 | next_idx = s.index(b'e') | 59 | next_idx = s.index(b'e') |
| 59 | if s.startswith(b'-0'): | 60 | if s.startswith(b'-0'): |
| @@ -62,7 +63,8 @@ class _BencodeHandler(object): | |||
| 62 | raise ValueError # no leading zero except for zero itself | 63 | raise ValueError # no leading zero except for zero itself |
| 63 | return int(s[:next_idx]), s[next_idx+1:] | 64 | return int(s[:next_idx]), s[next_idx+1:] |
| 64 | 65 | ||
| 65 | def __decode_string(self, s:str) -> (str, str): | 66 | @staticmethod |
| 67 | def __decode_string(s: str) -> (str, str): | ||
| 66 | sep = s.index(b':') | 68 | sep = s.index(b':') |
| 67 | str_len = int(s[:sep]) | 69 | str_len = int(s[:sep]) |
| 68 | if str_len < 0: | 70 | if str_len < 0: |
| @@ -72,7 +74,7 @@ class _BencodeHandler(object): | |||
| 72 | s = s[1:] | 74 | s = s[1:] |
| 73 | return s[sep:sep+str_len], s[sep+str_len:] | 75 | return s[sep:sep+str_len], s[sep+str_len:] |
| 74 | 76 | ||
| 75 | def __decode_list(self, s:str) -> (list, str): | 77 | def __decode_list(self, s: str) -> (list, str): |
| 76 | r = list() | 78 | r = list() |
| 77 | s = s[1:] # skip leading `l` | 79 | s = s[1:] # skip leading `l` |
| 78 | while s[0] != ord('e'): | 80 | while s[0] != ord('e'): |
| @@ -80,7 +82,7 @@ class _BencodeHandler(object): | |||
| 80 | r.append(v) | 82 | r.append(v) |
| 81 | return r, s[1:] | 83 | return r, s[1:] |
| 82 | 84 | ||
| 83 | def __decode_dict(self, s:str) -> (dict, str): | 85 | def __decode_dict(self, s: str) -> (dict, str): |
| 84 | r = dict() | 86 | r = dict() |
| 85 | s = s[1:] # skip leading `d` | 87 | s = s[1:] # skip leading `d` |
| 86 | while s[0] != ord(b'e'): | 88 | while s[0] != ord(b'e'): |
| @@ -89,30 +91,30 @@ class _BencodeHandler(object): | |||
| 89 | return r, s[1:] | 91 | return r, s[1:] |
| 90 | 92 | ||
| 91 | @staticmethod | 93 | @staticmethod |
| 92 | def __encode_int(x:str) -> bytes: | 94 | def __encode_int(x: str) -> bytes: |
| 93 | return b'i' + bytes(str(x), 'utf-8') + b'e' | 95 | return b'i' + bytes(str(x), 'utf-8') + b'e' |
| 94 | 96 | ||
| 95 | @staticmethod | 97 | @staticmethod |
| 96 | def __encode_string(x:str) -> bytes: | 98 | def __encode_string(x: str) -> bytes: |
| 97 | return bytes((str(len(x))), 'utf-8') + b':' + x | 99 | return bytes((str(len(x))), 'utf-8') + b':' + x |
| 98 | 100 | ||
| 99 | def __encode_list(self, x:str) -> bytes: | 101 | def __encode_list(self, x: str) -> bytes: |
| 100 | ret = b'' | 102 | ret = b'' |
| 101 | for i in x: | 103 | for i in x: |
| 102 | ret += self.__encode_func[type(i)](i) | 104 | ret += self.__encode_func[type(i)](i) |
| 103 | return b'l' + ret + b'e' | 105 | return b'l' + ret + b'e' |
| 104 | 106 | ||
| 105 | def __encode_dict(self, x:str) -> bytes: | 107 | def __encode_dict(self, x: str) -> bytes: |
| 106 | ret = b'' | 108 | ret = b'' |
| 107 | for k, v in sorted(x.items()): | 109 | for k, v in sorted(x.items()): |
| 108 | ret += self.__encode_func[type(k)](k) | 110 | ret += self.__encode_func[type(k)](k) |
| 109 | ret += self.__encode_func[type(v)](v) | 111 | ret += self.__encode_func[type(v)](v) |
| 110 | return b'd' + ret + b'e' | 112 | return b'd' + ret + b'e' |
| 111 | 113 | ||
| 112 | def bencode(self, s:str) -> bytes: | 114 | def bencode(self, s: str) -> bytes: |
| 113 | return self.__encode_func[type(s)](s) | 115 | return self.__encode_func[type(s)](s) |
| 114 | 116 | ||
| 115 | def bdecode(self, s:str): | 117 | def bdecode(self, s: str): |
| 116 | try: | 118 | try: |
| 117 | r, l = self.__decode_func[s[0]](s) | 119 | r, l = self.__decode_func[s[0]](s) |
| 118 | except (IndexError, KeyError, ValueError) as e: | 120 | except (IndexError, KeyError, ValueError) as e: |
