diff options
| author | jvoisin | 2018-04-04 23:21:48 +0200 |
|---|---|---|
| committer | jvoisin | 2018-04-04 23:21:48 +0200 |
| commit | 0239ab3b6a6ae38dbf5ba439f91f0cee498711a9 (patch) | |
| tree | 19159991a5fe0d504ebbd2396800a9a1fda38a09 | |
| parent | 9fa76c4c2038a776cc94af5d9f307bbaad52083c (diff) | |
Add some blank lines to make the code more PEP 8 compliant
| -rwxr-xr-x | main.py | 5 | ||||
| -rw-r--r-- | src/abstract.py | 1 | ||||
| -rw-r--r-- | src/audio.py | 4 | ||||
| -rw-r--r-- | src/harmless.py | 1 | ||||
| -rw-r--r-- | src/images.py | 2 | ||||
| -rw-r--r-- | src/office.py | 2 | ||||
| -rw-r--r-- | src/parser_factory.py | 2 | ||||
| -rw-r--r-- | tests/test_climat2.py | 1 | ||||
| -rw-r--r-- | tests/test_libmat2.py | 3 |
9 files changed, 21 insertions, 0 deletions
| @@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool: | |||
| 16 | return False | 16 | return False |
| 17 | return True | 17 | return True |
| 18 | 18 | ||
| 19 | |||
| 19 | def create_arg_parser(): | 20 | def create_arg_parser(): |
| 20 | parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') | 21 | parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') |
| 21 | parser.add_argument('files', nargs='*') | 22 | parser.add_argument('files', nargs='*') |
| @@ -29,6 +30,7 @@ def create_arg_parser(): | |||
| 29 | help='list all the harmful metadata of a file without removing them') | 30 | help='list all the harmful metadata of a file without removing them') |
| 30 | return parser | 31 | return parser |
| 31 | 32 | ||
| 33 | |||
| 32 | def show_meta(filename:str): | 34 | def show_meta(filename:str): |
| 33 | if not __check_file(filename): | 35 | if not __check_file(filename): |
| 34 | return | 36 | return |
| @@ -44,6 +46,7 @@ def show_meta(filename:str): | |||
| 44 | except UnicodeEncodeError: | 46 | except UnicodeEncodeError: |
| 45 | print(" %s: harmful content" % k) | 47 | print(" %s: harmful content" % k) |
| 46 | 48 | ||
| 49 | |||
| 47 | def clean_meta(filename:str): | 50 | def clean_meta(filename:str): |
| 48 | if not __check_file(filename, os.R_OK|os.W_OK): | 51 | if not __check_file(filename, os.R_OK|os.W_OK): |
| 49 | return | 52 | return |
| @@ -54,6 +57,7 @@ def clean_meta(filename:str): | |||
| 54 | return | 57 | return |
| 55 | p.remove_all() | 58 | p.remove_all() |
| 56 | 59 | ||
| 60 | |||
| 57 | def show_parsers(): | 61 | def show_parsers(): |
| 58 | print('[+] Supported formats:') | 62 | print('[+] Supported formats:') |
| 59 | for parser in parser_factory._get_parsers(): | 63 | for parser in parser_factory._get_parsers(): |
| @@ -61,6 +65,7 @@ def show_parsers(): | |||
| 61 | extensions = ', '.join(mimetypes.guess_all_extensions(mtype)) | 65 | extensions = ', '.join(mimetypes.guess_all_extensions(mtype)) |
| 62 | print(' - %s (%s)' % (mtype, extensions)) | 66 | print(' - %s (%s)' % (mtype, extensions)) |
| 63 | 67 | ||
| 68 | |||
| 64 | def __get_files_recursively(files): | 69 | def __get_files_recursively(files): |
| 65 | for f in files: | 70 | for f in files: |
| 66 | if os.path.isfile(f): | 71 | if os.path.isfile(f): |
diff --git a/src/abstract.py b/src/abstract.py index 4626789..04c1535 100644 --- a/src/abstract.py +++ b/src/abstract.py | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | import abc | 1 | import abc |
| 2 | 2 | ||
| 3 | |||
| 3 | class AbstractParser(abc.ABC): | 4 | class AbstractParser(abc.ABC): |
| 4 | meta_list = set() | 5 | meta_list = set() |
| 5 | mimetypes = set() | 6 | mimetypes = set() |
diff --git a/src/audio.py b/src/audio.py index a56828f..4a385b2 100644 --- a/src/audio.py +++ b/src/audio.py | |||
| @@ -4,6 +4,7 @@ import mutagen | |||
| 4 | 4 | ||
| 5 | from . import abstract | 5 | from . import abstract |
| 6 | 6 | ||
| 7 | |||
| 7 | class MutagenParser(abstract.AbstractParser): | 8 | class MutagenParser(abstract.AbstractParser): |
| 8 | def get_meta(self): | 9 | def get_meta(self): |
| 9 | f = mutagen.File(self.filename) | 10 | f = mutagen.File(self.filename) |
| @@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser): | |||
| 18 | f.save() | 19 | f.save() |
| 19 | return True | 20 | return True |
| 20 | 21 | ||
| 22 | |||
| 21 | class MP3Parser(MutagenParser): | 23 | class MP3Parser(MutagenParser): |
| 22 | mimetypes = {'audio/mpeg', } | 24 | mimetypes = {'audio/mpeg', } |
| 23 | 25 | ||
| @@ -28,8 +30,10 @@ class MP3Parser(MutagenParser): | |||
| 28 | metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text)) | 30 | metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text)) |
| 29 | return metadata | 31 | return metadata |
| 30 | 32 | ||
| 33 | |||
| 31 | class OGGParser(MutagenParser): | 34 | class OGGParser(MutagenParser): |
| 32 | mimetypes = {'audio/ogg', } | 35 | mimetypes = {'audio/ogg', } |
| 33 | 36 | ||
| 37 | |||
| 34 | class FLACParser(MutagenParser): | 38 | class FLACParser(MutagenParser): |
| 35 | mimetypes = {'audio/flac', } | 39 | mimetypes = {'audio/flac', } |
diff --git a/src/harmless.py b/src/harmless.py index 235dabe..9e7c1b4 100644 --- a/src/harmless.py +++ b/src/harmless.py | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | from . import abstract | 1 | from . import abstract |
| 2 | 2 | ||
| 3 | |||
| 3 | class HarmlessParser(abstract.AbstractParser): | 4 | class HarmlessParser(abstract.AbstractParser): |
| 4 | """ This is the parser for filetypes that do not contain metadata. """ | 5 | """ This is the parser for filetypes that do not contain metadata. """ |
| 5 | mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'} | 6 | mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'} |
diff --git a/src/images.py b/src/images.py index afc0658..2c1fd2e 100644 --- a/src/images.py +++ b/src/images.py | |||
| @@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf | |||
| 10 | 10 | ||
| 11 | from . import abstract | 11 | from . import abstract |
| 12 | 12 | ||
| 13 | |||
| 13 | class PNGParser(abstract.AbstractParser): | 14 | class PNGParser(abstract.AbstractParser): |
| 14 | mimetypes = {'image/png', } | 15 | mimetypes = {'image/png', } |
| 15 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', | 16 | meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', |
| @@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser): | |||
| 31 | surface.write_to_png(self.output_filename) | 32 | surface.write_to_png(self.output_filename) |
| 32 | return True | 33 | return True |
| 33 | 34 | ||
| 35 | |||
| 34 | class GdkPixbufAbstractParser(abstract.AbstractParser): | 36 | class GdkPixbufAbstractParser(abstract.AbstractParser): |
| 35 | """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it, | 37 | """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it, |
| 36 | this has the side-effect of removing metadata completely. | 38 | this has the side-effect of removing metadata completely. |
diff --git a/src/office.py b/src/office.py index 9729e19..11692c3 100644 --- a/src/office.py +++ b/src/office.py | |||
| @@ -7,6 +7,7 @@ import zipfile | |||
| 7 | 7 | ||
| 8 | from . import abstract, parser_factory | 8 | from . import abstract, parser_factory |
| 9 | 9 | ||
| 10 | |||
| 10 | class ArchiveBasedAbstractParser(abstract.AbstractParser): | 11 | class ArchiveBasedAbstractParser(abstract.AbstractParser): |
| 11 | def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo: | 12 | def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo: |
| 12 | zipinfo.compress_type = zipfile.ZIP_DEFLATED | 13 | zipinfo.compress_type = zipfile.ZIP_DEFLATED |
| @@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): | |||
| 46 | with open(tmp_parser.output_filename, 'rb') as f: | 47 | with open(tmp_parser.output_filename, 'rb') as f: |
| 47 | zout.writestr(clean_zinfo, f.read()) | 48 | zout.writestr(clean_zinfo, f.read()) |
| 48 | 49 | ||
| 50 | |||
| 49 | class MSOfficeParser(ArchiveBasedAbstractParser): | 51 | class MSOfficeParser(ArchiveBasedAbstractParser): |
| 50 | mimetypes = { | 52 | mimetypes = { |
| 51 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | 53 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', |
diff --git a/src/parser_factory.py b/src/parser_factory.py index ecec789..68e9e9c 100644 --- a/src/parser_factory.py +++ b/src/parser_factory.py | |||
| @@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'): | |||
| 16 | continue | 16 | continue |
| 17 | importlib.import_module(name) | 17 | importlib.import_module(name) |
| 18 | 18 | ||
| 19 | |||
| 19 | def _get_parsers() -> list: | 20 | def _get_parsers() -> list: |
| 20 | """ Get all our parsers!""" | 21 | """ Get all our parsers!""" |
| 21 | def __get_parsers(cls): | 22 | def __get_parsers(cls): |
| @@ -23,6 +24,7 @@ def _get_parsers() -> list: | |||
| 23 | [g for s in cls.__subclasses__() for g in __get_parsers(s)] | 24 | [g for s in cls.__subclasses__() for g in __get_parsers(s)] |
| 24 | return __get_parsers(abstract.AbstractParser) | 25 | return __get_parsers(abstract.AbstractParser) |
| 25 | 26 | ||
| 27 | |||
| 26 | def get_parser(filename: str) -> (T, str): | 28 | def get_parser(filename: str) -> (T, str): |
| 27 | mtype, _ = mimetypes.guess_type(filename) | 29 | mtype, _ = mimetypes.guess_type(filename) |
| 28 | 30 | ||
diff --git a/tests/test_climat2.py b/tests/test_climat2.py index f395001..b9c52b5 100644 --- a/tests/test_climat2.py +++ b/tests/test_climat2.py | |||
| @@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase): | |||
| 13 | stdout, _ = proc.communicate() | 13 | stdout, _ = proc.communicate() |
| 14 | self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout) | 14 | self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout) |
| 15 | 15 | ||
| 16 | |||
| 16 | class TestGetMeta(unittest.TestCase): | 17 | class TestGetMeta(unittest.TestCase): |
| 17 | def test_pdf(self): | 18 | def test_pdf(self): |
| 18 | proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'], | 19 | proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'], |
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index c2864c6..4cfb80a 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py | |||
| @@ -8,6 +8,7 @@ import tempfile | |||
| 8 | 8 | ||
| 9 | from src import pdf, images, audio, office, parser_factory | 9 | from src import pdf, images, audio, office, parser_factory |
| 10 | 10 | ||
| 11 | |||
| 11 | class TestParserFactory(unittest.TestCase): | 12 | class TestParserFactory(unittest.TestCase): |
| 12 | def test_subsubcalss(self): | 13 | def test_subsubcalss(self): |
| 13 | """ Test that our module auto-detection is handling sub-sub-classes """ | 14 | """ Test that our module auto-detection is handling sub-sub-classes """ |
| @@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase): | |||
| 15 | self.assertEqual(mimetype, 'audio/mpeg') | 16 | self.assertEqual(mimetype, 'audio/mpeg') |
| 16 | self.assertEqual(parser.__class__, audio.MP3Parser) | 17 | self.assertEqual(parser.__class__, audio.MP3Parser) |
| 17 | 18 | ||
| 19 | |||
| 18 | class TestGetMeta(unittest.TestCase): | 20 | class TestGetMeta(unittest.TestCase): |
| 19 | def test_pdf(self): | 21 | def test_pdf(self): |
| 20 | p = pdf.PDFParser('./tests/data/dirty.pdf') | 22 | p = pdf.PDFParser('./tests/data/dirty.pdf') |
| @@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase): | |||
| 132 | 134 | ||
| 133 | os.remove('./tests/data/clean.odt') | 135 | os.remove('./tests/data/clean.odt') |
| 134 | 136 | ||
| 137 | |||
| 135 | class TestCleaning(unittest.TestCase): | 138 | class TestCleaning(unittest.TestCase): |
| 136 | def test_pdf(self): | 139 | def test_pdf(self): |
| 137 | shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') | 140 | shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') |
