Add blank lines before top-level definitions to make the code more PEP 8 compliant

author: jvoisin 2018-04-04 23:21:48 +0200
committer: jvoisin 2018-04-04 23:21:48 +0200
commit: 0239ab3b6a6ae38dbf5ba439f91f0cee498711a9 (patch)
tree: 19159991a5fe0d504ebbd2396800a9a1fda38a09
parent: 9fa76c4c2038a776cc94af5d9f307bbaad52083c (diff)
9 files changed, 21 insertions, 0 deletions
diff --git a/main.py b/main.py
index 65158e6..80355ae 100755
--- a/main.py
+++ b/main.py
@@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
        return False
    return True
 def create_arg_parser():
    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    parser.add_argument('files', nargs='*')
@@ -29,6 +30,7 @@ def create_arg_parser():
                      help='list all the harmful metadata of a file without removing them')
    return parser
 def show_meta(filename:str):
    if not __check_file(filename):
        return
@@ -44,6 +46,7 @@ def show_meta(filename:str):
        except UnicodeEncodeError:
            print("  %s: harmful content" % k)
 def clean_meta(filename:str):
    if not __check_file(filename, os.R_OK|os.W_OK):
        return
@@ -54,6 +57,7 @@ def clean_meta(filename:str):
        return
    p.remove_all()
 def show_parsers():
    print('[+] Supported formats:')
    for parser in parser_factory._get_parsers():
@@ -61,6 +65,7 @@ def show_parsers():
            extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
            print('  - %s (%s)' % (mtype, extensions))
 def __get_files_recursively(files):
    for f in files:
        if os.path.isfile(f):
diff --git a/src/abstract.py b/src/abstract.py
index 4626789..04c1535 100644
--- a/src/abstract.py
+++ b/src/abstract.py
@@ -1,5 +1,6 @@
 import abc
 class AbstractParser(abc.ABC):
    meta_list = set()
    mimetypes = set()
diff --git a/src/audio.py b/src/audio.py
index a56828f..4a385b2 100644
--- a/src/audio.py
+++ b/src/audio.py
@@ -4,6 +4,7 @@ import mutagen
 from . import abstract
 class MutagenParser(abstract.AbstractParser):
    def get_meta(self):
        f = mutagen.File(self.filename)
@@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
        f.save()
        return True
 class MP3Parser(MutagenParser):
    mimetypes = {'audio/mpeg', }
@@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
            metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
        return metadata
 class OGGParser(MutagenParser):
    mimetypes = {'audio/ogg', }
 class FLACParser(MutagenParser):
    mimetypes = {'audio/flac', }
diff --git a/src/harmless.py b/src/harmless.py
index 235dabe..9e7c1b4 100644
--- a/src/harmless.py
+++ b/src/harmless.py
@@ -1,5 +1,6 @@
 from . import abstract
 class HarmlessParser(abstract.AbstractParser):
    """ This is the parser for filetypes that do not contain metadata. """
    mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}
diff --git a/src/images.py b/src/images.py
index afc0658..2c1fd2e 100644
--- a/src/images.py
+++ b/src/images.py
@@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
 from . import abstract
 class PNGParser(abstract.AbstractParser):
    mimetypes = {'image/png', }
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
@@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
        surface.write_to_png(self.output_filename)
        return True
 class GdkPixbufAbstractParser(abstract.AbstractParser):
    """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
        this has the side-effect of removing metadata completely.
diff --git a/src/office.py b/src/office.py
index 9729e19..11692c3 100644
--- a/src/office.py
+++ b/src/office.py
@@ -7,6 +7,7 @@ import zipfile
 from . import abstract, parser_factory
 class ArchiveBasedAbstractParser(abstract.AbstractParser):
    def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
        zipinfo.compress_type = zipfile.ZIP_DEFLATED
@@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
        with open(tmp_parser.output_filename, 'rb') as f:
            zout.writestr(clean_zinfo, f.read())
 class MSOfficeParser(ArchiveBasedAbstractParser):
    mimetypes = {
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
diff --git a/src/parser_factory.py b/src/parser_factory.py
index ecec789..68e9e9c 100644
--- a/src/parser_factory.py
+++ b/src/parser_factory.py
@@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
        continue
    importlib.import_module(name)
 def _get_parsers() -> list:
    """ Get all our parsers!"""
    def __get_parsers(cls):
@@ -23,6 +24,7 @@ def _get_parsers() -> list:
            [g for s in cls.__subclasses__() for g in __get_parsers(s)]
    return __get_parsers(abstract.AbstractParser)
 def get_parser(filename: str) -> (T, str):
    mtype, _ = mimetypes.guess_type(filename)
diff --git a/tests/test_climat2.py b/tests/test_climat2.py
index f395001..b9c52b5 100644
--- a/tests/test_climat2.py
+++ b/tests/test_climat2.py
@@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
        stdout, _ = proc.communicate()
        self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
 class TestGetMeta(unittest.TestCase):
    def test_pdf(self):
        proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index c2864c6..4cfb80a 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -8,6 +8,7 @@ import tempfile
 from src import pdf, images, audio, office, parser_factory
 class TestParserFactory(unittest.TestCase):
    def test_subsubcalss(self):
        """ Test that our module auto-detection is handling sub-sub-classes """
@@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
        self.assertEqual(mimetype, 'audio/mpeg')
        self.assertEqual(parser.__class__, audio.MP3Parser)
 class TestGetMeta(unittest.TestCase):
    def test_pdf(self):
        p = pdf.PDFParser('./tests/data/dirty.pdf')
@@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
        os.remove('./tests/data/clean.odt')
 class TestCleaning(unittest.TestCase):
    def test_pdf(self):
        shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
author	jvoisin	2018-04-04 23:21:48 +0200
committer	jvoisin	2018-04-04 23:21:48 +0200
commit	0239ab3b6a6ae38dbf5ba439f91f0cee498711a9 (patch)
tree	19159991a5fe0d504ebbd2396800a9a1fda38a09
parent	9fa76c4c2038a776cc94af5d9f307bbaad52083c (diff)

diff --git a/main.py b/main.py index 65158e6..80355ae 100755 --- a/main.py +++ b/main.py
@@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
16	return False	16	return False
17	return True	17	return True
18		18
		19
19	def create_arg_parser():	20	def create_arg_parser():
20	parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')	21	parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
21	parser.add_argument('files', nargs='*')	22	parser.add_argument('files', nargs='*')
@@ -29,6 +30,7 @@ def create_arg_parser():
29	help='list all the harmful metadata of a file without removing them')	30	help='list all the harmful metadata of a file without removing them')
30	return parser	31	return parser
31		32
		33
32	def show_meta(filename:str):	34	def show_meta(filename:str):
33	if not __check_file(filename):	35	if not __check_file(filename):
34	return	36	return
@@ -44,6 +46,7 @@ def show_meta(filename:str):
44	except UnicodeEncodeError:	46	except UnicodeEncodeError:
45	print("  %s: harmful content" % k)	47	print("  %s: harmful content" % k)
46		48
		49
47	def clean_meta(filename:str):	50	def clean_meta(filename:str):
48	if not __check_file(filename, os.R_OK|os.W_OK):	51	if not __check_file(filename, os.R_OK|os.W_OK):
49	return	52	return
@@ -54,6 +57,7 @@ def clean_meta(filename:str):
54	return	57	return
55	p.remove_all()	58	p.remove_all()
56		59
		60
57	def show_parsers():	61	def show_parsers():
58	print('[+] Supported formats:')	62	print('[+] Supported formats:')
59	for parser in parser_factory._get_parsers():	63	for parser in parser_factory._get_parsers():
@@ -61,6 +65,7 @@ def show_parsers():
61	extensions = ', '.join(mimetypes.guess_all_extensions(mtype))	65	extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
62	print('  - %s (%s)' % (mtype, extensions))	66	print('  - %s (%s)' % (mtype, extensions))
63		67
		68
64	def __get_files_recursively(files):	69	def __get_files_recursively(files):
65	for f in files:	70	for f in files:
66	if os.path.isfile(f):	71	if os.path.isfile(f):


diff --git a/src/abstract.py b/src/abstract.py index 4626789..04c1535 100644 --- a/src/abstract.py +++ b/src/abstract.py
@@ -1,5 +1,6 @@
1	import abc	1	import abc
2		2
		3
3	class AbstractParser(abc.ABC):	4	class AbstractParser(abc.ABC):
4	meta_list = set()	5	meta_list = set()
5	mimetypes = set()	6	mimetypes = set()


diff --git a/src/audio.py b/src/audio.py index a56828f..4a385b2 100644 --- a/src/audio.py +++ b/src/audio.py
@@ -4,6 +4,7 @@ import mutagen
4		4
5	from . import abstract	5	from . import abstract
6		6
		7
7	class MutagenParser(abstract.AbstractParser):	8	class MutagenParser(abstract.AbstractParser):
8	def get_meta(self):	9	def get_meta(self):
9	f = mutagen.File(self.filename)	10	f = mutagen.File(self.filename)
@@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
18	f.save()	19	f.save()
19	return True	20	return True
20		21
		22
21	class MP3Parser(MutagenParser):	23	class MP3Parser(MutagenParser):
22	mimetypes = {'audio/mpeg', }	24	mimetypes = {'audio/mpeg', }
23		25
@@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
28	metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))	30	metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
29	return metadata	31	return metadata
30		32
		33
31	class OGGParser(MutagenParser):	34	class OGGParser(MutagenParser):
32	mimetypes = {'audio/ogg', }	35	mimetypes = {'audio/ogg', }
33		36
		37
34	class FLACParser(MutagenParser):	38	class FLACParser(MutagenParser):
35	mimetypes = {'audio/flac', }	39	mimetypes = {'audio/flac', }


diff --git a/src/harmless.py b/src/harmless.py index 235dabe..9e7c1b4 100644 --- a/src/harmless.py +++ b/src/harmless.py
@@ -1,5 +1,6 @@
1	from . import abstract	1	from . import abstract
2		2
		3
3	class HarmlessParser(abstract.AbstractParser):	4	class HarmlessParser(abstract.AbstractParser):
4	""" This is the parser for filetypes that do not contain metadata. """	5	""" This is the parser for filetypes that do not contain metadata. """
5	mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}	6	mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}


diff --git a/src/images.py b/src/images.py index afc0658..2c1fd2e 100644 --- a/src/images.py +++ b/src/images.py
@@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
10		10
11	from . import abstract	11	from . import abstract
12		12
		13
13	class PNGParser(abstract.AbstractParser):	14	class PNGParser(abstract.AbstractParser):
14	mimetypes = {'image/png', }	15	mimetypes = {'image/png', }
15	meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',	16	meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
@@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
31	surface.write_to_png(self.output_filename)	32	surface.write_to_png(self.output_filename)
32	return True	33	return True
33		34
		35
34	class GdkPixbufAbstractParser(abstract.AbstractParser):	36	class GdkPixbufAbstractParser(abstract.AbstractParser):
35	""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,	37	""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
36	this has the side-effect of removing metadata completely.	38	this has the side-effect of removing metadata completely.


diff --git a/src/office.py b/src/office.py index 9729e19..11692c3 100644 --- a/src/office.py +++ b/src/office.py
@@ -7,6 +7,7 @@ import zipfile
7		7
8	from . import abstract, parser_factory	8	from . import abstract, parser_factory
9		9
		10
10	class ArchiveBasedAbstractParser(abstract.AbstractParser):	11	class ArchiveBasedAbstractParser(abstract.AbstractParser):
11	def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:	12	def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
12	zipinfo.compress_type = zipfile.ZIP_DEFLATED	13	zipinfo.compress_type = zipfile.ZIP_DEFLATED
@@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
46	with open(tmp_parser.output_filename, 'rb') as f:	47	with open(tmp_parser.output_filename, 'rb') as f:
47	zout.writestr(clean_zinfo, f.read())	48	zout.writestr(clean_zinfo, f.read())
48		49
		50
49	class MSOfficeParser(ArchiveBasedAbstractParser):	51	class MSOfficeParser(ArchiveBasedAbstractParser):
50	mimetypes = {	52	mimetypes = {
51	'application/vnd.openxmlformats-officedocument.wordprocessingml.document',	53	'application/vnd.openxmlformats-officedocument.wordprocessingml.document',


diff --git a/src/parser_factory.py b/src/parser_factory.py index ecec789..68e9e9c 100644 --- a/src/parser_factory.py +++ b/src/parser_factory.py
@@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
16	continue	16	continue
17	importlib.import_module(name)	17	importlib.import_module(name)
18		18
		19
19	def _get_parsers() -> list:	20	def _get_parsers() -> list:
20	""" Get all our parsers!"""	21	""" Get all our parsers!"""
21	def __get_parsers(cls):	22	def __get_parsers(cls):
@@ -23,6 +24,7 @@ def _get_parsers() -> list:
23	[g for s in cls.__subclasses__() for g in __get_parsers(s)]	24	[g for s in cls.__subclasses__() for g in __get_parsers(s)]
24	return __get_parsers(abstract.AbstractParser)	25	return __get_parsers(abstract.AbstractParser)
25		26
		27
26	def get_parser(filename: str) -> (T, str):	28	def get_parser(filename: str) -> (T, str):
27	mtype, _ = mimetypes.guess_type(filename)	29	mtype, _ = mimetypes.guess_type(filename)
28		30


diff --git a/tests/test_climat2.py b/tests/test_climat2.py index f395001..b9c52b5 100644 --- a/tests/test_climat2.py +++ b/tests/test_climat2.py
@@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
13	stdout, _ = proc.communicate()	13	stdout, _ = proc.communicate()
14	self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)	14	self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
15		15
		16
16	class TestGetMeta(unittest.TestCase):	17	class TestGetMeta(unittest.TestCase):
17	def test_pdf(self):	18	def test_pdf(self):
18	proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],	19	proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],


diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index c2864c6..4cfb80a 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py
@@ -8,6 +8,7 @@ import tempfile
8		8
9	from src import pdf, images, audio, office, parser_factory	9	from src import pdf, images, audio, office, parser_factory
10		10
		11
11	class TestParserFactory(unittest.TestCase):	12	class TestParserFactory(unittest.TestCase):
12	def test_subsubcalss(self):	13	def test_subsubcalss(self):
13	""" Test that our module auto-detection is handling sub-sub-classes """	14	""" Test that our module auto-detection is handling sub-sub-classes """
@@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
15	self.assertEqual(mimetype, 'audio/mpeg')	16	self.assertEqual(mimetype, 'audio/mpeg')
16	self.assertEqual(parser.__class__, audio.MP3Parser)	17	self.assertEqual(parser.__class__, audio.MP3Parser)
17		18
		19
18	class TestGetMeta(unittest.TestCase):	20	class TestGetMeta(unittest.TestCase):
19	def test_pdf(self):	21	def test_pdf(self):
20	p = pdf.PDFParser('./tests/data/dirty.pdf')	22	p = pdf.PDFParser('./tests/data/dirty.pdf')
@@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
132		134
133	os.remove('./tests/data/clean.odt')	135	os.remove('./tests/data/clean.odt')
134		136
		137
135	class TestCleaning(unittest.TestCase):	138	class TestCleaning(unittest.TestCase):
136	def test_pdf(self):	139	def test_pdf(self):
137	shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')	140	shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')