summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x  cli.py  11
-rw-r--r--  gui.py  14
-rw-r--r--  lib/archive.py  15
-rw-r--r--  lib/audio.py  10
-rw-r--r--  lib/images.py  10
-rw-r--r--  lib/mat.py  2
-rw-r--r--  lib/misc.py  3
-rw-r--r--  lib/office.py  23
-rw-r--r--  lib/parser.py  7
-rw-r--r--  test/clitest.py  2
-rw-r--r--  test/test.py  22
11 files changed, 75 insertions, 44 deletions
diff --git a/cli.py b/cli.py
index bfedbf6..f72602e 100755
--- a/cli.py
+++ b/cli.py
@@ -12,6 +12,9 @@ __version__ = '0.1'
12 12
13 13
14def parse(): 14def parse():
15 '''
16 Get, and parse options passed to the program
17 '''
15 parser = optparse.OptionParser(usage='%prog [options] filename') 18 parser = optparse.OptionParser(usage='%prog [options] filename')
16 parser.add_option('--add2archive', '-a', action='store_true', 19 parser.add_option('--add2archive', '-a', action='store_true',
17 default=False, help='Add to outputed archive non-supported filetypes') 20 default=False, help='Add to outputed archive non-supported filetypes')
@@ -33,7 +36,10 @@ def parse():
33 return values, arguments 36 return values, arguments
34 37
35 38
36def display_version(*args): 39def display_version(*_):
40 '''
41 Display the program's version, and exit
42 '''
37 print('Metadata Anonymisation Toolkit version %s') % mat.__version__ 43 print('Metadata Anonymisation Toolkit version %s') % mat.__version__
38 print('CLI version %s') % __version__ 44 print('CLI version %s') % __version__
39 print('Hachoir version %s') % hachoir_core.__version__ 45 print('Hachoir version %s') % hachoir_core.__version__
@@ -87,6 +93,9 @@ def clean_meta_ugly(class_file, filename):
87 93
88 94
89def main(): 95def main():
96 '''
97 main function : get args, and launch the appropriate function
98 '''
90 args, filenames = parse() 99 args, filenames = parse()
91 100
92 #func receive the function correponding to the options given as parameters 101 #func receive the function correponding to the options given as parameters
diff --git a/gui.py b/gui.py
index e509935..19fff58 100644
--- a/gui.py
+++ b/gui.py
@@ -1,6 +1,9 @@
1#!/usr/bin/env python 1#!/usr/bin/env python
2 2
3#from gi.repository import gtk, GObject 3'''
4 Metadata anonymisation toolkit - GUI edition
5'''
6
4import gtk 7import gtk
5import gobject 8import gobject
6 9
@@ -232,7 +235,7 @@ class ListStoreApp:
232 w.set_comments('This software was coded during the GSoC 2011') 235 w.set_comments('This software was coded during the GSoC 2011')
233 w.set_website('https://gitweb.torproject.org/user/jvoisin/mat.git') 236 w.set_website('https://gitweb.torproject.org/user/jvoisin/mat.git')
234 w.set_website_label('Website') 237 w.set_website_label('Website')
235 w.set_authors(['Julien (jvoisin) Voisin', ]) 238 w.set_authors(['Julien (jvoisin) Voisin',])
236 w.set_program_name('Metadata Anonymistion Toolkit') 239 w.set_program_name('Metadata Anonymistion Toolkit')
237 click = w.run() 240 click = w.run()
238 if click: 241 if click:
@@ -282,7 +285,7 @@ non-anonymised) file to outputed archive')
282 if response is 0: # gtk.STOCK_OK 285 if response is 0: # gtk.STOCK_OK
283 dialog.destroy() 286 dialog.destroy()
284 287
285 def invert(self, button, name): # still not better :/ 288 def invert(self, _, name): # still not better :/
286 ''' 289 '''
287 Invert a preference state 290 Invert a preference state
288 ''' 291 '''
@@ -293,7 +296,10 @@ non-anonymised) file to outputed archive')
293 elif name is 'backup': 296 elif name is 'backup':
294 self.backup = not self.backup 297 self.backup = not self.backup
295 298
296 def clear_model(self, button=None): 299 def clear_model(self, _):
300 '''
301 Clear the whole list of files
302 '''
297 self.liststore.clear() 303 self.liststore.clear()
298 304
299 def all_if_empy(self, iterator): 305 def all_if_empy(self, iterator):
diff --git a/lib/archive.py b/lib/archive.py
index f9e4dba..108134c 100644
--- a/lib/archive.py
+++ b/lib/archive.py
@@ -1,3 +1,6 @@
1'''
2 Take care of archives formats
3'''
1import tarfile 4import tarfile
2import zipfile 5import zipfile
3 6
@@ -67,6 +70,9 @@ class ZipStripper(GenericArchiveStripper):
67 return True 70 return True
68 71
69 def is_clean(self): 72 def is_clean(self):
73 '''
74 Check if the given file is clean from harmful metadata
75 '''
70 zipin = zipfile.ZipFile(self.filename, 'r') 76 zipin = zipfile.ZipFile(self.filename, 'r')
71 if zipin.comment != '': 77 if zipin.comment != '':
72 logging.debug('%s has a comment' % self.filename) 78 logging.debug('%s has a comment' % self.filename)
@@ -154,6 +160,9 @@ harmless format' % item.filename)
154 160
155 161
156class TarStripper(GenericArchiveStripper): 162class TarStripper(GenericArchiveStripper):
163 '''
164 Represent a tarfile archive
165 '''
157 def _remove(self, current_file): 166 def _remove(self, current_file):
158 ''' 167 '''
159 remove the meta added by tar itself to the file 168 remove the meta added by tar itself to the file
@@ -209,6 +218,9 @@ class TarStripper(GenericArchiveStripper):
209 return True 218 return True
210 219
211 def is_clean(self): 220 def is_clean(self):
221 '''
222 Check if the file is clean from harmful metadatas
223 '''
212 tarin = tarfile.open(self.filename, 'r' + self.compression) 224 tarin = tarfile.open(self.filename, 'r' + self.compression)
213 for item in tarin.getmembers(): 225 for item in tarin.getmembers():
214 if not self.is_file_clean(item): 226 if not self.is_file_clean(item):
@@ -233,6 +245,9 @@ class TarStripper(GenericArchiveStripper):
233 return True 245 return True
234 246
235 def get_meta(self): 247 def get_meta(self):
248 '''
249 Return a dict with all the meta of the file
250 '''
236 tarin = tarfile.open(self.filename, 'r' + self.compression) 251 tarin = tarfile.open(self.filename, 'r' + self.compression)
237 metadata = {} 252 metadata = {}
238 for current_file in tarin.getmembers(): 253 for current_file in tarin.getmembers():
diff --git a/lib/audio.py b/lib/audio.py
index 73030af..f1f53ff 100644
--- a/lib/audio.py
+++ b/lib/audio.py
@@ -3,9 +3,9 @@
3''' 3'''
4try: 4try:
5 from mutagen.flac import FLAC 5 from mutagen.flac import FLAC
6 from mutagen.apev2 import APEv2 6 from mutagen.apev2 import APEv2File
7 from mutagen.oggvorbis import OggVorbis 7 from mutagen.oggvorbis import OggVorbis
8except: 8except ImportError:
9 pass 9 pass
10 10
11 11
@@ -70,7 +70,7 @@ class Apev2Stripper(parser.GenericParser):
70 shutil.copy2(self.filename, self.output) 70 shutil.copy2(self.filename, self.output)
71 self.filename = self.output 71 self.filename = self.output
72 72
73 mfile = APEv2(self.filename) 73 mfile = APEv2File(self.filename)
74 mfile.delete() 74 mfile.delete()
75 mfile.save() 75 mfile.save()
76 76
@@ -78,7 +78,7 @@ class Apev2Stripper(parser.GenericParser):
78 ''' 78 '''
79 Check if the "metadata" block is present in the file 79 Check if the "metadata" block is present in the file
80 ''' 80 '''
81 mfile = APEv2(self.filename) 81 mfile = APEv2File(self.filename)
82 if mfile.tags is None: 82 if mfile.tags is None:
83 return True 83 return True
84 else: 84 else:
@@ -89,7 +89,7 @@ class Apev2Stripper(parser.GenericParser):
89 Return the content of the metadata block if present 89 Return the content of the metadata block if present
90 ''' 90 '''
91 metadata = {} 91 metadata = {}
92 mfile = APEv2(self.filename) 92 mfile = APEv2File(self.filename)
93 if mfile.tags is None: 93 if mfile.tags is None:
94 return metadata 94 return metadata
95 for key, value in mfile.tags: 95 for key, value in mfile.tags:
diff --git a/lib/images.py b/lib/images.py
index df3d256..9fa9999 100644
--- a/lib/images.py
+++ b/lib/images.py
@@ -1,3 +1,7 @@
1'''
2 Takes care about pictures formats
3'''
4
1import parser 5import parser
2 6
3 7
@@ -6,6 +10,9 @@ class JpegStripper(parser.GenericParser):
6 Represents a .jpeg file 10 Represents a .jpeg file
7 ''' 11 '''
8 def _should_remove(self, field): 12 def _should_remove(self, field):
13 '''
14 return True if the field is compromizing
15 '''
9 if field.name.startswith('comment'): 16 if field.name.startswith('comment'):
10 return True 17 return True
11 elif field.name in ("photoshop", "exif", "adobe"): 18 elif field.name in ("photoshop", "exif", "adobe"):
@@ -19,6 +26,9 @@ class PngStripper(parser.GenericParser):
19 Represents a .png file 26 Represents a .png file
20 ''' 27 '''
21 def _should_remove(self, field): 28 def _should_remove(self, field):
29 '''
30 return True if the field is compromizing
31 '''
22 if field.name.startswith("text["): 32 if field.name.startswith("text["):
23 return True 33 return True
24 elif field.name is "time": 34 elif field.name is "time":
diff --git a/lib/mat.py b/lib/mat.py
index fa6cf96..8226c7e 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -7,7 +7,6 @@
7import os 7import os
8import subprocess 8import subprocess
9import logging 9import logging
10import mimetypes
11 10
12import hachoir_core.cmd_line 11import hachoir_core.cmd_line
13import hachoir_parser 12import hachoir_parser
@@ -102,7 +101,6 @@ def create_class_file(name, backup, add2archive):
102 return 101 return
103 102
104 mime = parser.mime_type 103 mime = parser.mime_type
105 print mime
106 104
107 if mime.startswith('application/vnd.oasis.opendocument'): 105 if mime.startswith('application/vnd.oasis.opendocument'):
108 mime = 'application/vnd.oasis.opendocument' # opendocument fileformat 106 mime = 'application/vnd.oasis.opendocument' # opendocument fileformat
diff --git a/lib/misc.py b/lib/misc.py
index f846388..acbaed8 100644
--- a/lib/misc.py
+++ b/lib/misc.py
@@ -31,6 +31,9 @@ class TorrentStripper(parser.GenericParser):
31 return True 31 return True
32 32
33 def get_meta(self): 33 def get_meta(self):
34 '''
35 Return a dict with all the meta of the file
36 '''
34 metadata = {} 37 metadata = {}
35 for field in self.editor['root']: 38 for field in self.editor['root']:
36 if self._should_remove(field): 39 if self._should_remove(field):
diff --git a/lib/office.py b/lib/office.py
index 966a64d..2320e40 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -3,14 +3,9 @@
3''' 3'''
4 4
5import os 5import os
6import mimetypes
7import subprocess
8import tempfile
9import glob
10import logging 6import logging
11import zipfile 7import zipfile
12import re 8import re
13import shutil
14from xml.etree import ElementTree 9from xml.etree import ElementTree
15 10
16try: 11try:
@@ -103,13 +98,13 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
103 zipin.getinfo('meta.xml') 98 zipin.getinfo('meta.xml')
104 return False 99 return False
105 except KeyError: # no meta.xml in the file 100 except KeyError: # no meta.xml in the file
106 zipin.close() 101 zipin.close()
107 czf = archive.ZipStripper(self.filename, self.parser, 102 czf = archive.ZipStripper(self.filename, self.parser,
108 'application/zip', self.backup, self.add2archive) 103 'application/zip', self.backup, self.add2archive)
109 if czf.is_clean(): 104 if czf.is_clean():
110 return True 105 return True
111 else: 106 else:
112 return False 107 return False
113 return True 108 return True
114 109
115 110
@@ -172,7 +167,7 @@ class PdfStripper(parser.GenericParser):
172 ''' 167 '''
173 Return a dict with all the meta of the file 168 Return a dict with all the meta of the file
174 ''' 169 '''
175 metadata={} 170 metadata = {}
176 for key in self.meta_list: 171 for key in self.meta_list:
177 if key == 'creation-date' or key == 'mod-date': 172 if key == 'creation-date' or key == 'mod-date':
178 #creation and modification are set to -1 173 #creation and modification are set to -1
@@ -181,5 +176,5 @@ class PdfStripper(parser.GenericParser):
181 else: 176 else:
182 if self.document.get_property(key) is not None and \ 177 if self.document.get_property(key) is not None and \
183 self.document.get_property(key) != '': 178 self.document.get_property(key) != '':
184 metadata[key] = self.document.get_property(key) 179 metadata[key] = self.document.get_property(key)
185 return metadata 180 return metadata
diff --git a/lib/parser.py b/lib/parser.py
index 385dd78..044ef0a 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -6,7 +6,6 @@ import hachoir_core
6import hachoir_editor 6import hachoir_editor
7 7
8import os 8import os
9import mimetypes
10 9
11import mat 10import mat
12 11
@@ -14,6 +13,9 @@ NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml')
14 13
15 14
16class GenericParser(object): 15class GenericParser(object):
16 '''
17 Parent class of all parsers
18 '''
17 def __init__(self, filename, parser, mime, backup, add2archive): 19 def __init__(self, filename, parser, mime, backup, add2archive):
18 self.filename = '' 20 self.filename = ''
19 self.parser = parser 21 self.parser = parser
@@ -30,7 +32,6 @@ class GenericParser(object):
30 self.basename = os.path.basename(filename) # only filename 32 self.basename = os.path.basename(filename) # only filename
31 33
32 34
33
34 def is_clean(self): 35 def is_clean(self):
35 ''' 36 '''
36 Check if the file is clean from harmful metadatas 37 Check if the file is clean from harmful metadatas
@@ -68,7 +69,7 @@ class GenericParser(object):
68 69
69 def get_meta(self): 70 def get_meta(self):
70 ''' 71 '''
71 return a dict with all the meta of the file 72 Return a dict with all the meta of the file
72 ''' 73 '''
73 metadata = {} 74 metadata = {}
74 for field in self.editor: 75 for field in self.editor:
diff --git a/test/clitest.py b/test/clitest.py
index f71a622..453333a 100644
--- a/test/clitest.py
+++ b/test/clitest.py
@@ -8,7 +8,6 @@ import subprocess
8import sys 8import sys
9 9
10sys.path.append('..') 10sys.path.append('..')
11#import cli
12from lib import mat 11from lib import mat
13import test 12import test
14 13
@@ -57,7 +56,6 @@ class TestisCleancli(test.MATTest):
57 ''' 56 '''
58 check if cli.py correctly check if a file is clean or not 57 check if cli.py correctly check if a file is clean or not
59 ''' 58 '''
60 #FIXME : use an external file with string as const ?
61 def test_clean(self): 59 def test_clean(self):
62 '''test is_clean on clean files''' 60 '''test is_clean on clean files'''
63 for clean, _ in self.file_list: 61 for clean, _ in self.file_list:
diff --git a/test/test.py b/test/test.py
index ac63f60..227170b 100644
--- a/test/test.py
+++ b/test/test.py
@@ -12,32 +12,28 @@ import sys
12import tempfile 12import tempfile
13import unittest 13import unittest
14 14
15sys.path.append('..')
16from lib import mat
17
18VERBOSITY = 3 15VERBOSITY = 3
19FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) 16FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*'))
20 17
21class MATTest(unittest.TestCase): 18class MATTest(unittest.TestCase):
22 def setUp(self):
23 '''
24 Create working copy of the clean and the dirty file in the TMP dir
25 ''' 19 '''
20 Parent class of all test-functions
21 '''
22 def setUp(self):
23 '''
24 Create working copy of the clean and the dirty file in the TMP dir
25 '''
26 self.file_list = [] 26 self.file_list = []
27 self.tmpdir = tempfile.mkdtemp() 27 self.tmpdir = tempfile.mkdtemp()
28 28
29 for clean, dirty in FILE_LIST: 29 for clean, dirty in FILE_LIST:
30 shutil.copy2(clean, self.tmpdir + os.sep + clean) 30 shutil.copy2(clean, self.tmpdir + os.sep + clean)
31 shutil.copy2(dirty, self.tmpdir + os.sep + dirty) 31 shutil.copy2(dirty, self.tmpdir + os.sep + dirty)
32
33 self.file_list.append((self.tmpdir + os.sep + clean, 32 self.file_list.append((self.tmpdir + os.sep + clean,
34 self.tmpdir + os.sep + dirty)) 33 self.tmpdir + os.sep + dirty))
35 34
36 def tearDown(self): 35 def tearDown(self):
37 ''' 36 '''
38 Remove the tmp folder 37 Remove the tmp folder
39 ''' 38 '''
40 for clean, dirty in self.file_list:
41 mat.secure_remove(clean)
42 mat.secure_remove(dirty)
43 shutil.rmtree(self.tmpdir) 39 shutil.rmtree(self.tmpdir)