diff options
| -rwxr-xr-x | cli.py | 11 | ||||
| -rw-r--r-- | gui.py | 14 | ||||
| -rw-r--r-- | lib/archive.py | 15 | ||||
| -rw-r--r-- | lib/audio.py | 10 | ||||
| -rw-r--r-- | lib/images.py | 10 | ||||
| -rw-r--r-- | lib/mat.py | 2 | ||||
| -rw-r--r-- | lib/misc.py | 3 | ||||
| -rw-r--r-- | lib/office.py | 23 | ||||
| -rw-r--r-- | lib/parser.py | 7 | ||||
| -rw-r--r-- | test/clitest.py | 2 | ||||
| -rw-r--r-- | test/test.py | 22 |
11 files changed, 75 insertions, 44 deletions
| @@ -12,6 +12,9 @@ __version__ = '0.1' | |||
| 12 | 12 | ||
| 13 | 13 | ||
| 14 | def parse(): | 14 | def parse(): |
| 15 | ''' | ||
| 16 | Get, and parse options passed to the program | ||
| 17 | ''' | ||
| 15 | parser = optparse.OptionParser(usage='%prog [options] filename') | 18 | parser = optparse.OptionParser(usage='%prog [options] filename') |
| 16 | parser.add_option('--add2archive', '-a', action='store_true', | 19 | parser.add_option('--add2archive', '-a', action='store_true', |
| 17 | default=False, help='Add to outputed archive non-supported filetypes') | 20 | default=False, help='Add to outputed archive non-supported filetypes') |
| @@ -33,7 +36,10 @@ def parse(): | |||
| 33 | return values, arguments | 36 | return values, arguments |
| 34 | 37 | ||
| 35 | 38 | ||
| 36 | def display_version(*args): | 39 | def display_version(*_): |
| 40 | ''' | ||
| 41 | Display the program's version, and exit | ||
| 42 | ''' | ||
| 37 | print('Metadata Anonymisation Toolkit version %s') % mat.__version__ | 43 | print('Metadata Anonymisation Toolkit version %s') % mat.__version__ |
| 38 | print('CLI version %s') % __version__ | 44 | print('CLI version %s') % __version__ |
| 39 | print('Hachoir version %s') % hachoir_core.__version__ | 45 | print('Hachoir version %s') % hachoir_core.__version__ |
| @@ -87,6 +93,9 @@ def clean_meta_ugly(class_file, filename): | |||
| 87 | 93 | ||
| 88 | 94 | ||
| 89 | def main(): | 95 | def main(): |
| 96 | ''' | ||
| 97 | main function : get args, and launch the appropriate function | ||
| 98 | ''' | ||
| 90 | args, filenames = parse() | 99 | args, filenames = parse() |
| 91 | 100 | ||
| 92 | #func receive the function correponding to the options given as parameters | 101 | #func receive the function correponding to the options given as parameters |
| @@ -1,6 +1,9 @@ | |||
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | 2 | ||
| 3 | #from gi.repository import gtk, GObject | 3 | ''' |
| 4 | Metadata anonymisation toolkit - GUI edition | ||
| 5 | ''' | ||
| 6 | |||
| 4 | import gtk | 7 | import gtk |
| 5 | import gobject | 8 | import gobject |
| 6 | 9 | ||
| @@ -232,7 +235,7 @@ class ListStoreApp: | |||
| 232 | w.set_comments('This software was coded during the GSoC 2011') | 235 | w.set_comments('This software was coded during the GSoC 2011') |
| 233 | w.set_website('https://gitweb.torproject.org/user/jvoisin/mat.git') | 236 | w.set_website('https://gitweb.torproject.org/user/jvoisin/mat.git') |
| 234 | w.set_website_label('Website') | 237 | w.set_website_label('Website') |
| 235 | w.set_authors(['Julien (jvoisin) Voisin', ]) | 238 | w.set_authors(['Julien (jvoisin) Voisin',]) |
| 236 | w.set_program_name('Metadata Anonymistion Toolkit') | 239 | w.set_program_name('Metadata Anonymistion Toolkit') |
| 237 | click = w.run() | 240 | click = w.run() |
| 238 | if click: | 241 | if click: |
| @@ -282,7 +285,7 @@ non-anonymised) file to outputed archive') | |||
| 282 | if response is 0: # gtk.STOCK_OK | 285 | if response is 0: # gtk.STOCK_OK |
| 283 | dialog.destroy() | 286 | dialog.destroy() |
| 284 | 287 | ||
| 285 | def invert(self, button, name): # still not better :/ | 288 | def invert(self, _, name): # still not better :/ |
| 286 | ''' | 289 | ''' |
| 287 | Invert a preference state | 290 | Invert a preference state |
| 288 | ''' | 291 | ''' |
| @@ -293,7 +296,10 @@ non-anonymised) file to outputed archive') | |||
| 293 | elif name is 'backup': | 296 | elif name is 'backup': |
| 294 | self.backup = not self.backup | 297 | self.backup = not self.backup |
| 295 | 298 | ||
| 296 | def clear_model(self, button=None): | 299 | def clear_model(self, _): |
| 300 | ''' | ||
| 301 | Clear the whole list of files | ||
| 302 | ''' | ||
| 297 | self.liststore.clear() | 303 | self.liststore.clear() |
| 298 | 304 | ||
| 299 | def all_if_empy(self, iterator): | 305 | def all_if_empy(self, iterator): |
diff --git a/lib/archive.py b/lib/archive.py index f9e4dba..108134c 100644 --- a/lib/archive.py +++ b/lib/archive.py | |||
| @@ -1,3 +1,6 @@ | |||
| 1 | ''' | ||
| 2 | Take care of archives formats | ||
| 3 | ''' | ||
| 1 | import tarfile | 4 | import tarfile |
| 2 | import zipfile | 5 | import zipfile |
| 3 | 6 | ||
| @@ -67,6 +70,9 @@ class ZipStripper(GenericArchiveStripper): | |||
| 67 | return True | 70 | return True |
| 68 | 71 | ||
| 69 | def is_clean(self): | 72 | def is_clean(self): |
| 73 | ''' | ||
| 74 | Check if the given file is clean from harmful metadata | ||
| 75 | ''' | ||
| 70 | zipin = zipfile.ZipFile(self.filename, 'r') | 76 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 71 | if zipin.comment != '': | 77 | if zipin.comment != '': |
| 72 | logging.debug('%s has a comment' % self.filename) | 78 | logging.debug('%s has a comment' % self.filename) |
| @@ -154,6 +160,9 @@ harmless format' % item.filename) | |||
| 154 | 160 | ||
| 155 | 161 | ||
| 156 | class TarStripper(GenericArchiveStripper): | 162 | class TarStripper(GenericArchiveStripper): |
| 163 | ''' | ||
| 164 | Represent a tarfile archive | ||
| 165 | ''' | ||
| 157 | def _remove(self, current_file): | 166 | def _remove(self, current_file): |
| 158 | ''' | 167 | ''' |
| 159 | remove the meta added by tar itself to the file | 168 | remove the meta added by tar itself to the file |
| @@ -209,6 +218,9 @@ class TarStripper(GenericArchiveStripper): | |||
| 209 | return True | 218 | return True |
| 210 | 219 | ||
| 211 | def is_clean(self): | 220 | def is_clean(self): |
| 221 | ''' | ||
| 222 | Check if the file is clean from harmful metadatas | ||
| 223 | ''' | ||
| 212 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 224 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 213 | for item in tarin.getmembers(): | 225 | for item in tarin.getmembers(): |
| 214 | if not self.is_file_clean(item): | 226 | if not self.is_file_clean(item): |
| @@ -233,6 +245,9 @@ class TarStripper(GenericArchiveStripper): | |||
| 233 | return True | 245 | return True |
| 234 | 246 | ||
| 235 | def get_meta(self): | 247 | def get_meta(self): |
| 248 | ''' | ||
| 249 | Return a dict with all the meta of the file | ||
| 250 | ''' | ||
| 236 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 251 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 237 | metadata = {} | 252 | metadata = {} |
| 238 | for current_file in tarin.getmembers(): | 253 | for current_file in tarin.getmembers(): |
diff --git a/lib/audio.py b/lib/audio.py index 73030af..f1f53ff 100644 --- a/lib/audio.py +++ b/lib/audio.py | |||
| @@ -3,9 +3,9 @@ | |||
| 3 | ''' | 3 | ''' |
| 4 | try: | 4 | try: |
| 5 | from mutagen.flac import FLAC | 5 | from mutagen.flac import FLAC |
| 6 | from mutagen.apev2 import APEv2 | 6 | from mutagen.apev2 import APEv2File |
| 7 | from mutagen.oggvorbis import OggVorbis | 7 | from mutagen.oggvorbis import OggVorbis |
| 8 | except: | 8 | except ImportError: |
| 9 | pass | 9 | pass |
| 10 | 10 | ||
| 11 | 11 | ||
| @@ -70,7 +70,7 @@ class Apev2Stripper(parser.GenericParser): | |||
| 70 | shutil.copy2(self.filename, self.output) | 70 | shutil.copy2(self.filename, self.output) |
| 71 | self.filename = self.output | 71 | self.filename = self.output |
| 72 | 72 | ||
| 73 | mfile = APEv2(self.filename) | 73 | mfile = APEv2File(self.filename) |
| 74 | mfile.delete() | 74 | mfile.delete() |
| 75 | mfile.save() | 75 | mfile.save() |
| 76 | 76 | ||
| @@ -78,7 +78,7 @@ class Apev2Stripper(parser.GenericParser): | |||
| 78 | ''' | 78 | ''' |
| 79 | Check if the "metadata" block is present in the file | 79 | Check if the "metadata" block is present in the file |
| 80 | ''' | 80 | ''' |
| 81 | mfile = APEv2(self.filename) | 81 | mfile = APEv2File(self.filename) |
| 82 | if mfile.tags is None: | 82 | if mfile.tags is None: |
| 83 | return True | 83 | return True |
| 84 | else: | 84 | else: |
| @@ -89,7 +89,7 @@ class Apev2Stripper(parser.GenericParser): | |||
| 89 | Return the content of the metadata block if present | 89 | Return the content of the metadata block if present |
| 90 | ''' | 90 | ''' |
| 91 | metadata = {} | 91 | metadata = {} |
| 92 | mfile = APEv2(self.filename) | 92 | mfile = APEv2File(self.filename) |
| 93 | if mfile.tags is None: | 93 | if mfile.tags is None: |
| 94 | return metadata | 94 | return metadata |
| 95 | for key, value in mfile.tags: | 95 | for key, value in mfile.tags: |
diff --git a/lib/images.py b/lib/images.py index df3d256..9fa9999 100644 --- a/lib/images.py +++ b/lib/images.py | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | ''' | ||
| 2 | Takes care about pictures formats | ||
| 3 | ''' | ||
| 4 | |||
| 1 | import parser | 5 | import parser |
| 2 | 6 | ||
| 3 | 7 | ||
| @@ -6,6 +10,9 @@ class JpegStripper(parser.GenericParser): | |||
| 6 | Represents a .jpeg file | 10 | Represents a .jpeg file |
| 7 | ''' | 11 | ''' |
| 8 | def _should_remove(self, field): | 12 | def _should_remove(self, field): |
| 13 | ''' | ||
| 14 | return True if the field is compromizing | ||
| 15 | ''' | ||
| 9 | if field.name.startswith('comment'): | 16 | if field.name.startswith('comment'): |
| 10 | return True | 17 | return True |
| 11 | elif field.name in ("photoshop", "exif", "adobe"): | 18 | elif field.name in ("photoshop", "exif", "adobe"): |
| @@ -19,6 +26,9 @@ class PngStripper(parser.GenericParser): | |||
| 19 | Represents a .png file | 26 | Represents a .png file |
| 20 | ''' | 27 | ''' |
| 21 | def _should_remove(self, field): | 28 | def _should_remove(self, field): |
| 29 | ''' | ||
| 30 | return True if the field is compromizing | ||
| 31 | ''' | ||
| 22 | if field.name.startswith("text["): | 32 | if field.name.startswith("text["): |
| 23 | return True | 33 | return True |
| 24 | elif field.name is "time": | 34 | elif field.name is "time": |
| @@ -7,7 +7,6 @@ | |||
| 7 | import os | 7 | import os |
| 8 | import subprocess | 8 | import subprocess |
| 9 | import logging | 9 | import logging |
| 10 | import mimetypes | ||
| 11 | 10 | ||
| 12 | import hachoir_core.cmd_line | 11 | import hachoir_core.cmd_line |
| 13 | import hachoir_parser | 12 | import hachoir_parser |
| @@ -102,7 +101,6 @@ def create_class_file(name, backup, add2archive): | |||
| 102 | return | 101 | return |
| 103 | 102 | ||
| 104 | mime = parser.mime_type | 103 | mime = parser.mime_type |
| 105 | print mime | ||
| 106 | 104 | ||
| 107 | if mime.startswith('application/vnd.oasis.opendocument'): | 105 | if mime.startswith('application/vnd.oasis.opendocument'): |
| 108 | mime = 'application/vnd.oasis.opendocument' # opendocument fileformat | 106 | mime = 'application/vnd.oasis.opendocument' # opendocument fileformat |
diff --git a/lib/misc.py b/lib/misc.py index f846388..acbaed8 100644 --- a/lib/misc.py +++ b/lib/misc.py | |||
| @@ -31,6 +31,9 @@ class TorrentStripper(parser.GenericParser): | |||
| 31 | return True | 31 | return True |
| 32 | 32 | ||
| 33 | def get_meta(self): | 33 | def get_meta(self): |
| 34 | ''' | ||
| 35 | Return a dict with all the meta of the file | ||
| 36 | ''' | ||
| 34 | metadata = {} | 37 | metadata = {} |
| 35 | for field in self.editor['root']: | 38 | for field in self.editor['root']: |
| 36 | if self._should_remove(field): | 39 | if self._should_remove(field): |
diff --git a/lib/office.py b/lib/office.py index 966a64d..2320e40 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -3,14 +3,9 @@ | |||
| 3 | ''' | 3 | ''' |
| 4 | 4 | ||
| 5 | import os | 5 | import os |
| 6 | import mimetypes | ||
| 7 | import subprocess | ||
| 8 | import tempfile | ||
| 9 | import glob | ||
| 10 | import logging | 6 | import logging |
| 11 | import zipfile | 7 | import zipfile |
| 12 | import re | 8 | import re |
| 13 | import shutil | ||
| 14 | from xml.etree import ElementTree | 9 | from xml.etree import ElementTree |
| 15 | 10 | ||
| 16 | try: | 11 | try: |
| @@ -103,13 +98,13 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 103 | zipin.getinfo('meta.xml') | 98 | zipin.getinfo('meta.xml') |
| 104 | return False | 99 | return False |
| 105 | except KeyError: # no meta.xml in the file | 100 | except KeyError: # no meta.xml in the file |
| 106 | zipin.close() | 101 | zipin.close() |
| 107 | czf = archive.ZipStripper(self.filename, self.parser, | 102 | czf = archive.ZipStripper(self.filename, self.parser, |
| 108 | 'application/zip', self.backup, self.add2archive) | 103 | 'application/zip', self.backup, self.add2archive) |
| 109 | if czf.is_clean(): | 104 | if czf.is_clean(): |
| 110 | return True | 105 | return True |
| 111 | else: | 106 | else: |
| 112 | return False | 107 | return False |
| 113 | return True | 108 | return True |
| 114 | 109 | ||
| 115 | 110 | ||
| @@ -172,7 +167,7 @@ class PdfStripper(parser.GenericParser): | |||
| 172 | ''' | 167 | ''' |
| 173 | Return a dict with all the meta of the file | 168 | Return a dict with all the meta of the file |
| 174 | ''' | 169 | ''' |
| 175 | metadata={} | 170 | metadata = {} |
| 176 | for key in self.meta_list: | 171 | for key in self.meta_list: |
| 177 | if key == 'creation-date' or key == 'mod-date': | 172 | if key == 'creation-date' or key == 'mod-date': |
| 178 | #creation and modification are set to -1 | 173 | #creation and modification are set to -1 |
| @@ -181,5 +176,5 @@ class PdfStripper(parser.GenericParser): | |||
| 181 | else: | 176 | else: |
| 182 | if self.document.get_property(key) is not None and \ | 177 | if self.document.get_property(key) is not None and \ |
| 183 | self.document.get_property(key) != '': | 178 | self.document.get_property(key) != '': |
| 184 | metadata[key] = self.document.get_property(key) | 179 | metadata[key] = self.document.get_property(key) |
| 185 | return metadata | 180 | return metadata |
diff --git a/lib/parser.py b/lib/parser.py index 385dd78..044ef0a 100644 --- a/lib/parser.py +++ b/lib/parser.py | |||
| @@ -6,7 +6,6 @@ import hachoir_core | |||
| 6 | import hachoir_editor | 6 | import hachoir_editor |
| 7 | 7 | ||
| 8 | import os | 8 | import os |
| 9 | import mimetypes | ||
| 10 | 9 | ||
| 11 | import mat | 10 | import mat |
| 12 | 11 | ||
| @@ -14,6 +13,9 @@ NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml') | |||
| 14 | 13 | ||
| 15 | 14 | ||
| 16 | class GenericParser(object): | 15 | class GenericParser(object): |
| 16 | ''' | ||
| 17 | Parent class of all parsers | ||
| 18 | ''' | ||
| 17 | def __init__(self, filename, parser, mime, backup, add2archive): | 19 | def __init__(self, filename, parser, mime, backup, add2archive): |
| 18 | self.filename = '' | 20 | self.filename = '' |
| 19 | self.parser = parser | 21 | self.parser = parser |
| @@ -30,7 +32,6 @@ class GenericParser(object): | |||
| 30 | self.basename = os.path.basename(filename) # only filename | 32 | self.basename = os.path.basename(filename) # only filename |
| 31 | 33 | ||
| 32 | 34 | ||
| 33 | |||
| 34 | def is_clean(self): | 35 | def is_clean(self): |
| 35 | ''' | 36 | ''' |
| 36 | Check if the file is clean from harmful metadatas | 37 | Check if the file is clean from harmful metadatas |
| @@ -68,7 +69,7 @@ class GenericParser(object): | |||
| 68 | 69 | ||
| 69 | def get_meta(self): | 70 | def get_meta(self): |
| 70 | ''' | 71 | ''' |
| 71 | return a dict with all the meta of the file | 72 | Return a dict with all the meta of the file |
| 72 | ''' | 73 | ''' |
| 73 | metadata = {} | 74 | metadata = {} |
| 74 | for field in self.editor: | 75 | for field in self.editor: |
diff --git a/test/clitest.py b/test/clitest.py index f71a622..453333a 100644 --- a/test/clitest.py +++ b/test/clitest.py | |||
| @@ -8,7 +8,6 @@ import subprocess | |||
| 8 | import sys | 8 | import sys |
| 9 | 9 | ||
| 10 | sys.path.append('..') | 10 | sys.path.append('..') |
| 11 | #import cli | ||
| 12 | from lib import mat | 11 | from lib import mat |
| 13 | import test | 12 | import test |
| 14 | 13 | ||
| @@ -57,7 +56,6 @@ class TestisCleancli(test.MATTest): | |||
| 57 | ''' | 56 | ''' |
| 58 | check if cli.py correctly check if a file is clean or not | 57 | check if cli.py correctly check if a file is clean or not |
| 59 | ''' | 58 | ''' |
| 60 | #FIXME : use an external file with string as const ? | ||
| 61 | def test_clean(self): | 59 | def test_clean(self): |
| 62 | '''test is_clean on clean files''' | 60 | '''test is_clean on clean files''' |
| 63 | for clean, _ in self.file_list: | 61 | for clean, _ in self.file_list: |
diff --git a/test/test.py b/test/test.py index ac63f60..227170b 100644 --- a/test/test.py +++ b/test/test.py | |||
| @@ -12,32 +12,28 @@ import sys | |||
| 12 | import tempfile | 12 | import tempfile |
| 13 | import unittest | 13 | import unittest |
| 14 | 14 | ||
| 15 | sys.path.append('..') | ||
| 16 | from lib import mat | ||
| 17 | |||
| 18 | VERBOSITY = 3 | 15 | VERBOSITY = 3 |
| 19 | FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) | 16 | FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) |
| 20 | 17 | ||
| 21 | class MATTest(unittest.TestCase): | 18 | class MATTest(unittest.TestCase): |
| 22 | def setUp(self): | ||
| 23 | ''' | ||
| 24 | Create working copy of the clean and the dirty file in the TMP dir | ||
| 25 | ''' | 19 | ''' |
| 20 | Parent class of all test-functions | ||
| 21 | ''' | ||
| 22 | def setUp(self): | ||
| 23 | ''' | ||
| 24 | Create working copy of the clean and the dirty file in the TMP dir | ||
| 25 | ''' | ||
| 26 | self.file_list = [] | 26 | self.file_list = [] |
| 27 | self.tmpdir = tempfile.mkdtemp() | 27 | self.tmpdir = tempfile.mkdtemp() |
| 28 | 28 | ||
| 29 | for clean, dirty in FILE_LIST: | 29 | for clean, dirty in FILE_LIST: |
| 30 | shutil.copy2(clean, self.tmpdir + os.sep + clean) | 30 | shutil.copy2(clean, self.tmpdir + os.sep + clean) |
| 31 | shutil.copy2(dirty, self.tmpdir + os.sep + dirty) | 31 | shutil.copy2(dirty, self.tmpdir + os.sep + dirty) |
| 32 | |||
| 33 | self.file_list.append((self.tmpdir + os.sep + clean, | 32 | self.file_list.append((self.tmpdir + os.sep + clean, |
| 34 | self.tmpdir + os.sep + dirty)) | 33 | self.tmpdir + os.sep + dirty)) |
| 35 | 34 | ||
| 36 | def tearDown(self): | 35 | def tearDown(self): |
| 37 | ''' | 36 | ''' |
| 38 | Remove the tmp folder | 37 | Remove the tmp folder |
| 39 | ''' | 38 | ''' |
| 40 | for clean, dirty in self.file_list: | ||
| 41 | mat.secure_remove(clean) | ||
| 42 | mat.secure_remove(dirty) | ||
| 43 | shutil.rmtree(self.tmpdir) | 39 | shutil.rmtree(self.tmpdir) |
