diff options
| author | jvoisin | 2013-10-27 23:01:20 +0000 |
|---|---|---|
| committer | jvoisin | 2013-10-27 23:01:20 +0000 |
| commit | 4c81e731a485d3ea84049ef6d568153c8b10e90b (patch) | |
| tree | 86ad43d7df67ed8d27cfbe7ff60dda1545784845 /MAT | |
| parent | 6f21743fdae533d7a94f64fb03d706fb342aff01 (diff) | |
Improves documentation
Diffstat (limited to 'MAT')
| -rw-r--r-- | MAT/archive.py | 25 | ||||
| -rw-r--r-- | MAT/audio.py | 21 | ||||
| -rw-r--r-- | MAT/images.py | 45 | ||||
| -rw-r--r-- | MAT/mat.py | 50 | ||||
| -rw-r--r-- | MAT/mutagenstripper.py | 4 | ||||
| -rw-r--r-- | MAT/office.py | 30 | ||||
| -rw-r--r-- | MAT/parser.py | 33 | ||||
| -rw-r--r-- | MAT/strippers.py | 11 |
8 files changed, 102 insertions, 117 deletions
diff --git a/MAT/archive.py b/MAT/archive.py index 447f068..f07e18c 100644 --- a/MAT/archive.py +++ b/MAT/archive.py | |||
| @@ -1,21 +1,19 @@ | |||
| 1 | ''' | 1 | ''' Take care of archives formats |
| 2 | Take care of archives formats | ||
| 3 | ''' | 2 | ''' |
| 4 | 3 | ||
| 5 | import zipfile | ||
| 6 | import shutil | ||
| 7 | import os | ||
| 8 | import logging | 4 | import logging |
| 5 | import os | ||
| 6 | import shutil | ||
| 9 | import tempfile | 7 | import tempfile |
| 8 | import zipfile | ||
| 10 | 9 | ||
| 11 | import parser | ||
| 12 | import mat | 10 | import mat |
| 11 | import parser | ||
| 13 | import tarfile | 12 | import tarfile |
| 14 | 13 | ||
| 15 | 14 | ||
| 16 | class GenericArchiveStripper(parser.GenericParser): | 15 | class GenericArchiveStripper(parser.GenericParser): |
| 17 | ''' | 16 | ''' Represent a generic archive |
| 18 | Represent a generic archive | ||
| 19 | ''' | 17 | ''' |
| 20 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 18 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 21 | super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 19 | super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| @@ -24,8 +22,7 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 24 | self.tempdir = tempfile.mkdtemp() | 22 | self.tempdir = tempfile.mkdtemp() |
| 25 | 23 | ||
| 26 | def __del__(self): | 24 | def __del__(self): |
| 27 | ''' | 25 | ''' Remove the files inside the temp dir, |
| 28 | Remove the files inside the temp dir, | ||
| 29 | then remove the temp dir | 26 | then remove the temp dir |
| 30 | ''' | 27 | ''' |
| 31 | for root, dirs, files in os.walk(self.tempdir): | 28 | for root, dirs, files in os.walk(self.tempdir): |
| @@ -35,16 +32,16 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 35 | shutil.rmtree(self.tempdir) | 32 | shutil.rmtree(self.tempdir) |
| 36 | 33 | ||
| 37 | def remove_all(self): | 34 | def remove_all(self): |
| 35 | ''' Virtual method to remove all metadata | ||
| 36 | ''' | ||
| 38 | raise NotImplementedError | 37 | raise NotImplementedError |
| 39 | 38 | ||
| 40 | 39 | ||
| 41 | class ZipStripper(GenericArchiveStripper): | 40 | class ZipStripper(GenericArchiveStripper): |
| 42 | ''' | 41 | ''' Represent a zip file |
| 43 | Represent a zip file | ||
| 44 | ''' | 42 | ''' |
| 45 | def is_file_clean(self, fileinfo): | 43 | def is_file_clean(self, fileinfo): |
| 46 | ''' | 44 | ''' Check if a ZipInfo object is clean of metadatas added |
| 47 | Check if a ZipInfo object is clean of metadatas added | ||
| 48 | by zip itself, independently of the corresponding file metadatas | 45 | by zip itself, independently of the corresponding file metadatas |
| 49 | ''' | 46 | ''' |
| 50 | if fileinfo.comment: | 47 | if fileinfo.comment: |
diff --git a/MAT/audio.py b/MAT/audio.py index 3c6c7bc..dae9d75 100644 --- a/MAT/audio.py +++ b/MAT/audio.py | |||
| @@ -1,5 +1,4 @@ | |||
| 1 | ''' | 1 | ''' Care about audio fileformat |
| 2 | Care about audio fileformat | ||
| 3 | ''' | 2 | ''' |
| 4 | 3 | ||
| 5 | try: | 4 | try: |
| @@ -13,31 +12,27 @@ import mutagenstripper | |||
| 13 | 12 | ||
| 14 | 13 | ||
| 15 | class MpegAudioStripper(parser.GenericParser): | 14 | class MpegAudioStripper(parser.GenericParser): |
| 16 | ''' | 15 | ''' Represent mpeg audio file (mp3, ...) |
| 17 | Represent mpeg audio file (mp3, ...) | ||
| 18 | ''' | 16 | ''' |
| 19 | def _should_remove(self, field): | 17 | def _should_remove(self, field): |
| 20 | return field.name in ("id3v1", "id3v2") | 18 | return field.name in ("id3v1", "id3v2") |
| 21 | 19 | ||
| 22 | 20 | ||
| 23 | class OggStripper(mutagenstripper.MutagenStripper): | 21 | class OggStripper(mutagenstripper.MutagenStripper): |
| 24 | ''' | 22 | ''' Represent an ogg vorbis file |
| 25 | Represent an ogg vorbis file | ||
| 26 | ''' | 23 | ''' |
| 27 | def _create_mfile(self): | 24 | def _create_mfile(self): |
| 28 | self.mfile = OggVorbis(self.filename) | 25 | self.mfile = OggVorbis(self.filename) |
| 29 | 26 | ||
| 30 | 27 | ||
| 31 | class FlacStripper(mutagenstripper.MutagenStripper): | 28 | class FlacStripper(mutagenstripper.MutagenStripper): |
| 32 | ''' | 29 | ''' Represent a Flac audio file |
| 33 | Represent a Flac audio file | ||
| 34 | ''' | 30 | ''' |
| 35 | def _create_mfile(self): | 31 | def _create_mfile(self): |
| 36 | self.mfile = FLAC(self.filename) | 32 | self.mfile = FLAC(self.filename) |
| 37 | 33 | ||
| 38 | def remove_all(self): | 34 | def remove_all(self): |
| 39 | ''' | 35 | ''' Remove the "metadata" block from the file |
| 40 | Remove the "metadata" block from the file | ||
| 41 | ''' | 36 | ''' |
| 42 | super(FlacStripper, self).remove_all() | 37 | super(FlacStripper, self).remove_all() |
| 43 | self.mfile.clear_pictures() | 38 | self.mfile.clear_pictures() |
| @@ -45,14 +40,12 @@ class FlacStripper(mutagenstripper.MutagenStripper): | |||
| 45 | return True | 40 | return True |
| 46 | 41 | ||
| 47 | def is_clean(self): | 42 | def is_clean(self): |
| 48 | ''' | 43 | ''' Check if the "metadata" block is present in the file |
| 49 | Check if the "metadata" block is present in the file | ||
| 50 | ''' | 44 | ''' |
| 51 | return super(FlacStripper, self).is_clean() and not self.mfile.pictures | 45 | return super(FlacStripper, self).is_clean() and not self.mfile.pictures |
| 52 | 46 | ||
| 53 | def get_meta(self): | 47 | def get_meta(self): |
| 54 | ''' | 48 | ''' Return the content of the metadata block if present |
| 55 | Return the content of the metadata block if present | ||
| 56 | ''' | 49 | ''' |
| 57 | metadata = super(FlacStripper, self).get_meta() | 50 | metadata = super(FlacStripper, self).get_meta() |
| 58 | if self.mfile.pictures: | 51 | if self.mfile.pictures: |
diff --git a/MAT/images.py b/MAT/images.py index 55c1a90..dc96e6a 100644 --- a/MAT/images.py +++ b/MAT/images.py | |||
| @@ -1,41 +1,52 @@ | |||
| 1 | ''' | 1 | ''' Takes care about pictures formats |
| 2 | Takes care about pictures formats | 2 | |
| 3 | References: | ||
| 4 | - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm | ||
| 5 | - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html | ||
| 6 | - PNG: http://www.w3.org/TR/PNG-Chunks.html | ||
| 3 | ''' | 7 | ''' |
| 4 | 8 | ||
| 5 | import parser | 9 | import parser |
| 6 | 10 | ||
| 7 | 11 | ||
| 8 | class JpegStripper(parser.GenericParser): | 12 | class JpegStripper(parser.GenericParser): |
| 9 | ''' | 13 | ''' Represents a jpeg file. |
| 10 | represents a jpeg file | 14 | Custom Huffman and Quantization tables |
| 15 | are stripped: they may leak | ||
| 16 | some info, and the quality loss is minor. | ||
| 11 | ''' | 17 | ''' |
| 12 | def _should_remove(self, field): | 18 | def _should_remove(self, field): |
| 19 | ''' Return True if the field is compromising | ||
| 13 | ''' | 20 | ''' |
| 14 | return True if the field is compromising | 21 | field_list = frozenset([ |
| 15 | ''' | 22 | 'start_image', # start of the image |
| 16 | field_list = frozenset(['start_image', 'app0', 'start_frame', | 23 | 'app0', # JFIF data |
| 17 | 'start_scan', 'data', 'end_image']) | 24 | 'start_frame', # specify width, height, number of components |
| 25 | 'start_scan', # specify which slice of data the top-to-bottom scan contains | ||
| 26 | 'data', # actual data | ||
| 27 | 'end_image']) # end of the image | ||
| 18 | if field.name in field_list: | 28 | if field.name in field_list: |
| 19 | return False | 29 | return False |
| 20 | elif field.name.startswith('quantization['): | 30 | elif field.name.startswith('quantization['): # custom Quant. tables |
| 21 | return False | 31 | return False |
| 22 | elif field.name.startswith('huffman['): | 32 | elif field.name.startswith('huffman['): # custom Huffman tables |
| 23 | return False | 33 | return False |
| 24 | return True | 34 | return True |
| 25 | 35 | ||
| 26 | 36 | ||
| 27 | class PngStripper(parser.GenericParser): | 37 | class PngStripper(parser.GenericParser): |
| 28 | ''' | 38 | ''' Represents a png file |
| 29 | represents a png file | ||
| 30 | see : http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html | ||
| 31 | ''' | 39 | ''' |
| 32 | def _should_remove(self, field): | 40 | def _should_remove(self, field): |
| 41 | ''' Return True if the field is compromising | ||
| 33 | ''' | 42 | ''' |
| 34 | return True if the field is compromising | 43 | field_list = frozenset([ |
| 35 | ''' | 44 | 'id', |
| 36 | field_list = frozenset(['id', 'header', 'physical', 'end']) | 45 | 'header', # PNG header |
| 46 | 'physical', # the intended pixel size or aspect ratio | ||
| 47 | 'end']) # end of the image | ||
| 37 | if field.name in field_list: | 48 | if field.name in field_list: |
| 38 | return False | 49 | return False |
| 39 | if field.name.startswith('data['): | 50 | if field.name.startswith('data['): # data |
| 40 | return False | 51 | return False |
| 41 | return True | 52 | return True |
diff --git a/MAT/mat.py b/MAT/mat.py --- a/MAT/mat.py +++ b/MAT/mat.py | |||
| @@ -1,13 +1,12 @@ | |||
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | 2 | ||
| 3 | ''' | 3 | ''' Metadata anonymisation toolkit library |
| 4 | Metadata anonymisation toolkit library | ||
| 5 | ''' | 4 | ''' |
| 6 | 5 | ||
| 7 | import os | ||
| 8 | import subprocess | ||
| 9 | import logging | 6 | import logging |
| 10 | import mimetypes | 7 | import mimetypes |
| 8 | import os | ||
| 9 | import subprocess | ||
| 11 | import xml.sax | 10 | import xml.sax |
| 12 | 11 | ||
| 13 | import hachoir_core.cmd_line | 12 | import hachoir_core.cmd_line |
| @@ -33,6 +32,8 @@ logging.basicConfig(filename=fname, level=LOGGING_LEVEL) | |||
| 33 | import strippers # this is loaded here because we need LOGGING_LEVEL | 32 | import strippers # this is loaded here because we need LOGGING_LEVEL |
| 34 | 33 | ||
| 35 | def get_logo(): | 34 | def get_logo(): |
| 35 | ''' Return the path to the logo | ||
| 36 | ''' | ||
| 36 | if os.path.isfile('./data/mat.png'): | 37 | if os.path.isfile('./data/mat.png'): |
| 37 | return './data/mat.png' | 38 | return './data/mat.png' |
| 38 | elif os.path.isfile('/usr/share/pixmaps/mat.png'): | 39 | elif os.path.isfile('/usr/share/pixmaps/mat.png'): |
| @@ -41,6 +42,8 @@ def get_logo(): | |||
| 41 | return '/usr/local/share/pixmaps/mat.png' | 42 | return '/usr/local/share/pixmaps/mat.png' |
| 42 | 43 | ||
| 43 | def get_datadir(): | 44 | def get_datadir(): |
| 45 | ''' Return the path to the data directory | ||
| 46 | ''' | ||
| 44 | if os.path.isdir('./data/'): | 47 | if os.path.isdir('./data/'): |
| 45 | return './data/' | 48 | return './data/' |
| 46 | elif os.path.isdir('/usr/local/share/mat/'): | 49 | elif os.path.isdir('/usr/local/share/mat/'): |
| @@ -49,8 +52,9 @@ def get_datadir(): | |||
| 49 | return '/usr/share/mat/' | 52 | return '/usr/share/mat/' |
| 50 | 53 | ||
| 51 | def list_supported_formats(): | 54 | def list_supported_formats(): |
| 52 | ''' | 55 | ''' Return a list of all locally supported fileformat. |
| 53 | Return a list of all locally supported fileformat | 56 | It parses that FORMATS file, and removes locally |
| 57 | non-supported formats. | ||
| 54 | ''' | 58 | ''' |
| 55 | handler = XMLParser() | 59 | handler = XMLParser() |
| 56 | parser = xml.sax.make_parser() | 60 | parser = xml.sax.make_parser() |
| @@ -67,8 +71,7 @@ def list_supported_formats(): | |||
| 67 | return localy_supported | 71 | return localy_supported |
| 68 | 72 | ||
| 69 | class XMLParser(xml.sax.handler.ContentHandler): | 73 | class XMLParser(xml.sax.handler.ContentHandler): |
| 70 | ''' | 74 | ''' Parse the supported format xml, and return a corresponding |
| 71 | Parse the supported format xml, and return a corresponding | ||
| 72 | list of dict | 75 | list of dict |
| 73 | ''' | 76 | ''' |
| 74 | def __init__(self): | 77 | def __init__(self): |
| @@ -78,18 +81,16 @@ class XMLParser(xml.sax.handler.ContentHandler): | |||
| 78 | self.between = False | 81 | self.between = False |
| 79 | 82 | ||
| 80 | def startElement(self, name, attrs): | 83 | def startElement(self, name, attrs): |
| 81 | ''' | 84 | ''' Called when entering into xml tag |
| 82 | Called when entering into xml tag | ||
| 83 | ''' | 85 | ''' |
| 84 | self.between = True | 86 | self.between = True |
| 85 | self.key = name | 87 | self.key = name |
| 86 | self.content = '' | 88 | self.content = '' |
| 87 | 89 | ||
| 88 | def endElement(self, name): | 90 | def endElement(self, name): |
| 91 | ''' Called when exiting a xml tag | ||
| 89 | ''' | 92 | ''' |
| 90 | Called when exiting a xml tag | 93 | if name == 'format': # leaving a fileformat section |
| 91 | ''' | ||
| 92 | if name == 'format': # exiting a fileformat section | ||
| 93 | self.list.append(self.dict.copy()) | 94 | self.list.append(self.dict.copy()) |
| 94 | self.dict.clear() | 95 | self.dict.clear() |
| 95 | else: | 96 | else: |
| @@ -98,19 +99,17 @@ class XMLParser(xml.sax.handler.ContentHandler): | |||
| 98 | self.between = False | 99 | self.between = False |
| 99 | 100 | ||
| 100 | def characters(self, characters): | 101 | def characters(self, characters): |
| 101 | ''' | 102 | ''' Concatenate the content between opening and closing tags |
| 102 | Concatenate the content between opening and closing tags | ||
| 103 | ''' | 103 | ''' |
| 104 | if self.between: | 104 | if self.between: |
| 105 | self.content += characters | 105 | self.content += characters |
| 106 | 106 | ||
| 107 | 107 | ||
| 108 | def secure_remove(filename): | 108 | def secure_remove(filename): |
| 109 | ''' | 109 | ''' Securely remove the file |
| 110 | securely remove the file | ||
| 111 | ''' | 110 | ''' |
| 112 | try: | 111 | try: |
| 113 | if subprocess.call(['shred', '--remove', filename]) == 0: | 112 | if not subprocess.call(['shred', '--remove', filename]): |
| 114 | return True | 113 | return True |
| 115 | else: | 114 | else: |
| 116 | raise OSError | 115 | raise OSError |
| @@ -126,22 +125,17 @@ def secure_remove(filename): | |||
| 126 | 125 | ||
| 127 | 126 | ||
| 128 | def create_class_file(name, backup, **kwargs): | 127 | def create_class_file(name, backup, **kwargs): |
| 129 | ''' | 128 | ''' Return a $FILETYPEStripper() class, |
| 130 | return a $FILETYPEStripper() class, | ||
| 131 | corresponding to the filetype of the given file | 129 | corresponding to the filetype of the given file |
| 132 | ''' | 130 | ''' |
| 133 | if not os.path.isfile(name): | 131 | if not os.path.isfile(name): # check if the file exists |
| 134 | # check if the file exists | ||
| 135 | logging.error('%s is not a valid file' % name) | 132 | logging.error('%s is not a valid file' % name) |
| 136 | return None | 133 | return None |
| 137 | 134 | ||
| 138 | if not os.access(name, os.R_OK): | 135 | if not os.access(name, os.R_OK): #check read permissions |
| 139 | #check read permissions | ||
| 140 | logging.error('%s is is not readable' % name) | 136 | logging.error('%s is is not readable' % name) |
| 141 | return None | 137 | return None |
| 142 | 138 | ||
| 143 | is_writable = os.access(name, os.W_OK) | ||
| 144 | |||
| 145 | if not os.path.getsize(name): | 139 | if not os.path.getsize(name): |
| 146 | #check if the file is not empty (hachoir crash on empty files) | 140 | #check if the file is not empty (hachoir crash on empty files) |
| 147 | logging.error('%s is empty' % name) | 141 | logging.error('%s is empty' % name) |
| @@ -161,7 +155,7 @@ def create_class_file(name, backup, **kwargs): | |||
| 161 | mime = parser.mime_type | 155 | mime = parser.mime_type |
| 162 | 156 | ||
| 163 | if mime == 'application/zip': # some formats are zipped stuff | 157 | if mime == 'application/zip': # some formats are zipped stuff |
| 164 | if mimetypes.guess_type(name)[0] is not None: | 158 | if mimetypes.guess_type(name)[0]: |
| 165 | mime = mimetypes.guess_type(name)[0] | 159 | mime = mimetypes.guess_type(name)[0] |
| 166 | 160 | ||
| 167 | if mime.startswith('application/vnd.oasis.opendocument'): | 161 | if mime.startswith('application/vnd.oasis.opendocument'): |
| @@ -169,6 +163,8 @@ def create_class_file(name, backup, **kwargs): | |||
| 169 | elif mime.startswith('application/vnd.openxmlformats-officedocument'): | 163 | elif mime.startswith('application/vnd.openxmlformats-officedocument'): |
| 170 | mime = 'application/officeopenxml' # office openxml | 164 | mime = 'application/officeopenxml' # office openxml |
| 171 | 165 | ||
| 166 | is_writable = os.access(name, os.W_OK) | ||
| 167 | |||
| 172 | try: | 168 | try: |
| 173 | stripper_class = strippers.STRIPPERS[mime] | 169 | stripper_class = strippers.STRIPPERS[mime] |
| 174 | except KeyError: | 170 | except KeyError: |
diff --git a/MAT/mutagenstripper.py b/MAT/mutagenstripper.py index ebc6b91..403c9a7 100644 --- a/MAT/mutagenstripper.py +++ b/MAT/mutagenstripper.py | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | ''' Take care of mutagen-supported formats (audio) | ||
| 2 | ''' | ||
| 3 | |||
| 1 | import parser | 4 | import parser |
| 2 | import shutil | ||
| 3 | 5 | ||
| 4 | 6 | ||
| 5 | class MutagenStripper(parser.GenericParser): | 7 | class MutagenStripper(parser.GenericParser): |
diff --git a/MAT/office.py b/MAT/office.py index 583e0f9..91e49be 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -1,5 +1,4 @@ | |||
| 1 | ''' | 1 | ''' Care about office's formats |
| 2 | Care about office's formats | ||
| 3 | ''' | 2 | ''' |
| 4 | 3 | ||
| 5 | import os | 4 | import os |
| @@ -23,14 +22,12 @@ import archive | |||
| 23 | 22 | ||
| 24 | 23 | ||
| 25 | class OpenDocumentStripper(archive.GenericArchiveStripper): | 24 | class OpenDocumentStripper(archive.GenericArchiveStripper): |
| 26 | ''' | 25 | ''' An open document file is a zip, with xml file into. |
| 27 | An open document file is a zip, with xml file into. | ||
| 28 | The one that interest us is meta.xml | 26 | The one that interest us is meta.xml |
| 29 | ''' | 27 | ''' |
| 30 | 28 | ||
| 31 | def get_meta(self): | 29 | def get_meta(self): |
| 32 | ''' | 30 | ''' Return a dict with all the meta of the file by |
| 33 | Return a dict with all the meta of the file by | ||
| 34 | trying to read the meta.xml file. | 31 | trying to read the meta.xml file. |
| 35 | ''' | 32 | ''' |
| 36 | zipin = zipfile.ZipFile(self.filename, 'r') | 33 | zipin = zipfile.ZipFile(self.filename, 'r') |
| @@ -103,8 +100,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 103 | return True | 100 | return True |
| 104 | 101 | ||
| 105 | def is_clean(self): | 102 | def is_clean(self): |
| 106 | ''' | 103 | ''' Check if the file is clean from harmful metadatas |
| 107 | Check if the file is clean from harmful metadatas | ||
| 108 | ''' | 104 | ''' |
| 109 | zipin = zipfile.ZipFile(self.filename, 'r') | 105 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 110 | try: | 106 | try: |
| @@ -120,8 +116,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 120 | 116 | ||
| 121 | 117 | ||
| 122 | class PdfStripper(parser.GenericParser): | 118 | class PdfStripper(parser.GenericParser): |
| 123 | ''' | 119 | ''' Represent a PDF file |
| 124 | Represent a PDF file | ||
| 125 | ''' | 120 | ''' |
| 126 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 121 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 127 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 122 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| @@ -137,8 +132,7 @@ class PdfStripper(parser.GenericParser): | |||
| 137 | 'producer', 'metadata']) | 132 | 'producer', 'metadata']) |
| 138 | 133 | ||
| 139 | def is_clean(self): | 134 | def is_clean(self): |
| 140 | ''' | 135 | ''' Check if the file is clean from harmful metadatas |
| 141 | Check if the file is clean from harmful metadatas | ||
| 142 | ''' | 136 | ''' |
| 143 | for key in self.meta_list: | 137 | for key in self.meta_list: |
| 144 | if self.document.get_property(key): | 138 | if self.document.get_property(key): |
| @@ -146,8 +140,7 @@ class PdfStripper(parser.GenericParser): | |||
| 146 | return True | 140 | return True |
| 147 | 141 | ||
| 148 | def remove_all(self): | 142 | def remove_all(self): |
| 149 | ''' | 143 | ''' Opening the PDF with poppler, then doing a render |
| 150 | Opening the PDF with poppler, then doing a render | ||
| 151 | on a cairo pdfsurface for each pages. | 144 | on a cairo pdfsurface for each pages. |
| 152 | 145 | ||
| 153 | http://cairographics.org/documentation/pycairo/2/ | 146 | http://cairographics.org/documentation/pycairo/2/ |
| @@ -195,8 +188,7 @@ pdfrw' % self.output) | |||
| 195 | return True | 188 | return True |
| 196 | 189 | ||
| 197 | def get_meta(self): | 190 | def get_meta(self): |
| 198 | ''' | 191 | ''' Return a dict with all the meta of the file |
| 199 | Return a dict with all the meta of the file | ||
| 200 | ''' | 192 | ''' |
| 201 | metadata = {} | 193 | metadata = {} |
| 202 | for key in self.meta_list: | 194 | for key in self.meta_list: |
| @@ -252,8 +244,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 252 | return True | 244 | return True |
| 253 | 245 | ||
| 254 | def is_clean(self): | 246 | def is_clean(self): |
| 255 | ''' | 247 | ''' Check if the file is clean from harmful metadatas |
| 256 | Check if the file is clean from harmful metadatas | ||
| 257 | ''' | 248 | ''' |
| 258 | zipin = zipfile.ZipFile(self.filename, 'r') | 249 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 259 | for item in zipin.namelist(): | 250 | for item in zipin.namelist(): |
| @@ -265,8 +256,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 265 | return czf.is_clean() | 256 | return czf.is_clean() |
| 266 | 257 | ||
| 267 | def get_meta(self): | 258 | def get_meta(self): |
| 268 | ''' | 259 | ''' Return a dict with all the meta of the file |
| 269 | Return a dict with all the meta of the file | ||
| 270 | ''' | 260 | ''' |
| 271 | zipin = zipfile.ZipFile(self.filename, 'r') | 261 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 272 | metadata = {} | 262 | metadata = {} |
diff --git a/MAT/parser.py b/MAT/parser.py index c1c3f4c..ae07d7e 100644 --- a/MAT/parser.py +++ b/MAT/parser.py | |||
| @@ -1,22 +1,22 @@ | |||
| 1 | ''' Parent class of all parser | ||
| 1 | ''' | 2 | ''' |
| 2 | Parent class of all parser | ||
| 3 | ''' | ||
| 4 | |||
| 5 | import hachoir_core | ||
| 6 | import hachoir_editor | ||
| 7 | 3 | ||
| 8 | import os | 4 | import os |
| 9 | import tempfile | ||
| 10 | import shutil | 5 | import shutil |
| 6 | import tempfile | ||
| 7 | |||
| 8 | import hachoir_core | ||
| 9 | import hachoir_editor | ||
| 11 | 10 | ||
| 12 | import mat | 11 | import mat |
| 13 | 12 | ||
| 14 | NOMETA = frozenset(('.bmp', # image | 13 | NOMETA = frozenset(( |
| 15 | '.rdf', # text | 14 | '.bmp', # "raw" image |
| 16 | '.txt', # plain text | 15 | '.rdf', # text |
| 17 | '.xml', # formated text (XML) | 16 | '.txt', # plain text |
| 18 | '.rels', # openXML formated text | 17 | '.xml', # formated text (XML) |
| 19 | )) | 18 | '.rels', # openXML formated text |
| 19 | )) | ||
| 20 | 20 | ||
| 21 | FIELD = object() | 21 | FIELD = object() |
| 22 | 22 | ||
| @@ -92,8 +92,7 @@ class GenericParser(object): | |||
| 92 | del fieldset[field] | 92 | del fieldset[field] |
| 93 | 93 | ||
| 94 | def get_meta(self): | 94 | def get_meta(self): |
| 95 | ''' | 95 | ''' Return a dict with all the meta of the file |
| 96 | Return a dict with all the meta of the file | ||
| 97 | ''' | 96 | ''' |
| 98 | metadata = {} | 97 | metadata = {} |
| 99 | self._get_meta(self.editor, metadata) | 98 | self._get_meta(self.editor, metadata) |
| @@ -113,8 +112,7 @@ class GenericParser(object): | |||
| 113 | self._get_meta(field, None) | 112 | self._get_meta(field, None) |
| 114 | 113 | ||
| 115 | def _should_remove(self, key): | 114 | def _should_remove(self, key): |
| 116 | ''' | 115 | ''' Return True if the field is compromising |
| 117 | Return True if the field is compromising | ||
| 118 | abstract method | 116 | abstract method |
| 119 | ''' | 117 | ''' |
| 120 | raise NotImplementedError | 118 | raise NotImplementedError |
| @@ -125,8 +123,7 @@ class GenericParser(object): | |||
| 125 | shutil.copy2(self.filename, self.filename + '.bak') | 123 | shutil.copy2(self.filename, self.filename + '.bak') |
| 126 | 124 | ||
| 127 | def do_backup(self): | 125 | def do_backup(self): |
| 128 | ''' | 126 | ''' Keep a backup of the file if asked. |
| 129 | Keep a backup of the file if asked. | ||
| 130 | 127 | ||
| 131 | The process of double-renaming is not very elegant, | 128 | The process of double-renaming is not very elegant, |
| 132 | but it greatly simplify new strippers implementation. | 129 | but it greatly simplify new strippers implementation. |
diff --git a/MAT/strippers.py b/MAT/strippers.py index f6ae899..78113ff 100644 --- a/MAT/strippers.py +++ b/MAT/strippers.py | |||
| @@ -1,16 +1,15 @@ | |||
| 1 | ''' | 1 | ''' Manage which fileformat can be processed |
| 2 | Manage which fileformat can be processed | ||
| 3 | ''' | 2 | ''' |
| 4 | 3 | ||
| 5 | import images | 4 | import archive |
| 6 | import audio | 5 | import audio |
| 7 | import gi | 6 | import gi |
| 8 | import office | 7 | import images |
| 9 | import archive | 8 | import logging |
| 10 | import mat | 9 | import mat |
| 11 | import misc | 10 | import misc |
| 11 | import office | ||
| 12 | import subprocess | 12 | import subprocess |
| 13 | import logging | ||
| 14 | 13 | ||
| 15 | STRIPPERS = { | 14 | STRIPPERS = { |
| 16 | 'application/x-tar': archive.TarStripper, | 15 | 'application/x-tar': archive.TarStripper, |
