diff options
| author | jvoisin | 2011-07-30 21:47:31 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-30 21:47:31 +0200 |
| commit | 5715ba52f2238af513b0b87f4aa3c0158d2c84ba (patch) | |
| tree | e77829efd78b1473650a669546c3c11edc37932c /lib | |
| parent | c9cb00bdf702ce6663e78784de63dc6d35c3d875 (diff) | |
Documentation, and removal of unnecessary imports
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/archive.py | 15 | ||||
| -rw-r--r-- | lib/audio.py | 10 | ||||
| -rw-r--r-- | lib/images.py | 10 | ||||
| -rw-r--r-- | lib/mat.py | 2 | ||||
| -rw-r--r-- | lib/misc.py | 3 | ||||
| -rw-r--r-- | lib/office.py | 23 | ||||
| -rw-r--r-- | lib/parser.py | 7 |
7 files changed, 46 insertions, 24 deletions
diff --git a/lib/archive.py b/lib/archive.py index f9e4dba..108134c 100644 --- a/lib/archive.py +++ b/lib/archive.py | |||
| @@ -1,3 +1,6 @@ | |||
| 1 | ''' | ||
| 2 | Take care of archives formats | ||
| 3 | ''' | ||
| 1 | import tarfile | 4 | import tarfile |
| 2 | import zipfile | 5 | import zipfile |
| 3 | 6 | ||
| @@ -67,6 +70,9 @@ class ZipStripper(GenericArchiveStripper): | |||
| 67 | return True | 70 | return True |
| 68 | 71 | ||
| 69 | def is_clean(self): | 72 | def is_clean(self): |
| 73 | ''' | ||
| 74 | Check if the given file is clean from harmful metadata | ||
| 75 | ''' | ||
| 70 | zipin = zipfile.ZipFile(self.filename, 'r') | 76 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 71 | if zipin.comment != '': | 77 | if zipin.comment != '': |
| 72 | logging.debug('%s has a comment' % self.filename) | 78 | logging.debug('%s has a comment' % self.filename) |
| @@ -154,6 +160,9 @@ harmless format' % item.filename) | |||
| 154 | 160 | ||
| 155 | 161 | ||
| 156 | class TarStripper(GenericArchiveStripper): | 162 | class TarStripper(GenericArchiveStripper): |
| 163 | ''' | ||
| 164 | Represent a tarfile archive | ||
| 165 | ''' | ||
| 157 | def _remove(self, current_file): | 166 | def _remove(self, current_file): |
| 158 | ''' | 167 | ''' |
| 159 | remove the meta added by tar itself to the file | 168 | remove the meta added by tar itself to the file |
| @@ -209,6 +218,9 @@ class TarStripper(GenericArchiveStripper): | |||
| 209 | return True | 218 | return True |
| 210 | 219 | ||
| 211 | def is_clean(self): | 220 | def is_clean(self): |
| 221 | ''' | ||
| 222 | Check if the file is clean from harmful metadatas | ||
| 223 | ''' | ||
| 212 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 224 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 213 | for item in tarin.getmembers(): | 225 | for item in tarin.getmembers(): |
| 214 | if not self.is_file_clean(item): | 226 | if not self.is_file_clean(item): |
| @@ -233,6 +245,9 @@ class TarStripper(GenericArchiveStripper): | |||
| 233 | return True | 245 | return True |
| 234 | 246 | ||
| 235 | def get_meta(self): | 247 | def get_meta(self): |
| 248 | ''' | ||
| 249 | Return a dict with all the meta of the file | ||
| 250 | ''' | ||
| 236 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 251 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 237 | metadata = {} | 252 | metadata = {} |
| 238 | for current_file in tarin.getmembers(): | 253 | for current_file in tarin.getmembers(): |
diff --git a/lib/audio.py b/lib/audio.py index 73030af..f1f53ff 100644 --- a/lib/audio.py +++ b/lib/audio.py | |||
| @@ -3,9 +3,9 @@ | |||
| 3 | ''' | 3 | ''' |
| 4 | try: | 4 | try: |
| 5 | from mutagen.flac import FLAC | 5 | from mutagen.flac import FLAC |
| 6 | from mutagen.apev2 import APEv2 | 6 | from mutagen.apev2 import APEv2File |
| 7 | from mutagen.oggvorbis import OggVorbis | 7 | from mutagen.oggvorbis import OggVorbis |
| 8 | except: | 8 | except ImportError: |
| 9 | pass | 9 | pass |
| 10 | 10 | ||
| 11 | 11 | ||
| @@ -70,7 +70,7 @@ class Apev2Stripper(parser.GenericParser): | |||
| 70 | shutil.copy2(self.filename, self.output) | 70 | shutil.copy2(self.filename, self.output) |
| 71 | self.filename = self.output | 71 | self.filename = self.output |
| 72 | 72 | ||
| 73 | mfile = APEv2(self.filename) | 73 | mfile = APEv2File(self.filename) |
| 74 | mfile.delete() | 74 | mfile.delete() |
| 75 | mfile.save() | 75 | mfile.save() |
| 76 | 76 | ||
| @@ -78,7 +78,7 @@ class Apev2Stripper(parser.GenericParser): | |||
| 78 | ''' | 78 | ''' |
| 79 | Check if the "metadata" block is present in the file | 79 | Check if the "metadata" block is present in the file |
| 80 | ''' | 80 | ''' |
| 81 | mfile = APEv2(self.filename) | 81 | mfile = APEv2File(self.filename) |
| 82 | if mfile.tags is None: | 82 | if mfile.tags is None: |
| 83 | return True | 83 | return True |
| 84 | else: | 84 | else: |
| @@ -89,7 +89,7 @@ class Apev2Stripper(parser.GenericParser): | |||
| 89 | Return the content of the metadata block if present | 89 | Return the content of the metadata block if present |
| 90 | ''' | 90 | ''' |
| 91 | metadata = {} | 91 | metadata = {} |
| 92 | mfile = APEv2(self.filename) | 92 | mfile = APEv2File(self.filename) |
| 93 | if mfile.tags is None: | 93 | if mfile.tags is None: |
| 94 | return metadata | 94 | return metadata |
| 95 | for key, value in mfile.tags: | 95 | for key, value in mfile.tags: |
diff --git a/lib/images.py b/lib/images.py index df3d256..9fa9999 100644 --- a/lib/images.py +++ b/lib/images.py | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | ''' | ||
| 2 | Takes care about pictures formats | ||
| 3 | ''' | ||
| 4 | |||
| 1 | import parser | 5 | import parser |
| 2 | 6 | ||
| 3 | 7 | ||
| @@ -6,6 +10,9 @@ class JpegStripper(parser.GenericParser): | |||
| 6 | Represents a .jpeg file | 10 | Represents a .jpeg file |
| 7 | ''' | 11 | ''' |
| 8 | def _should_remove(self, field): | 12 | def _should_remove(self, field): |
| 13 | ''' | ||
| 14 | return True if the field is compromizing | ||
| 15 | ''' | ||
| 9 | if field.name.startswith('comment'): | 16 | if field.name.startswith('comment'): |
| 10 | return True | 17 | return True |
| 11 | elif field.name in ("photoshop", "exif", "adobe"): | 18 | elif field.name in ("photoshop", "exif", "adobe"): |
| @@ -19,6 +26,9 @@ class PngStripper(parser.GenericParser): | |||
| 19 | Represents a .png file | 26 | Represents a .png file |
| 20 | ''' | 27 | ''' |
| 21 | def _should_remove(self, field): | 28 | def _should_remove(self, field): |
| 29 | ''' | ||
| 30 | return True if the field is compromizing | ||
| 31 | ''' | ||
| 22 | if field.name.startswith("text["): | 32 | if field.name.startswith("text["): |
| 23 | return True | 33 | return True |
| 24 | elif field.name is "time": | 34 | elif field.name is "time": |
diff --git a/lib/mat.py b/lib/mat.py --- a/lib/mat.py +++ b/lib/mat.py | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | import os | 7 | import os |
| 8 | import subprocess | 8 | import subprocess |
| 9 | import logging | 9 | import logging |
| 10 | import mimetypes | ||
| 11 | 10 | ||
| 12 | import hachoir_core.cmd_line | 11 | import hachoir_core.cmd_line |
| 13 | import hachoir_parser | 12 | import hachoir_parser |
| @@ -102,7 +101,6 @@ def create_class_file(name, backup, add2archive): | |||
| 102 | return | 101 | return |
| 103 | 102 | ||
| 104 | mime = parser.mime_type | 103 | mime = parser.mime_type |
| 105 | print mime | ||
| 106 | 104 | ||
| 107 | if mime.startswith('application/vnd.oasis.opendocument'): | 105 | if mime.startswith('application/vnd.oasis.opendocument'): |
| 108 | mime = 'application/vnd.oasis.opendocument' # opendocument fileformat | 106 | mime = 'application/vnd.oasis.opendocument' # opendocument fileformat |
diff --git a/lib/misc.py b/lib/misc.py index f846388..acbaed8 100644 --- a/lib/misc.py +++ b/lib/misc.py | |||
| @@ -31,6 +31,9 @@ class TorrentStripper(parser.GenericParser): | |||
| 31 | return True | 31 | return True |
| 32 | 32 | ||
| 33 | def get_meta(self): | 33 | def get_meta(self): |
| 34 | ''' | ||
| 35 | Return a dict with all the meta of the file | ||
| 36 | ''' | ||
| 34 | metadata = {} | 37 | metadata = {} |
| 35 | for field in self.editor['root']: | 38 | for field in self.editor['root']: |
| 36 | if self._should_remove(field): | 39 | if self._should_remove(field): |
diff --git a/lib/office.py b/lib/office.py index 966a64d..2320e40 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -3,14 +3,9 @@ | |||
| 3 | ''' | 3 | ''' |
| 4 | 4 | ||
| 5 | import os | 5 | import os |
| 6 | import mimetypes | ||
| 7 | import subprocess | ||
| 8 | import tempfile | ||
| 9 | import glob | ||
| 10 | import logging | 6 | import logging |
| 11 | import zipfile | 7 | import zipfile |
| 12 | import re | 8 | import re |
| 13 | import shutil | ||
| 14 | from xml.etree import ElementTree | 9 | from xml.etree import ElementTree |
| 15 | 10 | ||
| 16 | try: | 11 | try: |
| @@ -103,13 +98,13 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 103 | zipin.getinfo('meta.xml') | 98 | zipin.getinfo('meta.xml') |
| 104 | return False | 99 | return False |
| 105 | except KeyError: # no meta.xml in the file | 100 | except KeyError: # no meta.xml in the file |
| 106 | zipin.close() | 101 | zipin.close() |
| 107 | czf = archive.ZipStripper(self.filename, self.parser, | 102 | czf = archive.ZipStripper(self.filename, self.parser, |
| 108 | 'application/zip', self.backup, self.add2archive) | 103 | 'application/zip', self.backup, self.add2archive) |
| 109 | if czf.is_clean(): | 104 | if czf.is_clean(): |
| 110 | return True | 105 | return True |
| 111 | else: | 106 | else: |
| 112 | return False | 107 | return False |
| 113 | return True | 108 | return True |
| 114 | 109 | ||
| 115 | 110 | ||
| @@ -172,7 +167,7 @@ class PdfStripper(parser.GenericParser): | |||
| 172 | ''' | 167 | ''' |
| 173 | Return a dict with all the meta of the file | 168 | Return a dict with all the meta of the file |
| 174 | ''' | 169 | ''' |
| 175 | metadata={} | 170 | metadata = {} |
| 176 | for key in self.meta_list: | 171 | for key in self.meta_list: |
| 177 | if key == 'creation-date' or key == 'mod-date': | 172 | if key == 'creation-date' or key == 'mod-date': |
| 178 | #creation and modification are set to -1 | 173 | #creation and modification are set to -1 |
| @@ -181,5 +176,5 @@ class PdfStripper(parser.GenericParser): | |||
| 181 | else: | 176 | else: |
| 182 | if self.document.get_property(key) is not None and \ | 177 | if self.document.get_property(key) is not None and \ |
| 183 | self.document.get_property(key) != '': | 178 | self.document.get_property(key) != '': |
| 184 | metadata[key] = self.document.get_property(key) | 179 | metadata[key] = self.document.get_property(key) |
| 185 | return metadata | 180 | return metadata |
diff --git a/lib/parser.py b/lib/parser.py index 385dd78..044ef0a 100644 --- a/lib/parser.py +++ b/lib/parser.py | |||
| @@ -6,7 +6,6 @@ import hachoir_core | |||
| 6 | import hachoir_editor | 6 | import hachoir_editor |
| 7 | 7 | ||
| 8 | import os | 8 | import os |
| 9 | import mimetypes | ||
| 10 | 9 | ||
| 11 | import mat | 10 | import mat |
| 12 | 11 | ||
| @@ -14,6 +13,9 @@ NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml') | |||
| 14 | 13 | ||
| 15 | 14 | ||
| 16 | class GenericParser(object): | 15 | class GenericParser(object): |
| 16 | ''' | ||
| 17 | Parent class of all parsers | ||
| 18 | ''' | ||
| 17 | def __init__(self, filename, parser, mime, backup, add2archive): | 19 | def __init__(self, filename, parser, mime, backup, add2archive): |
| 18 | self.filename = '' | 20 | self.filename = '' |
| 19 | self.parser = parser | 21 | self.parser = parser |
| @@ -30,7 +32,6 @@ class GenericParser(object): | |||
| 30 | self.basename = os.path.basename(filename) # only filename | 32 | self.basename = os.path.basename(filename) # only filename |
| 31 | 33 | ||
| 32 | 34 | ||
| 33 | |||
| 34 | def is_clean(self): | 35 | def is_clean(self): |
| 35 | ''' | 36 | ''' |
| 36 | Check if the file is clean from harmful metadatas | 37 | Check if the file is clean from harmful metadatas |
| @@ -68,7 +69,7 @@ class GenericParser(object): | |||
| 68 | 69 | ||
| 69 | def get_meta(self): | 70 | def get_meta(self): |
| 70 | ''' | 71 | ''' |
| 71 | return a dict with all the meta of the file | 72 | Return a dict with all the meta of the file |
| 72 | ''' | 73 | ''' |
| 73 | metadata = {} | 74 | metadata = {} |
| 74 | for field in self.editor: | 75 | for field in self.editor: |
