diff options
| -rw-r--r-- | .travis.yml | 4 | ||||
| -rw-r--r-- | README.md | 3 | ||||
| -rw-r--r-- | data/FORMATS | 6 | ||||
| -rw-r--r-- | libmat/archive.py | 13 | ||||
| -rw-r--r-- | libmat/audio.py | 53 | ||||
| -rw-r--r-- | libmat/exiftool.py | 4 | ||||
| -rw-r--r-- | libmat/hachoir_editor/__init__.py | 8 | ||||
| -rw-r--r-- | libmat/hachoir_editor/field.py | 69 | ||||
| -rw-r--r-- | libmat/hachoir_editor/fieldset.py | 352 | ||||
| -rw-r--r-- | libmat/hachoir_editor/typed_field.py | 268 | ||||
| -rw-r--r-- | libmat/images.py | 52 | ||||
| -rw-r--r-- | libmat/mat.py | 21 | ||||
| -rw-r--r-- | libmat/misc.py | 4 | ||||
| -rw-r--r-- | libmat/mutagenstripper.py | 66 | ||||
| -rw-r--r-- | libmat/office.py | 4 | ||||
| -rw-r--r-- | libmat/parser.py | 78 | ||||
| -rw-r--r-- | libmat/strippers.py | 18 | ||||
| -rwxr-xr-x | mat | 4 | ||||
| -rw-r--r-- | mat.1 | 2 | ||||
| -rwxr-xr-x | setup.py | 6 |
20 files changed, 100 insertions, 935 deletions
diff --git a/.travis.yml b/.travis.yml index 758176a..d30d5e0 100644 --- a/.travis.yml +++ b/.travis.yml | |||
| @@ -16,6 +16,7 @@ addons: | |||
| 16 | - gir1.2-poppler-0.18 | 16 | - gir1.2-poppler-0.18 |
| 17 | - python-pdfrw | 17 | - python-pdfrw |
| 18 | - python-gi-cairo | 18 | - python-gi-cairo |
| 19 | - python-mutagen | ||
| 19 | virtualenv: | 20 | virtualenv: |
| 20 | system_site_packages: true | 21 | system_site_packages: true |
| 21 | 22 | ||
| @@ -25,12 +26,11 @@ install: | |||
| 25 | - pip install --user --upgrade setuptools | 26 | - pip install --user --upgrade setuptools |
| 26 | - pip install --user coveralls | 27 | - pip install --user coveralls |
| 27 | - pip install --user codecov | 28 | - pip install --user codecov |
| 28 | - pip install --user mutagen hachoir_core hachoir_parser | ||
| 29 | - popd | 29 | - popd |
| 30 | - python setup.py install | 30 | - python setup.py install |
| 31 | 31 | ||
| 32 | script: | 32 | script: |
| 33 | - coverage run --source=libmat --omit='*hachoir_editor*' setup.py test | 33 | - coverage run --source=libmat setup.py test |
| 34 | 34 | ||
| 35 | after_success: | 35 | after_success: |
| 36 | - coveralls | 36 | - coveralls |
| @@ -27,7 +27,6 @@ See README.security | |||
| 27 | DEPENDENCIES | 27 | DEPENDENCIES |
| 28 | ============ | 28 | ============ |
| 29 | * python2.7 (at least) | 29 | * python2.7 (at least) |
| 30 | * python-hachoir-core and python-hachoir-parser | ||
| 31 | * python-pdfrw, gir-poppler and python-gi-cairo for full PDF support | 30 | * python-pdfrw, gir-poppler and python-gi-cairo for full PDF support |
| 32 | * python-gi for the GUI | 31 | * python-gi for the GUI |
| 33 | * shred (should be already installed) | 32 | * shred (should be already installed) |
| @@ -35,7 +34,7 @@ DEPENDENCIES | |||
| 35 | OPTIONALS DEPENDENCIES | 34 | OPTIONALS DEPENDENCIES |
| 36 | ====================== | 35 | ====================== |
| 37 | * python-mutagen: for massive audio format support | 36 | * python-mutagen: for massive audio format support |
| 38 | * exiftool: for _massive_ image format support | 37 | * exiftool: for image format support |
| 39 | 38 | ||
| 40 | USAGE | 39 | USAGE |
| 41 | ===== | 40 | ===== |
diff --git a/data/FORMATS b/data/FORMATS index b398be1..6880e25 100644 --- a/data/FORMATS +++ b/data/FORMATS | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | <mimetype>image/png</mimetype> | 5 | <mimetype>image/png</mimetype> |
| 6 | <support>Full</support> | 6 | <support>Full</support> |
| 7 | <metadata>Textual metadata and date</metadata> | 7 | <metadata>Textual metadata and date</metadata> |
| 8 | <method>Removal of harmful fields with hachoir.</method> | 8 | <method>Removal of harmful fields with exiftool.</method> |
| 9 | <remaining>None</remaining> | 9 | <remaining>None</remaining> |
| 10 | </format> | 10 | </format> |
| 11 | 11 | ||
| @@ -15,7 +15,7 @@ | |||
| 15 | <mimetype>image/jpeg</mimetype> | 15 | <mimetype>image/jpeg</mimetype> |
| 16 | <support>Partial</support> | 16 | <support>Partial</support> |
| 17 | <metadata>Comments and exif/photoshop/adobe</metadata> | 17 | <metadata>Comments and exif/photoshop/adobe</metadata> |
| 18 | <method>Removal of harmful fields with hachoir.</method> | 18 | <method>Removal of harmful fields with exiftool.</method> |
| 19 | <remaining>Canon Raw tags</remaining> | 19 | <remaining>Canon Raw tags</remaining> |
| 20 | </format> | 20 | </format> |
| 21 | 21 | ||
| @@ -75,7 +75,7 @@ | |||
| 75 | <mimetype>audio/mpeg</mimetype> | 75 | <mimetype>audio/mpeg</mimetype> |
| 76 | <support>Full</support> | 76 | <support>Full</support> |
| 77 | <metadata>Id3</metadata> | 77 | <metadata>Id3</metadata> |
| 78 | <method>Removal of harmful fields with hachoir</method> | 78 | <method>Removal of harmful fields with exiftool</method> |
| 79 | <remaining>None</remaining> | 79 | <remaining>None</remaining> |
| 80 | </format> | 80 | </format> |
| 81 | 81 | ||
diff --git a/libmat/archive.py b/libmat/archive.py index ad9fdc9..2e14538 100644 --- a/libmat/archive.py +++ b/libmat/archive.py | |||
| @@ -20,9 +20,8 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 20 | """ Represent a generic archive | 20 | """ Represent a generic archive |
| 21 | """ | 21 | """ |
| 22 | 22 | ||
| 23 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 23 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 24 | super(GenericArchiveStripper, self).__init__(filename, | 24 | super(GenericArchiveStripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 25 | parser, mime, backup, is_writable, **kwargs) | ||
| 26 | self.compression = '' | 25 | self.compression = '' |
| 27 | self.add2archive = kwargs['add2archive'] | 26 | self.add2archive = kwargs['add2archive'] |
| 28 | self.tempdir = tempfile.mkdtemp() | 27 | self.tempdir = tempfile.mkdtemp() |
| @@ -354,8 +353,8 @@ class GzipStripper(TarStripper): | |||
| 354 | """ Represent a tar.gz archive | 353 | """ Represent a tar.gz archive |
| 355 | """ | 354 | """ |
| 356 | 355 | ||
| 357 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 356 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 358 | super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 357 | super(GzipStripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 359 | self.compression = ':gz' | 358 | self.compression = ':gz' |
| 360 | 359 | ||
| 361 | 360 | ||
| @@ -363,6 +362,6 @@ class Bzip2Stripper(TarStripper): | |||
| 363 | """ Represent a tar.bz2 archive | 362 | """ Represent a tar.bz2 archive |
| 364 | """ | 363 | """ |
| 365 | 364 | ||
| 366 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 365 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 367 | super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 366 | super(Bzip2Stripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 368 | self.compression = ':bz2' | 367 | self.compression = ':bz2' |
diff --git a/libmat/audio.py b/libmat/audio.py deleted file mode 100644 index 2747dc1..0000000 --- a/libmat/audio.py +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | """ Care about audio fileformat | ||
| 2 | """ | ||
| 3 | |||
| 4 | try: | ||
| 5 | from mutagen.flac import FLAC | ||
| 6 | from mutagen.oggvorbis import OggVorbis | ||
| 7 | except ImportError: | ||
| 8 | pass | ||
| 9 | |||
| 10 | import parser | ||
| 11 | import mutagenstripper | ||
| 12 | |||
| 13 | |||
| 14 | class MpegAudioStripper(parser.GenericParser): | ||
| 15 | """ Represent mpeg audio file (mp3, ...) | ||
| 16 | """ | ||
| 17 | def _should_remove(self, field): | ||
| 18 | return field.name in ("id3v1", "id3v2") | ||
| 19 | |||
| 20 | |||
| 21 | class OggStripper(mutagenstripper.MutagenStripper): | ||
| 22 | """ Represent an ogg vorbis file | ||
| 23 | """ | ||
| 24 | def _create_mfile(self): | ||
| 25 | self.mfile = OggVorbis(self.filename) | ||
| 26 | |||
| 27 | |||
| 28 | class FlacStripper(mutagenstripper.MutagenStripper): | ||
| 29 | """ Represent a Flac audio file | ||
| 30 | """ | ||
| 31 | def _create_mfile(self): | ||
| 32 | self.mfile = FLAC(self.filename) | ||
| 33 | |||
| 34 | def remove_all(self): | ||
| 35 | """ Remove the "metadata" block from the file | ||
| 36 | """ | ||
| 37 | super(FlacStripper, self).remove_all() | ||
| 38 | self.mfile.clear_pictures() | ||
| 39 | self.mfile.save() | ||
| 40 | return True | ||
| 41 | |||
| 42 | def is_clean(self): | ||
| 43 | """ Check if the "metadata" block is present in the file | ||
| 44 | """ | ||
| 45 | return super(FlacStripper, self).is_clean() and not self.mfile.pictures | ||
| 46 | |||
| 47 | def get_meta(self): | ||
| 48 | """ Return the content of the metadata block if present | ||
| 49 | """ | ||
| 50 | metadata = super(FlacStripper, self).get_meta() | ||
| 51 | if self.mfile.pictures: | ||
| 52 | metadata['picture:'] = 'yes' | ||
| 53 | return metadata | ||
diff --git a/libmat/exiftool.py b/libmat/exiftool.py index 07ef06b..ef81ed3 100644 --- a/libmat/exiftool.py +++ b/libmat/exiftool.py | |||
| @@ -9,8 +9,8 @@ class ExiftoolStripper(parser.GenericParser): | |||
| 9 | """ A generic stripper class using exiftool as backend | 9 | """ A generic stripper class using exiftool as backend |
| 10 | """ | 10 | """ |
| 11 | 11 | ||
| 12 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 12 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 13 | super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 13 | super(ExiftoolStripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 14 | self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', | 14 | self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', |
| 15 | 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type', | 15 | 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type', |
| 16 | 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'} | 16 | 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'} |
diff --git a/libmat/hachoir_editor/__init__.py b/libmat/hachoir_editor/__init__.py deleted file mode 100644 index 1835676..0000000 --- a/libmat/hachoir_editor/__init__.py +++ /dev/null | |||
| @@ -1,8 +0,0 @@ | |||
| 1 | from field import ( | ||
| 2 | EditorError, FakeField) | ||
| 3 | from typed_field import ( | ||
| 4 | EditableField, EditableBits, EditableBytes, | ||
| 5 | EditableInteger, EditableString, | ||
| 6 | createEditableField) | ||
| 7 | from fieldset import EditableFieldSet, NewFieldSet, createEditor | ||
| 8 | |||
diff --git a/libmat/hachoir_editor/field.py b/libmat/hachoir_editor/field.py deleted file mode 100644 index 6b1efe3..0000000 --- a/libmat/hachoir_editor/field.py +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | from hachoir_core.error import HachoirError | ||
| 2 | from hachoir_core.field import joinPath, MissingField | ||
| 3 | |||
| 4 | class EditorError(HachoirError): | ||
| 5 | pass | ||
| 6 | |||
| 7 | class FakeField(object): | ||
| 8 | """ | ||
| 9 | This class has an API that looks similar to the Field API, but objects don't contain | ||
| 10 | any value: all values are _computed_ by parent methods. | ||
| 11 | |||
| 12 | Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). | ||
| 13 | """ | ||
| 14 | is_field_set = False | ||
| 15 | |||
| 16 | def __init__(self, parent, name): | ||
| 17 | self._parent = parent | ||
| 18 | self._name = name | ||
| 19 | |||
| 20 | def _getPath(self): | ||
| 21 | return joinPath(self._parent.path, self._name) | ||
| 22 | path = property(_getPath) | ||
| 23 | |||
| 24 | def _getName(self): | ||
| 25 | return self._name | ||
| 26 | name = property(_getName) | ||
| 27 | |||
| 28 | def _getAddress(self): | ||
| 29 | return self._parent._getFieldAddress(self._name) | ||
| 30 | address = property(_getAddress) | ||
| 31 | |||
| 32 | def _getSize(self): | ||
| 33 | return self._parent.input[self._name].size | ||
| 34 | size = property(_getSize) | ||
| 35 | |||
| 36 | def _getValue(self): | ||
| 37 | return self._parent.input[self._name].value | ||
| 38 | value = property(_getValue) | ||
| 39 | |||
| 40 | def createDisplay(self): | ||
| 41 | # TODO: Returns new value if field is altered | ||
| 42 | return self._parent.input[self._name].display | ||
| 43 | display = property(createDisplay) | ||
| 44 | |||
| 45 | def _getParent(self): | ||
| 46 | return self._parent | ||
| 47 | parent = property(_getParent) | ||
| 48 | |||
| 49 | def hasValue(self): | ||
| 50 | return self._parent.input[self._name].hasValue() | ||
| 51 | |||
| 52 | def __getitem__(self, key): | ||
| 53 | # TODO: Implement this function! | ||
| 54 | raise MissingField(self, key) | ||
| 55 | |||
| 56 | def _isAltered(self): | ||
| 57 | return False | ||
| 58 | is_altered = property(_isAltered) | ||
| 59 | |||
| 60 | def writeInto(self, output): | ||
| 61 | size = self.size | ||
| 62 | addr = self._parent._getFieldInputAddress(self._name) | ||
| 63 | input = self._parent.input | ||
| 64 | stream = input.stream | ||
| 65 | if size % 8: | ||
| 66 | output.copyBitsFrom(stream, addr, size, input.endian) | ||
| 67 | else: | ||
| 68 | output.copyBytesFrom(stream, addr, size//8) | ||
| 69 | |||
diff --git a/libmat/hachoir_editor/fieldset.py b/libmat/hachoir_editor/fieldset.py deleted file mode 100644 index b7c9b07..0000000 --- a/libmat/hachoir_editor/fieldset.py +++ /dev/null | |||
| @@ -1,352 +0,0 @@ | |||
| 1 | from hachoir_core.dict import UniqKeyError | ||
| 2 | from hachoir_core.field import MissingField, Float32, Float64, FakeArray | ||
| 3 | from hachoir_core.compatibility import any | ||
| 4 | from hachoir_core.i18n import _ | ||
| 5 | from typed_field import createEditableField | ||
| 6 | from field import EditorError | ||
| 7 | from collections import deque # Python 2.4 | ||
| 8 | import weakref # Python 2.1 | ||
| 9 | import struct | ||
| 10 | |||
| 11 | class EditableFieldSet(object): | ||
| 12 | MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors | ||
| 13 | is_field_set = True | ||
| 14 | |||
| 15 | def __init__(self, parent, fieldset): | ||
| 16 | self._parent = parent | ||
| 17 | self.input = fieldset # original FieldSet | ||
| 18 | self._fields = {} # cache of editable fields | ||
| 19 | self._deleted = set() # Names of deleted fields | ||
| 20 | self._inserted = {} # Inserted field (name => list of field, | ||
| 21 | # where name is the name after) | ||
| 22 | |||
| 23 | def array(self, key): | ||
| 24 | # FIXME: Use cache? | ||
| 25 | return FakeArray(self, key) | ||
| 26 | |||
| 27 | def _getParent(self): | ||
| 28 | return self._parent | ||
| 29 | parent = property(_getParent) | ||
| 30 | |||
| 31 | def _isAltered(self): | ||
| 32 | if self._inserted: | ||
| 33 | return True | ||
| 34 | if self._deleted: | ||
| 35 | return True | ||
| 36 | return any(field.is_altered for field in self._fields.itervalues()) | ||
| 37 | is_altered = property(_isAltered) | ||
| 38 | |||
| 39 | def reset(self): | ||
| 40 | """ | ||
| 41 | Reset the field set and the input field set. | ||
| 42 | """ | ||
| 43 | for key, field in self._fields.iteritems(): | ||
| 44 | if not field.is_altered: | ||
| 45 | del self._fields[key] | ||
| 46 | self.input.reset() | ||
| 47 | |||
| 48 | def __len__(self): | ||
| 49 | return len(self.input) \ | ||
| 50 | - len(self._deleted) \ | ||
| 51 | + sum( len(new) for new in self._inserted.itervalues() ) | ||
| 52 | |||
| 53 | def __iter__(self): | ||
| 54 | for field in self.input: | ||
| 55 | name = field.name | ||
| 56 | if name in self._inserted: | ||
| 57 | for newfield in self._inserted[name]: | ||
| 58 | yield weakref.proxy(newfield) | ||
| 59 | if name not in self._deleted: | ||
| 60 | yield self[name] | ||
| 61 | if None in self._inserted: | ||
| 62 | for newfield in self._inserted[None]: | ||
| 63 | yield weakref.proxy(newfield) | ||
| 64 | |||
| 65 | def insertBefore(self, name, *new_fields): | ||
| 66 | self._insert(name, new_fields, False) | ||
| 67 | |||
| 68 | def insertAfter(self, name, *new_fields): | ||
| 69 | self._insert(name, new_fields, True) | ||
| 70 | |||
| 71 | def insert(self, *new_fields): | ||
| 72 | self._insert(None, new_fields, True) | ||
| 73 | |||
| 74 | def _insert(self, key, new_fields, next): | ||
| 75 | """ | ||
| 76 | key is the name of the field before which new_fields | ||
| 77 | will be inserted. If next is True, the fields will be inserted | ||
| 78 | _after_ this field. | ||
| 79 | """ | ||
| 80 | # Set unique field name | ||
| 81 | for field in new_fields: | ||
| 82 | if field._name.endswith("[]"): | ||
| 83 | self.input.setUniqueFieldName(field) | ||
| 84 | |||
| 85 | # Check that there is no duplicate in inserted fields | ||
| 86 | new_names = list(field.name for field in new_fields) | ||
| 87 | names_set = set(new_names) | ||
| 88 | if len(names_set) != len(new_fields): | ||
| 89 | duplicates = (name for name in names_set if 1 < new_names.count(name)) | ||
| 90 | raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates)) | ||
| 91 | |||
| 92 | # Check that field names are not in input | ||
| 93 | if self.input: # Write special version for NewFieldSet? | ||
| 94 | for name in new_names: | ||
| 95 | if name in self.input and name not in self._deleted: | ||
| 96 | raise UniqKeyError(_("Field name '%s' already exists") % name) | ||
| 97 | |||
| 98 | # Check that field names are not in inserted fields | ||
| 99 | for fields in self._inserted.itervalues(): | ||
| 100 | for field in fields: | ||
| 101 | if field.name in new_names: | ||
| 102 | raise UniqKeyError(_("Field name '%s' already exists") % field.name) | ||
| 103 | |||
| 104 | # Does the input already have inserted fields for this key? | ||
| 105 | if key in self._inserted: | ||
| 106 | if next: | ||
| 107 | self._inserted[key].extend( reversed(new_fields) ) | ||
| 108 | else: | ||
| 109 | self._inserted[key].extendleft( reversed(new_fields) ) | ||
| 110 | return | ||
| 111 | |||
| 112 | # Would like to insert in inserted fields? | ||
| 113 | if key: | ||
| 114 | for fields in self._inserted.itervalues(): | ||
| 115 | names = [item.name for item in fields] | ||
| 116 | try: | ||
| 117 | pos = names.index(key) | ||
| 118 | except ValueError: | ||
| 119 | continue | ||
| 120 | if 0 <= pos: | ||
| 121 | if next: | ||
| 122 | pos += 1 | ||
| 123 | fields.rotate(-pos) | ||
| 124 | fields.extendleft( reversed(new_fields) ) | ||
| 125 | fields.rotate(pos) | ||
| 126 | return | ||
| 127 | |||
| 128 | # Get next field. Use None if we are at the end. | ||
| 129 | if next: | ||
| 130 | index = self.input[key].index + 1 | ||
| 131 | try: | ||
| 132 | key = self.input[index].name | ||
| 133 | except IndexError: | ||
| 134 | key = None | ||
| 135 | |||
| 136 | # Check that field names are not in input | ||
| 137 | if key not in self.input: | ||
| 138 | raise MissingField(self, key) | ||
| 139 | |||
| 140 | # Insert in original input | ||
| 141 | self._inserted[key]= deque(new_fields) | ||
| 142 | |||
| 143 | def _getDescription(self): | ||
| 144 | return self.input.description | ||
| 145 | description = property(_getDescription) | ||
| 146 | |||
| 147 | def _getStream(self): | ||
| 148 | # FIXME: This property is maybe a bad idea since addresses may be different | ||
| 149 | return self.input.stream | ||
| 150 | stream = property(_getStream) | ||
| 151 | |||
| 152 | def _getName(self): | ||
| 153 | return self.input.name | ||
| 154 | name = property(_getName) | ||
| 155 | |||
| 156 | def _getEndian(self): | ||
| 157 | return self.input.endian | ||
| 158 | endian = property(_getEndian) | ||
| 159 | |||
| 160 | def _getAddress(self): | ||
| 161 | if self._parent: | ||
| 162 | return self._parent._getFieldAddress(self.name) | ||
| 163 | else: | ||
| 164 | return 0 | ||
| 165 | address = property(_getAddress) | ||
| 166 | |||
| 167 | def _getAbsoluteAddress(self): | ||
| 168 | address = self.address | ||
| 169 | current = self._parent | ||
| 170 | while current: | ||
| 171 | address += current.address | ||
| 172 | current = current._parent | ||
| 173 | return address | ||
| 174 | absolute_address = property(_getAbsoluteAddress) | ||
| 175 | |||
| 176 | def hasValue(self): | ||
| 177 | return False | ||
| 178 | # return self._parent.input[self.name].hasValue() | ||
| 179 | |||
| 180 | def _getSize(self): | ||
| 181 | if self.is_altered: | ||
| 182 | return sum(field.size for field in self) | ||
| 183 | else: | ||
| 184 | return self.input.size | ||
| 185 | size = property(_getSize) | ||
| 186 | |||
| 187 | def _getPath(self): | ||
| 188 | return self.input.path | ||
| 189 | path = property(_getPath) | ||
| 190 | |||
| 191 | def _getOriginalField(self, name): | ||
| 192 | assert name in self.input | ||
| 193 | return self.input[name] | ||
| 194 | |||
| 195 | def _getFieldInputAddress(self, name): | ||
| 196 | """ | ||
| 197 | Absolute address of a field from the input field set. | ||
| 198 | """ | ||
| 199 | assert name in self.input | ||
| 200 | return self.input[name].absolute_address | ||
| 201 | |||
| 202 | def _getFieldAddress(self, name): | ||
| 203 | """ | ||
| 204 | Compute relative address of a field. The operation takes care of | ||
| 205 | deleted and resized fields. | ||
| 206 | """ | ||
| 207 | #assert name not in self._deleted | ||
| 208 | addr = 0 | ||
| 209 | for field in self: | ||
| 210 | if field.name == name: | ||
| 211 | return addr | ||
| 212 | addr += field.size | ||
| 213 | raise MissingField(self, name) | ||
| 214 | |||
| 215 | def _getItemByPath(self, path): | ||
| 216 | if not path[0]: | ||
| 217 | path = path[1:] | ||
| 218 | field = self | ||
| 219 | for name in path: | ||
| 220 | field = field[name] | ||
| 221 | return field | ||
| 222 | |||
| 223 | def __contains__(self, name): | ||
| 224 | try: | ||
| 225 | field = self[name] | ||
| 226 | return (field is not None) | ||
| 227 | except MissingField: | ||
| 228 | return False | ||
| 229 | |||
| 230 | def __getitem__(self, key): | ||
| 231 | """ | ||
| 232 | Create a weak reference to an editable field (EditableField) for the | ||
| 233 | field with specified name. If the field is removed later, using the | ||
| 234 | editable field will raise a weakref.ReferenceError exception. | ||
| 235 | |||
| 236 | May raise a MissingField error if the field doesn't exist in original | ||
| 237 | field set or it has been deleted. | ||
| 238 | """ | ||
| 239 | if "/" in key: | ||
| 240 | return self._getItemByPath(key.split("/")) | ||
| 241 | if isinstance(key, (int, long)): | ||
| 242 | raise EditorError("Integer index are not supported") | ||
| 243 | |||
| 244 | if (key in self._deleted) or (key not in self.input): | ||
| 245 | raise MissingField(self, key) | ||
| 246 | if key not in self._fields: | ||
| 247 | field = self.input[key] | ||
| 248 | if field.is_field_set: | ||
| 249 | self._fields[key] = createEditableFieldSet(self, field) | ||
| 250 | else: | ||
| 251 | self._fields[key] = createEditableField(self, field) | ||
| 252 | return weakref.proxy(self._fields[key]) | ||
| 253 | |||
| 254 | def __delitem__(self, name): | ||
| 255 | """ | ||
| 256 | Remove a field from the field set. May raise a MissingField exception | ||
| 257 | if the field has already been deleted. | ||
| 258 | """ | ||
| 259 | parts = name.partition('/') | ||
| 260 | if parts[2]: | ||
| 261 | fieldset = self[parts[0]] | ||
| 262 | del fieldset[parts[2]] | ||
| 263 | return | ||
| 264 | if name in self._deleted: | ||
| 265 | raise MissingField(self, name) | ||
| 266 | self._deleted.add(name) | ||
| 267 | if name in self._fields: | ||
| 268 | del self._fields[name] | ||
| 269 | |||
| 270 | def writeInto(self, output): | ||
| 271 | """ | ||
| 272 | Write the content of this field set into the output stream | ||
| 273 | (OutputStream). | ||
| 274 | """ | ||
| 275 | if not self.is_altered: | ||
| 276 | # Not altered: just copy bits/bytes | ||
| 277 | input = self.input | ||
| 278 | if input.size % 8: | ||
| 279 | output.copyBitsFrom(input.stream, | ||
| 280 | input.absolute_address, input.size, input.endian) | ||
| 281 | else: | ||
| 282 | output.copyBytesFrom(input.stream, | ||
| 283 | input.absolute_address, input.size//8) | ||
| 284 | else: | ||
| 285 | # Altered: call writeInto() method of each field | ||
| 286 | realaddr = 0 | ||
| 287 | for field in self: | ||
| 288 | field.writeInto(output) | ||
| 289 | realaddr += field.size | ||
| 290 | |||
| 291 | def _getValue(self): | ||
| 292 | raise EditorError('Field set "%s" has no value' % self.path) | ||
| 293 | def _setValue(self, value): | ||
| 294 | raise EditorError('Field set "%s" value is read only' % self.path) | ||
| 295 | value = property(_getValue, _setValue, "Value of field") | ||
| 296 | |||
| 297 | class EditableFloat(EditableFieldSet): | ||
| 298 | _value = None | ||
| 299 | |||
| 300 | def _isAltered(self): | ||
| 301 | return (self._value is not None) | ||
| 302 | is_altered = property(_isAltered) | ||
| 303 | |||
| 304 | def writeInto(self, output): | ||
| 305 | if self._value is not None: | ||
| 306 | self._write(output) | ||
| 307 | else: | ||
| 308 | EditableFieldSet.writeInto(self, output) | ||
| 309 | |||
| 310 | def _write(self, output): | ||
| 311 | format = self.input.struct_format | ||
| 312 | raw = struct.pack(format, self._value) | ||
| 313 | output.writeBytes(raw) | ||
| 314 | |||
| 315 | def _setValue(self, value): | ||
| 316 | self.parent._is_altered = True | ||
| 317 | self._value = value | ||
| 318 | value = property(EditableFieldSet._getValue, _setValue) | ||
| 319 | |||
| 320 | def createEditableFieldSet(parent, field): | ||
| 321 | cls = field.__class__ | ||
| 322 | # FIXME: Support Float80 | ||
| 323 | if cls in (Float32, Float64): | ||
| 324 | return EditableFloat(parent, field) | ||
| 325 | else: | ||
| 326 | return EditableFieldSet(parent, field) | ||
| 327 | |||
| 328 | class NewFieldSet(EditableFieldSet): | ||
| 329 | def __init__(self, parent, name): | ||
| 330 | EditableFieldSet.__init__(self, parent, None) | ||
| 331 | self._name = name | ||
| 332 | self._endian = parent.endian | ||
| 333 | |||
| 334 | def __iter__(self): | ||
| 335 | if None in self._inserted: | ||
| 336 | return iter(self._inserted[None]) | ||
| 337 | else: | ||
| 338 | raise StopIteration() | ||
| 339 | |||
| 340 | def _getName(self): | ||
| 341 | return self._name | ||
| 342 | name = property(_getName) | ||
| 343 | |||
| 344 | def _getEndian(self): | ||
| 345 | return self._endian | ||
| 346 | endian = property(_getEndian) | ||
| 347 | |||
| 348 | is_altered = property(lambda self: True) | ||
| 349 | |||
| 350 | def createEditor(fieldset): | ||
| 351 | return EditableFieldSet(None, fieldset) | ||
| 352 | |||
diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py deleted file mode 100644 index 606d39b..0000000 --- a/libmat/hachoir_editor/typed_field.py +++ /dev/null | |||
| @@ -1,268 +0,0 @@ | |||
| 1 | from hachoir_core.field import ( | ||
| 2 | RawBits, Bit, Bits, PaddingBits, | ||
| 3 | RawBytes, Bytes, PaddingBytes, | ||
| 4 | GenericString, Character, | ||
| 5 | isInteger, isString) | ||
| 6 | from field import FakeField | ||
| 7 | |||
| 8 | |||
| 9 | class EditableField(FakeField): | ||
| 10 | """ | ||
| 11 | Pure virtual class used to write editable field class. | ||
| 12 | """ | ||
| 13 | |||
| 14 | _is_altered = False | ||
| 15 | |||
| 16 | def __init__(self, parent, name, value=None): | ||
| 17 | FakeField.__init__(self, parent, name) | ||
| 18 | self._value = value | ||
| 19 | |||
| 20 | def _isAltered(self): | ||
| 21 | return self._is_altered | ||
| 22 | |||
| 23 | is_altered = property(_isAltered) | ||
| 24 | |||
| 25 | def hasValue(self): | ||
| 26 | return True | ||
| 27 | |||
| 28 | def _computeSize(self): | ||
| 29 | raise NotImplementedError() | ||
| 30 | |||
| 31 | def _getValue(self): | ||
| 32 | return self._value | ||
| 33 | |||
| 34 | def _setValue(self, value): | ||
| 35 | self._value = value | ||
| 36 | |||
| 37 | def _propGetValue(self): | ||
| 38 | if self._value is not None: | ||
| 39 | return self._getValue() | ||
| 40 | else: | ||
| 41 | return FakeField._getValue(self) | ||
| 42 | |||
| 43 | def _propSetValue(self, value): | ||
| 44 | self._setValue(value) | ||
| 45 | self._is_altered = True | ||
| 46 | |||
| 47 | value = property(_propGetValue, _propSetValue) | ||
| 48 | |||
| 49 | def _getSize(self): | ||
| 50 | if self._value is not None: | ||
| 51 | return self._computeSize() | ||
| 52 | else: | ||
| 53 | return FakeField._getSize(self) | ||
| 54 | |||
| 55 | size = property(_getSize) | ||
| 56 | |||
| 57 | def _write(self, output): | ||
| 58 | raise NotImplementedError() | ||
| 59 | |||
| 60 | def writeInto(self, output): | ||
| 61 | if self._is_altered: | ||
| 62 | self._write(output) | ||
| 63 | else: | ||
| 64 | return FakeField.writeInto(self, output) | ||
| 65 | |||
| 66 | |||
| 67 | class EditableFixedField(EditableField): | ||
| 68 | """ | ||
| 69 | Editable field with fixed size. | ||
| 70 | """ | ||
| 71 | |||
| 72 | def __init__(self, parent, name, value=None, size=None): | ||
| 73 | EditableField.__init__(self, parent, name, value) | ||
| 74 | if size is not None: | ||
| 75 | self._size = size | ||
| 76 | else: | ||
| 77 | self._size = self._parent._getOriginalField(self._name).size | ||
| 78 | |||
| 79 | def _getSize(self): | ||
| 80 | return self._size | ||
| 81 | |||
| 82 | size = property(_getSize) | ||
| 83 | |||
| 84 | |||
| 85 | class EditableBits(EditableFixedField): | ||
| 86 | def __init__(self, parent, name, *args): | ||
| 87 | if args: | ||
| 88 | if len(args) != 2: | ||
| 89 | raise TypeError( | ||
| 90 | "Wrong argument count, EditableBits constructor prototype is: " | ||
| 91 | "(parent, name, [size, value])") | ||
| 92 | size = args[0] | ||
| 93 | value = args[1] | ||
| 94 | assert isinstance(value, (int, long)) | ||
| 95 | else: | ||
| 96 | size = None | ||
| 97 | value = None | ||
| 98 | EditableFixedField.__init__(self, parent, name, value, size) | ||
| 99 | if args: | ||
| 100 | self._setValue(args[1]) | ||
| 101 | self._is_altered = True | ||
| 102 | |||
| 103 | def _setValue(self, value): | ||
| 104 | if not (0 <= value < (1 << self._size)): | ||
| 105 | raise ValueError("Invalid value, must be in range %s..%s" | ||
| 106 | % (0, (1 << self._size) - 1)) | ||
| 107 | self._value = value | ||
| 108 | |||
| 109 | def _write(self, output): | ||
| 110 | output.writeBits(self._size, self._value, self._parent.endian) | ||
| 111 | |||
| 112 | |||
| 113 | class EditableBytes(EditableField): | ||
| 114 | def _setValue(self, value): | ||
| 115 | if not value: raise ValueError( | ||
| 116 | "Unable to set empty string to a EditableBytes field") | ||
| 117 | self._value = value | ||
| 118 | |||
| 119 | def _computeSize(self): | ||
| 120 | return len(self._value) * 8 | ||
| 121 | |||
| 122 | def _write(self, output): | ||
| 123 | output.writeBytes(self._value) | ||
| 124 | |||
| 125 | |||
| 126 | class EditableString(EditableField): | ||
| 127 | MAX_SIZE = { | ||
| 128 | "Pascal8": (1 << 8) - 1, | ||
| 129 | "Pascal16": (1 << 16) - 1, | ||
| 130 | "Pascal32": (1 << 32) - 1, | ||
| 131 | } | ||
| 132 | |||
| 133 | def __init__(self, parent, name, *args, **kw): | ||
| 134 | if len(args) == 2: | ||
| 135 | value = args[1] | ||
| 136 | assert isinstance(value, str) # TODO: support Unicode | ||
| 137 | elif not args: | ||
| 138 | value = None | ||
| 139 | else: | ||
| 140 | raise TypeError( | ||
| 141 | "Wrong argument count, EditableString constructor prototype is:" | ||
| 142 | "(parent, name, [format, value])") | ||
| 143 | EditableField.__init__(self, parent, name, value) | ||
| 144 | if len(args) == 2: | ||
| 145 | self._charset = kw.get('charset', None) | ||
| 146 | self._format = args[0] | ||
| 147 | if self._format in GenericString.PASCAL_FORMATS: | ||
| 148 | self._prefix_size = GenericString.PASCAL_FORMATS[self._format] | ||
| 149 | else: | ||
| 150 | self._prefix_size = 0 | ||
| 151 | self._suffix_str = GenericString.staticSuffixStr( | ||
| 152 | self._format, self._charset, self._parent.endian) | ||
| 153 | self._is_altered = True | ||
| 154 | else: | ||
| 155 | orig = self._parent._getOriginalField(name) | ||
| 156 | self._charset = orig.charset | ||
| 157 | self._format = orig.format | ||
| 158 | self._prefix_size = orig.content_offset | ||
| 159 | self._suffix_str = orig.suffix_str | ||
| 160 | |||
| 161 | def _setValue(self, value): | ||
| 162 | size = len(value) | ||
| 163 | if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size: | ||
| 164 | raise ValueError("String is too big") | ||
| 165 | self._value = value | ||
| 166 | |||
| 167 | def _computeSize(self): | ||
| 168 | return (self._prefix_size + len(self._value) + len(self._suffix_str)) * 8 | ||
| 169 | |||
| 170 | def _write(self, output): | ||
| 171 | if self._format in GenericString.SUFFIX_FORMAT: | ||
| 172 | output.writeBytes(self._value) | ||
| 173 | output.writeBytes(self._suffix_str) | ||
| 174 | elif self._format == "fixed": | ||
| 175 | output.writeBytes(self._value) | ||
| 176 | else: | ||
| 177 | assert self._format in GenericString.PASCAL_FORMATS | ||
| 178 | size = GenericString.PASCAL_FORMATS[self._format] | ||
| 179 | output.writeInteger(len(self._value), False, size, self._parent.endian) | ||
| 180 | output.writeBytes(self._value) | ||
| 181 | |||
| 182 | |||
| 183 | class EditableCharacter(EditableFixedField): | ||
| 184 | def __init__(self, parent, name, *args): | ||
| 185 | if args: | ||
| 186 | if len(args) != 3: | ||
| 187 | raise TypeError( | ||
| 188 | "Wrong argument count, EditableCharacter " | ||
| 189 | "constructor prototype is: (parent, name, [value])") | ||
| 190 | value = args[0] | ||
| 191 | if not isinstance(value, str) or len(value) != 1: | ||
| 192 | raise TypeError("EditableCharacter needs a character") | ||
| 193 | else: | ||
| 194 | value = None | ||
| 195 | EditableFixedField.__init__(self, parent, name, value, 8) | ||
| 196 | if args: | ||
| 197 | self._is_altered = True | ||
| 198 | |||
| 199 | def _setValue(self, value): | ||
| 200 | if not isinstance(value, str) or len(value) != 1: | ||
| 201 | raise TypeError("EditableCharacter needs a character") | ||
| 202 | self._value = value | ||
| 203 | |||
| 204 | def _write(self, output): | ||
| 205 | output.writeBytes(self._value) | ||
| 206 | |||
| 207 | |||
| 208 | class EditableInteger(EditableFixedField): | ||
| 209 | VALID_VALUE_SIGNED = { | ||
| 210 | 8: (-(1 << 8), (1 << 8) - 1), | ||
| 211 | 16: (-(1 << 15), (1 << 15) - 1), | ||
| 212 | 32: (-(1 << 31), (1 << 31) - 1), | ||
| 213 | } | ||
| 214 | VALID_VALUE_UNSIGNED = { | ||
| 215 | 8: (0, (1 << 8) - 1), | ||
| 216 | 16: (0, (1 << 16) - 1), | ||
| 217 | 32: (0, (1 << 32) - 1) | ||
| 218 | } | ||
| 219 | |||
| 220 | def __init__(self, parent, name, *args): | ||
| 221 | if args: | ||
| 222 | if len(args) != 3: | ||
| 223 | raise TypeError( | ||
| 224 | "Wrong argument count, EditableInteger constructor prototype is: " | ||
| 225 | "(parent, name, [signed, size, value])") | ||
| 226 | size = args[1] | ||
| 227 | value = args[2] | ||
| 228 | assert isinstance(value, (int, long)) | ||
| 229 | else: | ||
| 230 | size = None | ||
| 231 | value = None | ||
| 232 | EditableFixedField.__init__(self, parent, name, value, size) | ||
| 233 | if args: | ||
| 234 | self._signed = args[0] | ||
| 235 | self._is_altered = True | ||
| 236 | else: | ||
| 237 | self._signed = self._parent._getOriginalField(self._name).signed | ||
| 238 | |||
| 239 | def _setValue(self, value): | ||
| 240 | if self._signed: | ||
| 241 | valid = self.VALID_VALUE_SIGNED | ||
| 242 | else: | ||
| 243 | valid = self.VALID_VALUE_UNSIGNED | ||
| 244 | minval, maxval = valid[self._size] | ||
| 245 | if not (minval <= value <= maxval): | ||
| 246 | raise ValueError("Invalid value, must be in range %s..%s" | ||
| 247 | % (minval, maxval)) | ||
| 248 | self._value = value | ||
| 249 | |||
| 250 | def _write(self, output): | ||
| 251 | output.writeInteger( | ||
| 252 | self.value, self._signed, self._size // 8, self._parent.endian) | ||
| 253 | |||
| 254 | |||
| 255 | def createEditableField(fieldset, field): | ||
| 256 | if isInteger(field): | ||
| 257 | cls = EditableInteger | ||
| 258 | elif isString(field): | ||
| 259 | cls = EditableString | ||
| 260 | elif field.__class__ in (RawBytes, Bytes, PaddingBytes): | ||
| 261 | cls = EditableBytes | ||
| 262 | elif field.__class__ in (RawBits, Bits, Bit, PaddingBits): | ||
| 263 | cls = EditableBits | ||
| 264 | elif field.__class__ == Character: | ||
| 265 | cls = EditableCharacter | ||
| 266 | else: | ||
| 267 | cls = FakeField | ||
| 268 | return cls(fieldset, field.name) | ||
diff --git a/libmat/images.py b/libmat/images.py deleted file mode 100644 index 813b0fd..0000000 --- a/libmat/images.py +++ /dev/null | |||
| @@ -1,52 +0,0 @@ | |||
| 1 | """ Takes care about pictures formats | ||
| 2 | |||
| 3 | References: | ||
| 4 | - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm | ||
| 5 | - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html | ||
| 6 | - PNG: http://www.w3.org/TR/PNG-Chunks.html | ||
| 7 | """ | ||
| 8 | |||
| 9 | import parser | ||
| 10 | |||
| 11 | |||
| 12 | class JpegStripper(parser.GenericParser): | ||
| 13 | """ Represents a jpeg file. | ||
| 14 | Custom Huffman and Quantization tables | ||
| 15 | are stripped: they may leak | ||
| 16 | some info, and the quality loss is minor. | ||
| 17 | """ | ||
| 18 | def _should_remove(self, field): | ||
| 19 | """ Return True if the field is compromising | ||
| 20 | """ | ||
| 21 | field_list = frozenset([ | ||
| 22 | 'start_image', # start of the image | ||
| 23 | 'app0', # JFIF data | ||
| 24 | 'start_frame', # specify width, height, number of components | ||
| 25 | 'start_scan', # specify which slice of data the top-to-bottom scan contains | ||
| 26 | 'data', # actual data | ||
| 27 | 'end_image']) # end of the image | ||
| 28 | if field.name in field_list: | ||
| 29 | return False | ||
| 30 | elif field.name.startswith('quantization['): # custom Quant. tables | ||
| 31 | return False | ||
| 32 | elif field.name.startswith('huffman['): # custom Huffman tables | ||
| 33 | return False | ||
| 34 | return True | ||
| 35 | |||
| 36 | |||
| 37 | class PngStripper(parser.GenericParser): | ||
| 38 | """ Represents a png file | ||
| 39 | """ | ||
| 40 | def _should_remove(self, field): | ||
| 41 | """ Return True if the field is compromising | ||
| 42 | """ | ||
| 43 | field_list = frozenset([ | ||
| 44 | 'id', | ||
| 45 | 'header', # PNG header | ||
| 46 | 'physical', # the intended pixel size or aspect ratio | ||
| 47 | 'end']) # end of the image | ||
| 48 | if field.name in field_list: | ||
| 49 | return False | ||
| 50 | elif field.name.startswith('data['): # data | ||
| 51 | return False | ||
| 52 | return True | ||
diff --git a/libmat/mat.py b/libmat/mat.py index 42357d6..2634cc3 100644 --- a/libmat/mat.py +++ b/libmat/mat.py | |||
| @@ -10,9 +10,6 @@ import platform | |||
| 10 | import subprocess | 10 | import subprocess |
| 11 | import xml.sax | 11 | import xml.sax |
| 12 | 12 | ||
| 13 | import hachoir_core.cmd_line | ||
| 14 | import hachoir_parser | ||
| 15 | |||
| 16 | import libmat.exceptions | 13 | import libmat.exceptions |
| 17 | 14 | ||
| 18 | __version__ = '0.5.4' | 15 | __version__ = '0.5.4' |
| @@ -20,12 +17,10 @@ __author__ = 'jvoisin' | |||
| 20 | 17 | ||
| 21 | # Silence | 18 | # Silence |
| 22 | LOGGING_LEVEL = logging.CRITICAL | 19 | LOGGING_LEVEL = logging.CRITICAL |
| 23 | hachoir_core.config.quiet = True | ||
| 24 | fname = '' | 20 | fname = '' |
| 25 | 21 | ||
| 26 | # Verbose | 22 | # Verbose |
| 27 | # LOGGING_LEVEL = logging.DEBUG | 23 | # LOGGING_LEVEL = logging.DEBUG |
| 28 | # hachoir_core.config.quiet = False | ||
| 29 | # logname = 'report.log' | 24 | # logname = 'report.log' |
| 30 | 25 | ||
| 31 | logging.basicConfig(filename=fname, level=LOGGING_LEVEL) | 26 | logging.basicConfig(filename=fname, level=LOGGING_LEVEL) |
| @@ -155,22 +150,10 @@ def create_class_file(name, backup, **kwargs): | |||
| 155 | elif not os.access(name, os.R_OK): # check read permissions | 150 | elif not os.access(name, os.R_OK): # check read permissions |
| 156 | logging.error('%s is is not readable', name) | 151 | logging.error('%s is is not readable', name) |
| 157 | return None | 152 | return None |
| 158 | elif not os.path.getsize(name): # check if the file is not empty (hachoir crash on empty files) | ||
| 159 | logging.error('%s is empty', name) | ||
| 160 | return None | ||
| 161 | |||
| 162 | try: | ||
| 163 | filename = hachoir_core.cmd_line.unicodeFilename(name) | ||
| 164 | except TypeError: # get rid of "decoding Unicode is not supported" | ||
| 165 | filename = name | ||
| 166 | |||
| 167 | parser = hachoir_parser.createParser(filename) | ||
| 168 | if not parser: | ||
| 169 | logging.info('Unable to parse %s with hachoir', filename) | ||
| 170 | 153 | ||
| 171 | mime = mimetypes.guess_type(name)[0] | 154 | mime = mimetypes.guess_type(name)[0] |
| 172 | if not mime: | 155 | if not mime: |
| 173 | logging.info('Unable to find mimetype of %s', filename) | 156 | logging.info('Unable to find mimetype of %s', name) |
| 174 | return None | 157 | return None |
| 175 | 158 | ||
| 176 | if mime.startswith('application/vnd.oasis.opendocument'): | 159 | if mime.startswith('application/vnd.oasis.opendocument'): |
| @@ -186,4 +169,4 @@ def create_class_file(name, backup, **kwargs): | |||
| 186 | logging.info('Don\'t have stripper for %s format', mime) | 169 | logging.info('Don\'t have stripper for %s format', mime) |
| 187 | return None | 170 | return None |
| 188 | 171 | ||
| 189 | return stripper_class(filename, parser, mime, backup, is_writable, **kwargs) | 172 | return stripper_class(name, mime, backup, is_writable, **kwargs) |
diff --git a/libmat/misc.py b/libmat/misc.py index a55b8ed..cc480e5 100644 --- a/libmat/misc.py +++ b/libmat/misc.py | |||
| @@ -11,8 +11,8 @@ class TorrentStripper(parser.GenericParser): | |||
| 11 | of the bencode lib from Petru Paler | 11 | of the bencode lib from Petru Paler |
| 12 | """ | 12 | """ |
| 13 | 13 | ||
| 14 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 14 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 15 | super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 15 | super(TorrentStripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 16 | self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', | 16 | self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', |
| 17 | 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) | 17 | 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) |
| 18 | 18 | ||
diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py index 0f9520a..692c56f 100644 --- a/libmat/mutagenstripper.py +++ b/libmat/mutagenstripper.py | |||
| @@ -3,11 +3,15 @@ | |||
| 3 | 3 | ||
| 4 | import parser | 4 | import parser |
| 5 | 5 | ||
| 6 | from mutagen.flac import FLAC | ||
| 7 | from mutagen.oggvorbis import OggVorbis | ||
| 8 | from mutagen.mp3 import MP3 | ||
| 9 | |||
| 6 | 10 | ||
| 7 | class MutagenStripper(parser.GenericParser): | 11 | class MutagenStripper(parser.GenericParser): |
| 8 | """ Parser using the (awesome) mutagen library. """ | 12 | """ Parser using the (awesome) mutagen library. """ |
| 9 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 13 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 10 | super(MutagenStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 14 | super(MutagenStripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 11 | self.mfile = None # This will be instanciated in self._create_mfile() | 15 | self.mfile = None # This will be instanciated in self._create_mfile() |
| 12 | self._create_mfile() | 16 | self._create_mfile() |
| 13 | 17 | ||
| @@ -36,3 +40,61 @@ class MutagenStripper(parser.GenericParser): | |||
| 36 | for key, value in self.mfile.tags: | 40 | for key, value in self.mfile.tags: |
| 37 | metadata[key] = value | 41 | metadata[key] = value |
| 38 | return metadata | 42 | return metadata |
| 43 | |||
| 44 | |||
| 45 | class MpegAudioStripper(MutagenStripper): | ||
| 46 | """ Represent a mp3 vorbis file | ||
| 47 | """ | ||
| 48 | def _create_mfile(self): | ||
| 49 | self.mfile = MP3(self.filename) | ||
| 50 | |||
| 51 | def get_meta(self): | ||
| 52 | """ | ||
| 53 | Return the content of the metadata block is present | ||
| 54 | """ | ||
| 55 | metadata = {} | ||
| 56 | if self.mfile.tags: | ||
| 57 | for key in self.mfile.tags.keys(): | ||
| 58 | meta = self.mfile.tags[key] | ||
| 59 | try: # Sometimes, the field has a human-redable description | ||
| 60 | desc = meta.desc | ||
| 61 | except AttributeError: | ||
| 62 | desc = key | ||
| 63 | text = meta.text[0] | ||
| 64 | metadata[desc] = text | ||
| 65 | return metadata | ||
| 66 | |||
| 67 | |||
| 68 | class OggStripper(MutagenStripper): | ||
| 69 | """ Represent an ogg vorbis file | ||
| 70 | """ | ||
| 71 | def _create_mfile(self): | ||
| 72 | self.mfile = OggVorbis(self.filename) | ||
| 73 | |||
| 74 | |||
| 75 | class FlacStripper(MutagenStripper): | ||
| 76 | """ Represent a Flac audio file | ||
| 77 | """ | ||
| 78 | def _create_mfile(self): | ||
| 79 | self.mfile = FLAC(self.filename) | ||
| 80 | |||
| 81 | def remove_all(self): | ||
| 82 | """ Remove the "metadata" block from the file | ||
| 83 | """ | ||
| 84 | super(FlacStripper, self).remove_all() | ||
| 85 | self.mfile.clear_pictures() | ||
| 86 | self.mfile.save() | ||
| 87 | return True | ||
| 88 | |||
| 89 | def is_clean(self): | ||
| 90 | """ Check if the "metadata" block is present in the file | ||
| 91 | """ | ||
| 92 | return super(FlacStripper, self).is_clean() and not self.mfile.pictures | ||
| 93 | |||
| 94 | def get_meta(self): | ||
| 95 | """ Return the content of the metadata block if present | ||
| 96 | """ | ||
| 97 | metadata = super(FlacStripper, self).get_meta() | ||
| 98 | if self.mfile.pictures: | ||
| 99 | metadata['picture:'] = 'yes' | ||
| 100 | return metadata | ||
diff --git a/libmat/office.py b/libmat/office.py index c585cb6..47cd622 100644 --- a/libmat/office.py +++ b/libmat/office.py | |||
| @@ -110,8 +110,8 @@ class PdfStripper(parser.GenericParser): | |||
| 110 | """ Represent a PDF file | 110 | """ Represent a PDF file |
| 111 | """ | 111 | """ |
| 112 | 112 | ||
| 113 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 113 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 114 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 114 | super(PdfStripper, self).__init__(filename, mime, backup, is_writable, **kwargs) |
| 115 | self.uri = 'file://' + os.path.abspath(self.filename) | 115 | self.uri = 'file://' + os.path.abspath(self.filename) |
| 116 | self.password = None | 116 | self.password = None |
| 117 | try: | 117 | try: |
diff --git a/libmat/parser.py b/libmat/parser.py index 8e10ae9..2a82a25 100644 --- a/libmat/parser.py +++ b/libmat/parser.py | |||
| @@ -5,8 +5,6 @@ import os | |||
| 5 | import shutil | 5 | import shutil |
| 6 | import tempfile | 6 | import tempfile |
| 7 | 7 | ||
| 8 | import hachoir_core | ||
| 9 | import hachoir_editor | ||
| 10 | 8 | ||
| 11 | import mat | 9 | import mat |
| 12 | 10 | ||
| @@ -24,19 +22,14 @@ FIELD = object() | |||
| 24 | class GenericParser(object): | 22 | class GenericParser(object): |
| 25 | """ Parent class of all parsers | 23 | """ Parent class of all parsers |
| 26 | """ | 24 | """ |
| 27 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 25 | def __init__(self, filename, mime, backup, is_writable, **kwargs): |
| 28 | self.filename = '' | 26 | self.filename = '' |
| 29 | self.parser = parser | ||
| 30 | self.mime = mime | 27 | self.mime = mime |
| 31 | self.backup = backup | 28 | self.backup = backup |
| 32 | self.is_writable = is_writable | 29 | self.is_writable = is_writable |
| 33 | self.editor = hachoir_editor.createEditor(parser) | 30 | self.filename = filename |
| 34 | try: | ||
| 35 | self.filename = hachoir_core.cmd_line.unicodeFilename(filename) | ||
| 36 | except TypeError: # get rid of "decoding Unicode is not supported" | ||
| 37 | self.filename = filename | ||
| 38 | self.basename = os.path.basename(filename) | 31 | self.basename = os.path.basename(filename) |
| 39 | self.output = hachoir_core.cmd_line.unicodeFilename(tempfile.mkstemp()[1]) | 32 | self.output = tempfile.mkstemp()[1] |
| 40 | 33 | ||
| 41 | def __del__(self): | 34 | def __del__(self): |
| 42 | """ Remove tempfile if it was not used | 35 | """ Remove tempfile if it was not used |
| @@ -48,74 +41,11 @@ class GenericParser(object): | |||
| 48 | """ | 41 | """ |
| 49 | Check if the file is clean from harmful metadatas | 42 | Check if the file is clean from harmful metadatas |
| 50 | """ | 43 | """ |
| 51 | for field in self.editor: | 44 | raise NotImplementedError |
| 52 | if self._should_remove(field): | ||
| 53 | return self._is_clean(self.editor) | ||
| 54 | return True | ||
| 55 | |||
| 56 | def _is_clean(self, fieldset): | ||
| 57 | """ Helper method of the `is_clean` one """ | ||
| 58 | for field in fieldset: | ||
| 59 | remove = self._should_remove(field) | ||
| 60 | if remove is True: | ||
| 61 | return False | ||
| 62 | if remove is FIELD: | ||
| 63 | if not self._is_clean(field): | ||
| 64 | return False | ||
| 65 | return True | ||
| 66 | 45 | ||
| 67 | def remove_all(self): | 46 | def remove_all(self): |
| 68 | """ Remove all compromising fields | 47 | """ Remove all compromising fields |
| 69 | """ | 48 | """ |
| 70 | state = self._remove_all(self.editor) | ||
| 71 | hachoir_core.field.writeIntoFile(self.editor, self.output) | ||
| 72 | self.do_backup() | ||
| 73 | return state | ||
| 74 | |||
| 75 | def _remove_all(self, fieldset): | ||
| 76 | """ Recursive way to handle tree metadatas | ||
| 77 | """ | ||
| 78 | try: | ||
| 79 | for field in fieldset: | ||
| 80 | remove = self._should_remove(field) | ||
| 81 | if remove is True: | ||
| 82 | self._remove(fieldset, field.name) | ||
| 83 | if remove is FIELD: | ||
| 84 | self._remove_all(field) | ||
| 85 | return True | ||
| 86 | except: | ||
| 87 | return False | ||
| 88 | |||
| 89 | @staticmethod | ||
| 90 | def _remove(fieldset, field): | ||
| 91 | """ Delete the given field | ||
| 92 | """ | ||
| 93 | del fieldset[field] | ||
| 94 | |||
| 95 | def get_meta(self): | ||
| 96 | """ Return a dict with all the meta of the file | ||
| 97 | """ | ||
| 98 | metadata = {} | ||
| 99 | self._get_meta(self.editor, metadata) | ||
| 100 | return metadata | ||
| 101 | |||
| 102 | def _get_meta(self, fieldset, metadata): | ||
| 103 | """ Recursive way to handle tree metadatas | ||
| 104 | """ | ||
| 105 | for field in fieldset: | ||
| 106 | remove = self._should_remove(field) | ||
| 107 | if remove: | ||
| 108 | try: | ||
| 109 | metadata[field.name] = field.value | ||
| 110 | except: | ||
| 111 | metadata[field.name] = 'harmful content' | ||
| 112 | if remove is FIELD: | ||
| 113 | self._get_meta(field, None) | ||
| 114 | |||
| 115 | def _should_remove(self, key): | ||
| 116 | """ Return True if the field is compromising | ||
| 117 | abstract method | ||
| 118 | """ | ||
| 119 | raise NotImplementedError | 49 | raise NotImplementedError |
| 120 | 50 | ||
| 121 | def create_backup_copy(self): | 51 | def create_backup_copy(self): |
diff --git a/libmat/strippers.py b/libmat/strippers.py index 3aca04f..5920c41 100644 --- a/libmat/strippers.py +++ b/libmat/strippers.py | |||
| @@ -2,8 +2,7 @@ | |||
| 2 | """ | 2 | """ |
| 3 | 3 | ||
| 4 | import archive | 4 | import archive |
| 5 | import audio | 5 | import mutagenstripper |
| 6 | import images | ||
| 7 | import logging | 6 | import logging |
| 8 | import mat | 7 | import mat |
| 9 | import misc | 8 | import misc |
| @@ -15,7 +14,6 @@ STRIPPERS = { | |||
| 15 | 'application/x-bzip2': archive.Bzip2Stripper, | 14 | 'application/x-bzip2': archive.Bzip2Stripper, |
| 16 | 'application/x-gzip': archive.GzipStripper, | 15 | 'application/x-gzip': archive.GzipStripper, |
| 17 | 'application/zip': archive.ZipStripper, | 16 | 'application/zip': archive.ZipStripper, |
| 18 | 'audio/mpeg': audio.MpegAudioStripper, | ||
| 19 | 'application/x-bittorrent': misc.TorrentStripper, | 17 | 'application/x-bittorrent': misc.TorrentStripper, |
| 20 | 'application/torrent': misc.TorrentStripper, | 18 | 'application/torrent': misc.TorrentStripper, |
| 21 | 'application/opendocument': office.OpenDocumentStripper, | 19 | 'application/opendocument': office.OpenDocumentStripper, |
| @@ -52,11 +50,11 @@ if pdfSupport: | |||
| 52 | # audio format support with mutagen-python | 50 | # audio format support with mutagen-python |
| 53 | try: | 51 | try: |
| 54 | import mutagen | 52 | import mutagen |
| 55 | STRIPPERS['audio/x-flac'] = audio.FlacStripper | 53 | STRIPPERS['audio/x-flac'] = mutagenstripper.FlacStripper |
| 56 | STRIPPERS['audio/flac'] = audio.FlacStripper | 54 | STRIPPERS['audio/flac'] = mutagenstripper.FlacStripper |
| 57 | STRIPPERS['audio/vorbis'] = audio.OggStripper | 55 | STRIPPERS['audio/vorbis'] = mutagenstripper.OggStripper |
| 58 | STRIPPERS['audio/ogg'] = audio.OggStripper | 56 | STRIPPERS['audio/ogg'] = mutagenstripper.OggStripper |
| 59 | STRIPPERS['audio/mpeg'] = audio.MpegAudioStripper | 57 | STRIPPERS['audio/mpeg'] = mutagenstripper.MpegAudioStripper |
| 60 | except ImportError: | 58 | except ImportError: |
| 61 | logging.info('Unable to import python-mutagen: limited audio format support') | 59 | logging.info('Unable to import python-mutagen: limited audio format support') |
| 62 | 60 | ||
| @@ -67,7 +65,5 @@ try: | |||
| 67 | STRIPPERS['image/jpeg'] = exiftool.JpegStripper | 65 | STRIPPERS['image/jpeg'] = exiftool.JpegStripper |
| 68 | STRIPPERS['image/png'] = exiftool.PngStripper | 66 | STRIPPERS['image/png'] = exiftool.PngStripper |
| 69 | STRIPPERS['image/tiff'] = exiftool.TiffStripper | 67 | STRIPPERS['image/tiff'] = exiftool.TiffStripper |
| 70 | except OSError: # if exiftool is not installed, use hachoir instead | 68 | except OSError: |
| 71 | logging.info('Unable to find exiftool: limited images support') | 69 | logging.info('Unable to find exiftool: limited images support') |
| 72 | STRIPPERS['image/jpeg'] = images.JpegStripper | ||
| 73 | STRIPPERS['image/png'] = images.PngStripper | ||
| @@ -7,8 +7,6 @@ import sys | |||
| 7 | import argparse | 7 | import argparse |
| 8 | import os | 8 | import os |
| 9 | 9 | ||
| 10 | import hachoir_core | ||
| 11 | |||
| 12 | from libmat import mat | 10 | from libmat import mat |
| 13 | from libmat import archive | 11 | from libmat import archive |
| 14 | 12 | ||
| @@ -35,7 +33,7 @@ def parse(): | |||
| 35 | info.add_argument('-l', '--list', action='store_true', | 33 | info.add_argument('-l', '--list', action='store_true', |
| 36 | help='list all supported fileformats') | 34 | help='list all supported fileformats') |
| 37 | info.add_argument('-v', '--version', action='version', | 35 | info.add_argument('-v', '--version', action='version', |
| 38 | version='MAT %s - Hachoir %s' % (mat.__version__, hachoir_core.__version__)) | 36 | version='MAT %s' % mat.__version__) |
| 39 | 37 | ||
| 40 | return parser.parse_args() | 38 | return parser.parse_args() |
| 41 | 39 | ||
| @@ -73,4 +73,4 @@ You should only use it for big files. | |||
| 73 | 73 | ||
| 74 | 74 | ||
| 75 | .SH "SEE ALSO" | 75 | .SH "SEE ALSO" |
| 76 | \fBexiftool\fR(1), \fBhachoir\-metadata\fR(1) | 76 | \fBexiftool\fR(1) |
| @@ -31,13 +31,13 @@ setup( | |||
| 31 | name='MAT', | 31 | name='MAT', |
| 32 | version=__version__, | 32 | version=__version__, |
| 33 | description='Metadata Anonymisation Toolkit', | 33 | description='Metadata Anonymisation Toolkit', |
| 34 | long_description='A Metadata Anonymisation Toolkit in Python, using python-hachoir', | 34 | long_description='A Metadata Anonymisation Toolkit in Python', |
| 35 | author='jvoisin', | 35 | author='jvoisin', |
| 36 | author_email='julien.voisin@dustri.org', | 36 | author_email='julien.voisin@dustri.org', |
| 37 | platforms='linux', | 37 | platforms='linux', |
| 38 | license='GPLv2', | 38 | license='GPLv2', |
| 39 | url='https://mat.boum.org', | 39 | url='https://mat.boum.org', |
| 40 | packages=['libmat', 'libmat.hachoir_editor', 'libmat.bencode'], | 40 | packages=['libmat', 'libmat.bencode'], |
| 41 | scripts=['mat', 'mat-gui'], | 41 | scripts=['mat', 'mat-gui'], |
| 42 | data_files=[ | 42 | data_files=[ |
| 43 | ('share/applications', ['mat.desktop']), | 43 | ('share/applications', ['mat.desktop']), |
| @@ -55,5 +55,5 @@ setup( | |||
| 55 | 'build_icons': build_icons.build_icons, | 55 | 'build_icons': build_icons.build_icons, |
| 56 | 'clean': clean_i18n.clean_i18n, | 56 | 'clean': clean_i18n.clean_i18n, |
| 57 | }, | 57 | }, |
| 58 | requires=['mutagen', 'gi', 'pdfrw', 'hachoir_core', 'hachoir_parser'] | 58 | requires=['mutagen', 'gi', 'pdfrw'] |
| 59 | ) | 59 | ) |
