summaryrefslogtreecommitdiff
path: root/libmat
diff options
context:
space:
mode:
author: jvoisin 2015-12-02 17:07:19 +0100
committer: jvoisin 2015-12-02 17:22:45 +0100
commit: 80ece3001895ea13d50915a5215fd47e313bab4c (patch)
tree: c5ede43867c5d7fe2af4178b34b0e6dc219f6aac /libmat
parent: 3cf80e8b5d6faf410e9ad3aad77f23cf6418a587 (diff)
Remove hachoir from MAT.
This (huge) commit completely removes hachoir from MAT. Audio files are now processed with mutagen, and images with exiftool, since the main Python imaging library (PIL) isn't great at dealing with metadata (or with damaged/non-standard files). Package maintainers should update their dependencies to reflect this.
Diffstat (limited to 'libmat')
-rw-r--r-- libmat/archive.py 13
-rw-r--r-- libmat/audio.py 53
-rw-r--r-- libmat/exiftool.py 4
-rw-r--r-- libmat/hachoir_editor/__init__.py 8
-rw-r--r-- libmat/hachoir_editor/field.py 69
-rw-r--r-- libmat/hachoir_editor/fieldset.py 352
-rw-r--r-- libmat/hachoir_editor/typed_field.py 268
-rw-r--r-- libmat/images.py 52
-rw-r--r-- libmat/mat.py 21
-rw-r--r-- libmat/misc.py 4
-rw-r--r-- libmat/mutagenstripper.py 66
-rw-r--r-- libmat/office.py 4
-rw-r--r-- libmat/parser.py 78
-rw-r--r-- libmat/strippers.py 18
14 files changed, 89 insertions, 921 deletions
diff --git a/libmat/archive.py b/libmat/archive.py
index ad9fdc9..2e14538 100644
--- a/libmat/archive.py
+++ b/libmat/archive.py
@@ -20,9 +20,8 @@ class GenericArchiveStripper(parser.GenericParser):
20 """ Represent a generic archive 20 """ Represent a generic archive
21 """ 21 """
22 22
23 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 23 def __init__(self, filename, mime, backup, is_writable, **kwargs):
24 super(GenericArchiveStripper, self).__init__(filename, 24 super(GenericArchiveStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
25 parser, mime, backup, is_writable, **kwargs)
26 self.compression = '' 25 self.compression = ''
27 self.add2archive = kwargs['add2archive'] 26 self.add2archive = kwargs['add2archive']
28 self.tempdir = tempfile.mkdtemp() 27 self.tempdir = tempfile.mkdtemp()
@@ -354,8 +353,8 @@ class GzipStripper(TarStripper):
354 """ Represent a tar.gz archive 353 """ Represent a tar.gz archive
355 """ 354 """
356 355
357 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 356 def __init__(self, filename, mime, backup, is_writable, **kwargs):
358 super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 357 super(GzipStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
359 self.compression = ':gz' 358 self.compression = ':gz'
360 359
361 360
@@ -363,6 +362,6 @@ class Bzip2Stripper(TarStripper):
363 """ Represent a tar.bz2 archive 362 """ Represent a tar.bz2 archive
364 """ 363 """
365 364
366 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 365 def __init__(self, filename, mime, backup, is_writable, **kwargs):
367 super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 366 super(Bzip2Stripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
368 self.compression = ':bz2' 367 self.compression = ':bz2'
diff --git a/libmat/audio.py b/libmat/audio.py
deleted file mode 100644
index 2747dc1..0000000
--- a/libmat/audio.py
+++ /dev/null
@@ -1,53 +0,0 @@
1""" Care about audio fileformat
2"""
3
4try:
5 from mutagen.flac import FLAC
6 from mutagen.oggvorbis import OggVorbis
7except ImportError:
8 pass
9
10import parser
11import mutagenstripper
12
13
14class MpegAudioStripper(parser.GenericParser):
15 """ Represent mpeg audio file (mp3, ...)
16 """
17 def _should_remove(self, field):
18 return field.name in ("id3v1", "id3v2")
19
20
21class OggStripper(mutagenstripper.MutagenStripper):
22 """ Represent an ogg vorbis file
23 """
24 def _create_mfile(self):
25 self.mfile = OggVorbis(self.filename)
26
27
28class FlacStripper(mutagenstripper.MutagenStripper):
29 """ Represent a Flac audio file
30 """
31 def _create_mfile(self):
32 self.mfile = FLAC(self.filename)
33
34 def remove_all(self):
35 """ Remove the "metadata" block from the file
36 """
37 super(FlacStripper, self).remove_all()
38 self.mfile.clear_pictures()
39 self.mfile.save()
40 return True
41
42 def is_clean(self):
43 """ Check if the "metadata" block is present in the file
44 """
45 return super(FlacStripper, self).is_clean() and not self.mfile.pictures
46
47 def get_meta(self):
48 """ Return the content of the metadata block if present
49 """
50 metadata = super(FlacStripper, self).get_meta()
51 if self.mfile.pictures:
52 metadata['picture:'] = 'yes'
53 return metadata
diff --git a/libmat/exiftool.py b/libmat/exiftool.py
index 07ef06b..ef81ed3 100644
--- a/libmat/exiftool.py
+++ b/libmat/exiftool.py
@@ -9,8 +9,8 @@ class ExiftoolStripper(parser.GenericParser):
9 """ A generic stripper class using exiftool as backend 9 """ A generic stripper class using exiftool as backend
10 """ 10 """
11 11
12 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 12 def __init__(self, filename, mime, backup, is_writable, **kwargs):
13 super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 13 super(ExiftoolStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
14 self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', 14 self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time',
15 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type', 15 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type',
16 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'} 16 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'}
diff --git a/libmat/hachoir_editor/__init__.py b/libmat/hachoir_editor/__init__.py
deleted file mode 100644
index 1835676..0000000
--- a/libmat/hachoir_editor/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
1from field import (
2 EditorError, FakeField)
3from typed_field import (
4 EditableField, EditableBits, EditableBytes,
5 EditableInteger, EditableString,
6 createEditableField)
7from fieldset import EditableFieldSet, NewFieldSet, createEditor
8
diff --git a/libmat/hachoir_editor/field.py b/libmat/hachoir_editor/field.py
deleted file mode 100644
index 6b1efe3..0000000
--- a/libmat/hachoir_editor/field.py
+++ /dev/null
@@ -1,69 +0,0 @@
1from hachoir_core.error import HachoirError
2from hachoir_core.field import joinPath, MissingField
3
4class EditorError(HachoirError):
5 pass
6
7class FakeField(object):
8 """
9 This class have API looks similar to Field API, but objects don't contain
10 any value: all values are _computed_ by parent methods.
11
12 Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc").
13 """
14 is_field_set = False
15
16 def __init__(self, parent, name):
17 self._parent = parent
18 self._name = name
19
20 def _getPath(self):
21 return joinPath(self._parent.path, self._name)
22 path = property(_getPath)
23
24 def _getName(self):
25 return self._name
26 name = property(_getName)
27
28 def _getAddress(self):
29 return self._parent._getFieldAddress(self._name)
30 address = property(_getAddress)
31
32 def _getSize(self):
33 return self._parent.input[self._name].size
34 size = property(_getSize)
35
36 def _getValue(self):
37 return self._parent.input[self._name].value
38 value = property(_getValue)
39
40 def createDisplay(self):
41 # TODO: Returns new value if field is altered
42 return self._parent.input[self._name].display
43 display = property(createDisplay)
44
45 def _getParent(self):
46 return self._parent
47 parent = property(_getParent)
48
49 def hasValue(self):
50 return self._parent.input[self._name].hasValue()
51
52 def __getitem__(self, key):
53 # TODO: Implement this function!
54 raise MissingField(self, key)
55
56 def _isAltered(self):
57 return False
58 is_altered = property(_isAltered)
59
60 def writeInto(self, output):
61 size = self.size
62 addr = self._parent._getFieldInputAddress(self._name)
63 input = self._parent.input
64 stream = input.stream
65 if size % 8:
66 output.copyBitsFrom(stream, addr, size, input.endian)
67 else:
68 output.copyBytesFrom(stream, addr, size//8)
69
diff --git a/libmat/hachoir_editor/fieldset.py b/libmat/hachoir_editor/fieldset.py
deleted file mode 100644
index b7c9b07..0000000
--- a/libmat/hachoir_editor/fieldset.py
+++ /dev/null
@@ -1,352 +0,0 @@
1from hachoir_core.dict import UniqKeyError
2from hachoir_core.field import MissingField, Float32, Float64, FakeArray
3from hachoir_core.compatibility import any
4from hachoir_core.i18n import _
5from typed_field import createEditableField
6from field import EditorError
7from collections import deque # Python 2.4
8import weakref # Python 2.1
9import struct
10
11class EditableFieldSet(object):
12 MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors
13 is_field_set = True
14
15 def __init__(self, parent, fieldset):
16 self._parent = parent
17 self.input = fieldset # original FieldSet
18 self._fields = {} # cache of editable fields
19 self._deleted = set() # Names of deleted fields
20 self._inserted = {} # Inserted field (name => list of field,
21 # where name is the name after)
22
23 def array(self, key):
24 # FIXME: Use cache?
25 return FakeArray(self, key)
26
27 def _getParent(self):
28 return self._parent
29 parent = property(_getParent)
30
31 def _isAltered(self):
32 if self._inserted:
33 return True
34 if self._deleted:
35 return True
36 return any(field.is_altered for field in self._fields.itervalues())
37 is_altered = property(_isAltered)
38
39 def reset(self):
40 """
41 Reset the field set and the input field set.
42 """
43 for key, field in self._fields.iteritems():
44 if not field.is_altered:
45 del self._fields[key]
46 self.input.reset()
47
48 def __len__(self):
49 return len(self.input) \
50 - len(self._deleted) \
51 + sum( len(new) for new in self._inserted.itervalues() )
52
53 def __iter__(self):
54 for field in self.input:
55 name = field.name
56 if name in self._inserted:
57 for newfield in self._inserted[name]:
58 yield weakref.proxy(newfield)
59 if name not in self._deleted:
60 yield self[name]
61 if None in self._inserted:
62 for newfield in self._inserted[None]:
63 yield weakref.proxy(newfield)
64
65 def insertBefore(self, name, *new_fields):
66 self._insert(name, new_fields, False)
67
68 def insertAfter(self, name, *new_fields):
69 self._insert(name, new_fields, True)
70
71 def insert(self, *new_fields):
72 self._insert(None, new_fields, True)
73
74 def _insert(self, key, new_fields, next):
75 """
76 key is the name of the field before which new_fields
77 will be inserted. If next is True, the fields will be inserted
78 _after_ this field.
79 """
80 # Set unique field name
81 for field in new_fields:
82 if field._name.endswith("[]"):
83 self.input.setUniqueFieldName(field)
84
85 # Check that there is no duplicate in inserted fields
86 new_names = list(field.name for field in new_fields)
87 names_set = set(new_names)
88 if len(names_set) != len(new_fields):
89 duplicates = (name for name in names_set if 1 < new_names.count(name))
90 raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates))
91
92 # Check that field names are not in input
93 if self.input: # Write special version for NewFieldSet?
94 for name in new_names:
95 if name in self.input and name not in self._deleted:
96 raise UniqKeyError(_("Field name '%s' already exists") % name)
97
98 # Check that field names are not in inserted fields
99 for fields in self._inserted.itervalues():
100 for field in fields:
101 if field.name in new_names:
102 raise UniqKeyError(_("Field name '%s' already exists") % field.name)
103
104 # Input have already inserted field?
105 if key in self._inserted:
106 if next:
107 self._inserted[key].extend( reversed(new_fields) )
108 else:
109 self._inserted[key].extendleft( reversed(new_fields) )
110 return
111
112 # Whould like to insert in inserted fields?
113 if key:
114 for fields in self._inserted.itervalues():
115 names = [item.name for item in fields]
116 try:
117 pos = names.index(key)
118 except ValueError:
119 continue
120 if 0 <= pos:
121 if next:
122 pos += 1
123 fields.rotate(-pos)
124 fields.extendleft( reversed(new_fields) )
125 fields.rotate(pos)
126 return
127
128 # Get next field. Use None if we are at the end.
129 if next:
130 index = self.input[key].index + 1
131 try:
132 key = self.input[index].name
133 except IndexError:
134 key = None
135
136 # Check that field names are not in input
137 if key not in self.input:
138 raise MissingField(self, key)
139
140 # Insert in original input
141 self._inserted[key]= deque(new_fields)
142
143 def _getDescription(self):
144 return self.input.description
145 description = property(_getDescription)
146
147 def _getStream(self):
148 # FIXME: This property is maybe a bad idea since address may be differents
149 return self.input.stream
150 stream = property(_getStream)
151
152 def _getName(self):
153 return self.input.name
154 name = property(_getName)
155
156 def _getEndian(self):
157 return self.input.endian
158 endian = property(_getEndian)
159
160 def _getAddress(self):
161 if self._parent:
162 return self._parent._getFieldAddress(self.name)
163 else:
164 return 0
165 address = property(_getAddress)
166
167 def _getAbsoluteAddress(self):
168 address = self.address
169 current = self._parent
170 while current:
171 address += current.address
172 current = current._parent
173 return address
174 absolute_address = property(_getAbsoluteAddress)
175
176 def hasValue(self):
177 return False
178# return self._parent.input[self.name].hasValue()
179
180 def _getSize(self):
181 if self.is_altered:
182 return sum(field.size for field in self)
183 else:
184 return self.input.size
185 size = property(_getSize)
186
187 def _getPath(self):
188 return self.input.path
189 path = property(_getPath)
190
191 def _getOriginalField(self, name):
192 assert name in self.input
193 return self.input[name]
194
195 def _getFieldInputAddress(self, name):
196 """
197 Absolute address of a field from the input field set.
198 """
199 assert name in self.input
200 return self.input[name].absolute_address
201
202 def _getFieldAddress(self, name):
203 """
204 Compute relative address of a field. The operation takes care of
205 deleted and resized fields.
206 """
207 #assert name not in self._deleted
208 addr = 0
209 for field in self:
210 if field.name == name:
211 return addr
212 addr += field.size
213 raise MissingField(self, name)
214
215 def _getItemByPath(self, path):
216 if not path[0]:
217 path = path[1:]
218 field = self
219 for name in path:
220 field = field[name]
221 return field
222
223 def __contains__(self, name):
224 try:
225 field = self[name]
226 return (field is not None)
227 except MissingField:
228 return False
229
230 def __getitem__(self, key):
231 """
232 Create a weak reference to an editable field (EditableField) for the
233 field with specified name. If the field is removed later, using the
234 editable field will raise a weakref.ReferenceError exception.
235
236 May raise a MissingField error if the field doesn't exist in original
237 field set or it has been deleted.
238 """
239 if "/" in key:
240 return self._getItemByPath(key.split("/"))
241 if isinstance(key, (int, long)):
242 raise EditorError("Integer index are not supported")
243
244 if (key in self._deleted) or (key not in self.input):
245 raise MissingField(self, key)
246 if key not in self._fields:
247 field = self.input[key]
248 if field.is_field_set:
249 self._fields[key] = createEditableFieldSet(self, field)
250 else:
251 self._fields[key] = createEditableField(self, field)
252 return weakref.proxy(self._fields[key])
253
254 def __delitem__(self, name):
255 """
256 Remove a field from the field set. May raise an MissingField exception
257 if the field has already been deleted.
258 """
259 parts = name.partition('/')
260 if parts[2]:
261 fieldset = self[parts[0]]
262 del fieldset[parts[2]]
263 return
264 if name in self._deleted:
265 raise MissingField(self, name)
266 self._deleted.add(name)
267 if name in self._fields:
268 del self._fields[name]
269
270 def writeInto(self, output):
271 """
272 Write the content if this field set into the output stream
273 (OutputStream).
274 """
275 if not self.is_altered:
276 # Not altered: just copy bits/bytes
277 input = self.input
278 if input.size % 8:
279 output.copyBitsFrom(input.stream,
280 input.absolute_address, input.size, input.endian)
281 else:
282 output.copyBytesFrom(input.stream,
283 input.absolute_address, input.size//8)
284 else:
285 # Altered: call writeInto() method of each field
286 realaddr = 0
287 for field in self:
288 field.writeInto(output)
289 realaddr += field.size
290
291 def _getValue(self):
292 raise EditorError('Field set "%s" has no value' % self.path)
293 def _setValue(self, value):
294 raise EditorError('Field set "%s" value is read only' % self.path)
295 value = property(_getValue, _setValue, "Value of field")
296
297class EditableFloat(EditableFieldSet):
298 _value = None
299
300 def _isAltered(self):
301 return (self._value is not None)
302 is_altered = property(_isAltered)
303
304 def writeInto(self, output):
305 if self._value is not None:
306 self._write(output)
307 else:
308 EditableFieldSet.writeInto(self, output)
309
310 def _write(self, output):
311 format = self.input.struct_format
312 raw = struct.pack(format, self._value)
313 output.writeBytes(raw)
314
315 def _setValue(self, value):
316 self.parent._is_altered = True
317 self._value = value
318 value = property(EditableFieldSet._getValue, _setValue)
319
320def createEditableFieldSet(parent, field):
321 cls = field.__class__
322 # FIXME: Support Float80
323 if cls in (Float32, Float64):
324 return EditableFloat(parent, field)
325 else:
326 return EditableFieldSet(parent, field)
327
328class NewFieldSet(EditableFieldSet):
329 def __init__(self, parent, name):
330 EditableFieldSet.__init__(self, parent, None)
331 self._name = name
332 self._endian = parent.endian
333
334 def __iter__(self):
335 if None in self._inserted:
336 return iter(self._inserted[None])
337 else:
338 raise StopIteration()
339
340 def _getName(self):
341 return self._name
342 name = property(_getName)
343
344 def _getEndian(self):
345 return self._endian
346 endian = property(_getEndian)
347
348 is_altered = property(lambda self: True)
349
350def createEditor(fieldset):
351 return EditableFieldSet(None, fieldset)
352
diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py
deleted file mode 100644
index 606d39b..0000000
--- a/libmat/hachoir_editor/typed_field.py
+++ /dev/null
@@ -1,268 +0,0 @@
1from hachoir_core.field import (
2 RawBits, Bit, Bits, PaddingBits,
3 RawBytes, Bytes, PaddingBytes,
4 GenericString, Character,
5 isInteger, isString)
6from field import FakeField
7
8
9class EditableField(FakeField):
10 """
11 Pure virtual class used to write editable field class.
12 """
13
14 _is_altered = False
15
16 def __init__(self, parent, name, value=None):
17 FakeField.__init__(self, parent, name)
18 self._value = value
19
20 def _isAltered(self):
21 return self._is_altered
22
23 is_altered = property(_isAltered)
24
25 def hasValue(self):
26 return True
27
28 def _computeSize(self):
29 raise NotImplementedError()
30
31 def _getValue(self):
32 return self._value
33
34 def _setValue(self, value):
35 self._value = value
36
37 def _propGetValue(self):
38 if self._value is not None:
39 return self._getValue()
40 else:
41 return FakeField._getValue(self)
42
43 def _propSetValue(self, value):
44 self._setValue(value)
45 self._is_altered = True
46
47 value = property(_propGetValue, _propSetValue)
48
49 def _getSize(self):
50 if self._value is not None:
51 return self._computeSize()
52 else:
53 return FakeField._getSize(self)
54
55 size = property(_getSize)
56
57 def _write(self, output):
58 raise NotImplementedError()
59
60 def writeInto(self, output):
61 if self._is_altered:
62 self._write(output)
63 else:
64 return FakeField.writeInto(self, output)
65
66
67class EditableFixedField(EditableField):
68 """
69 Editable field with fixed size.
70 """
71
72 def __init__(self, parent, name, value=None, size=None):
73 EditableField.__init__(self, parent, name, value)
74 if size is not None:
75 self._size = size
76 else:
77 self._size = self._parent._getOriginalField(self._name).size
78
79 def _getSize(self):
80 return self._size
81
82 size = property(_getSize)
83
84
85class EditableBits(EditableFixedField):
86 def __init__(self, parent, name, *args):
87 if args:
88 if len(args) != 2:
89 raise TypeError(
90 "Wrong argument count, EditableBits constructor prototype is: "
91 "(parent, name, [size, value])")
92 size = args[0]
93 value = args[1]
94 assert isinstance(value, (int, long))
95 else:
96 size = None
97 value = None
98 EditableFixedField.__init__(self, parent, name, value, size)
99 if args:
100 self._setValue(args[1])
101 self._is_altered = True
102
103 def _setValue(self, value):
104 if not (0 <= value < (1 << self._size)):
105 raise ValueError("Invalid value, must be in range %s..%s"
106 % (0, (1 << self._size) - 1))
107 self._value = value
108
109 def _write(self, output):
110 output.writeBits(self._size, self._value, self._parent.endian)
111
112
113class EditableBytes(EditableField):
114 def _setValue(self, value):
115 if not value: raise ValueError(
116 "Unable to set empty string to a EditableBytes field")
117 self._value = value
118
119 def _computeSize(self):
120 return len(self._value) * 8
121
122 def _write(self, output):
123 output.writeBytes(self._value)
124
125
126class EditableString(EditableField):
127 MAX_SIZE = {
128 "Pascal8": (1 << 8) - 1,
129 "Pascal16": (1 << 16) - 1,
130 "Pascal32": (1 << 32) - 1,
131 }
132
133 def __init__(self, parent, name, *args, **kw):
134 if len(args) == 2:
135 value = args[1]
136 assert isinstance(value, str) # TODO: support Unicode
137 elif not args:
138 value = None
139 else:
140 raise TypeError(
141 "Wrong argument count, EditableString constructor prototype is:"
142 "(parent, name, [format, value])")
143 EditableField.__init__(self, parent, name, value)
144 if len(args) == 2:
145 self._charset = kw.get('charset', None)
146 self._format = args[0]
147 if self._format in GenericString.PASCAL_FORMATS:
148 self._prefix_size = GenericString.PASCAL_FORMATS[self._format]
149 else:
150 self._prefix_size = 0
151 self._suffix_str = GenericString.staticSuffixStr(
152 self._format, self._charset, self._parent.endian)
153 self._is_altered = True
154 else:
155 orig = self._parent._getOriginalField(name)
156 self._charset = orig.charset
157 self._format = orig.format
158 self._prefix_size = orig.content_offset
159 self._suffix_str = orig.suffix_str
160
161 def _setValue(self, value):
162 size = len(value)
163 if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size:
164 raise ValueError("String is too big")
165 self._value = value
166
167 def _computeSize(self):
168 return (self._prefix_size + len(self._value) + len(self._suffix_str)) * 8
169
170 def _write(self, output):
171 if self._format in GenericString.SUFFIX_FORMAT:
172 output.writeBytes(self._value)
173 output.writeBytes(self._suffix_str)
174 elif self._format == "fixed":
175 output.writeBytes(self._value)
176 else:
177 assert self._format in GenericString.PASCAL_FORMATS
178 size = GenericString.PASCAL_FORMATS[self._format]
179 output.writeInteger(len(self._value), False, size, self._parent.endian)
180 output.writeBytes(self._value)
181
182
183class EditableCharacter(EditableFixedField):
184 def __init__(self, parent, name, *args):
185 if args:
186 if len(args) != 3:
187 raise TypeError(
188 "Wrong argument count, EditableCharacter "
189 "constructor prototype is: (parent, name, [value])")
190 value = args[0]
191 if not isinstance(value, str) or len(value) != 1:
192 raise TypeError("EditableCharacter needs a character")
193 else:
194 value = None
195 EditableFixedField.__init__(self, parent, name, value, 8)
196 if args:
197 self._is_altered = True
198
199 def _setValue(self, value):
200 if not isinstance(value, str) or len(value) != 1:
201 raise TypeError("EditableCharacter needs a character")
202 self._value = value
203
204 def _write(self, output):
205 output.writeBytes(self._value)
206
207
208class EditableInteger(EditableFixedField):
209 VALID_VALUE_SIGNED = {
210 8: (-(1 << 8), (1 << 8) - 1),
211 16: (-(1 << 15), (1 << 15) - 1),
212 32: (-(1 << 31), (1 << 31) - 1),
213 }
214 VALID_VALUE_UNSIGNED = {
215 8: (0, (1 << 8) - 1),
216 16: (0, (1 << 16) - 1),
217 32: (0, (1 << 32) - 1)
218 }
219
220 def __init__(self, parent, name, *args):
221 if args:
222 if len(args) != 3:
223 raise TypeError(
224 "Wrong argument count, EditableInteger constructor prototype is: "
225 "(parent, name, [signed, size, value])")
226 size = args[1]
227 value = args[2]
228 assert isinstance(value, (int, long))
229 else:
230 size = None
231 value = None
232 EditableFixedField.__init__(self, parent, name, value, size)
233 if args:
234 self._signed = args[0]
235 self._is_altered = True
236 else:
237 self._signed = self._parent._getOriginalField(self._name).signed
238
239 def _setValue(self, value):
240 if self._signed:
241 valid = self.VALID_VALUE_SIGNED
242 else:
243 valid = self.VALID_VALUE_UNSIGNED
244 minval, maxval = valid[self._size]
245 if not (minval <= value <= maxval):
246 raise ValueError("Invalid value, must be in range %s..%s"
247 % (minval, maxval))
248 self._value = value
249
250 def _write(self, output):
251 output.writeInteger(
252 self.value, self._signed, self._size // 8, self._parent.endian)
253
254
255def createEditableField(fieldset, field):
256 if isInteger(field):
257 cls = EditableInteger
258 elif isString(field):
259 cls = EditableString
260 elif field.__class__ in (RawBytes, Bytes, PaddingBytes):
261 cls = EditableBytes
262 elif field.__class__ in (RawBits, Bits, Bit, PaddingBits):
263 cls = EditableBits
264 elif field.__class__ == Character:
265 cls = EditableCharacter
266 else:
267 cls = FakeField
268 return cls(fieldset, field.name)
diff --git a/libmat/images.py b/libmat/images.py
deleted file mode 100644
index 813b0fd..0000000
--- a/libmat/images.py
+++ /dev/null
@@ -1,52 +0,0 @@
1""" Takes care about pictures formats
2
3References:
4 - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm
5 - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html
6 - PNG: http://www.w3.org/TR/PNG-Chunks.html
7"""
8
9import parser
10
11
12class JpegStripper(parser.GenericParser):
13 """ Represents a jpeg file.
14 Custom Huffman and Quantization tables
15 are stripped: they may leak
16 some info, and the quality loss is minor.
17 """
18 def _should_remove(self, field):
19 """ Return True if the field is compromising
20 """
21 field_list = frozenset([
22 'start_image', # start of the image
23 'app0', # JFIF data
24 'start_frame', # specify width, height, number of components
25 'start_scan', # specify which slice of data the top-to-bottom scan contains
26 'data', # actual data
27 'end_image']) # end of the image
28 if field.name in field_list:
29 return False
30 elif field.name.startswith('quantization['): # custom Quant. tables
31 return False
32 elif field.name.startswith('huffman['): # custom Huffman tables
33 return False
34 return True
35
36
37class PngStripper(parser.GenericParser):
38 """ Represents a png file
39 """
40 def _should_remove(self, field):
41 """ Return True if the field is compromising
42 """
43 field_list = frozenset([
44 'id',
45 'header', # PNG header
46 'physical', # the intended pixel size or aspect ratio
47 'end']) # end of the image
48 if field.name in field_list:
49 return False
50 elif field.name.startswith('data['): # data
51 return False
52 return True
diff --git a/libmat/mat.py b/libmat/mat.py
index 42357d6..2634cc3 100644
--- a/libmat/mat.py
+++ b/libmat/mat.py
@@ -10,9 +10,6 @@ import platform
10import subprocess 10import subprocess
11import xml.sax 11import xml.sax
12 12
13import hachoir_core.cmd_line
14import hachoir_parser
15
16import libmat.exceptions 13import libmat.exceptions
17 14
18__version__ = '0.5.4' 15__version__ = '0.5.4'
@@ -20,12 +17,10 @@ __author__ = 'jvoisin'
20 17
21# Silence 18# Silence
22LOGGING_LEVEL = logging.CRITICAL 19LOGGING_LEVEL = logging.CRITICAL
23hachoir_core.config.quiet = True
24fname = '' 20fname = ''
25 21
26# Verbose 22# Verbose
27# LOGGING_LEVEL = logging.DEBUG 23# LOGGING_LEVEL = logging.DEBUG
28# hachoir_core.config.quiet = False
29# logname = 'report.log' 24# logname = 'report.log'
30 25
31logging.basicConfig(filename=fname, level=LOGGING_LEVEL) 26logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
@@ -155,22 +150,10 @@ def create_class_file(name, backup, **kwargs):
155 elif not os.access(name, os.R_OK): # check read permissions 150 elif not os.access(name, os.R_OK): # check read permissions
156 logging.error('%s is is not readable', name) 151 logging.error('%s is is not readable', name)
157 return None 152 return None
158 elif not os.path.getsize(name): # check if the file is not empty (hachoir crash on empty files)
159 logging.error('%s is empty', name)
160 return None
161
162 try:
163 filename = hachoir_core.cmd_line.unicodeFilename(name)
164 except TypeError: # get rid of "decoding Unicode is not supported"
165 filename = name
166
167 parser = hachoir_parser.createParser(filename)
168 if not parser:
169 logging.info('Unable to parse %s with hachoir', filename)
170 153
171 mime = mimetypes.guess_type(name)[0] 154 mime = mimetypes.guess_type(name)[0]
172 if not mime: 155 if not mime:
173 logging.info('Unable to find mimetype of %s', filename) 156 logging.info('Unable to find mimetype of %s', name)
174 return None 157 return None
175 158
176 if mime.startswith('application/vnd.oasis.opendocument'): 159 if mime.startswith('application/vnd.oasis.opendocument'):
@@ -186,4 +169,4 @@ def create_class_file(name, backup, **kwargs):
186 logging.info('Don\'t have stripper for %s format', mime) 169 logging.info('Don\'t have stripper for %s format', mime)
187 return None 170 return None
188 171
189 return stripper_class(filename, parser, mime, backup, is_writable, **kwargs) 172 return stripper_class(name, mime, backup, is_writable, **kwargs)
diff --git a/libmat/misc.py b/libmat/misc.py
index a55b8ed..cc480e5 100644
--- a/libmat/misc.py
+++ b/libmat/misc.py
@@ -11,8 +11,8 @@ class TorrentStripper(parser.GenericParser):
11 of the bencode lib from Petru Paler 11 of the bencode lib from Petru Paler
12 """ 12 """
13 13
14 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 14 def __init__(self, filename, mime, backup, is_writable, **kwargs):
15 super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 15 super(TorrentStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
16 self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', 16 self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces',
17 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) 17 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash'])
18 18
diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py
index 0f9520a..692c56f 100644
--- a/libmat/mutagenstripper.py
+++ b/libmat/mutagenstripper.py
@@ -3,11 +3,15 @@
3 3
4import parser 4import parser
5 5
6from mutagen.flac import FLAC
7from mutagen.oggvorbis import OggVorbis
8from mutagen.mp3 import MP3
9
6 10
7class MutagenStripper(parser.GenericParser): 11class MutagenStripper(parser.GenericParser):
8 """ Parser using the (awesome) mutagen library. """ 12 """ Parser using the (awesome) mutagen library. """
9 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 13 def __init__(self, filename, mime, backup, is_writable, **kwargs):
10 super(MutagenStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 14 super(MutagenStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
11 self.mfile = None # This will be instanciated in self._create_mfile() 15 self.mfile = None # This will be instanciated in self._create_mfile()
12 self._create_mfile() 16 self._create_mfile()
13 17
@@ -36,3 +40,61 @@ class MutagenStripper(parser.GenericParser):
36 for key, value in self.mfile.tags: 40 for key, value in self.mfile.tags:
37 metadata[key] = value 41 metadata[key] = value
38 return metadata 42 return metadata
43
44
45class MpegAudioStripper(MutagenStripper):
46 """ Represent a mp3 vorbis file
47 """
48 def _create_mfile(self):
49 self.mfile = MP3(self.filename)
50
51 def get_meta(self):
52 """
53 Return the content of the metadata block is present
54 """
55 metadata = {}
56 if self.mfile.tags:
57 for key in self.mfile.tags.keys():
58 meta = self.mfile.tags[key]
59 try: # Sometimes, the field has a human-redable description
60 desc = meta.desc
61 except AttributeError:
62 desc = key
63 text = meta.text[0]
64 metadata[desc] = text
65 return metadata
66
67
68class OggStripper(MutagenStripper):
69 """ Represent an ogg vorbis file
70 """
71 def _create_mfile(self):
72 self.mfile = OggVorbis(self.filename)
73
74
75class FlacStripper(MutagenStripper):
76 """ Represent a Flac audio file
77 """
78 def _create_mfile(self):
79 self.mfile = FLAC(self.filename)
80
81 def remove_all(self):
82 """ Remove the "metadata" block from the file
83 """
84 super(FlacStripper, self).remove_all()
85 self.mfile.clear_pictures()
86 self.mfile.save()
87 return True
88
89 def is_clean(self):
90 """ Check if the "metadata" block is present in the file
91 """
92 return super(FlacStripper, self).is_clean() and not self.mfile.pictures
93
94 def get_meta(self):
95 """ Return the content of the metadata block if present
96 """
97 metadata = super(FlacStripper, self).get_meta()
98 if self.mfile.pictures:
99 metadata['picture:'] = 'yes'
100 return metadata
diff --git a/libmat/office.py b/libmat/office.py
index c585cb6..47cd622 100644
--- a/libmat/office.py
+++ b/libmat/office.py
@@ -110,8 +110,8 @@ class PdfStripper(parser.GenericParser):
110 """ Represent a PDF file 110 """ Represent a PDF file
111 """ 111 """
112 112
113 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 113 def __init__(self, filename, mime, backup, is_writable, **kwargs):
114 super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 114 super(PdfStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
115 self.uri = 'file://' + os.path.abspath(self.filename) 115 self.uri = 'file://' + os.path.abspath(self.filename)
116 self.password = None 116 self.password = None
117 try: 117 try:
diff --git a/libmat/parser.py b/libmat/parser.py
index 8e10ae9..2a82a25 100644
--- a/libmat/parser.py
+++ b/libmat/parser.py
@@ -5,8 +5,6 @@ import os
5import shutil 5import shutil
6import tempfile 6import tempfile
7 7
8import hachoir_core
9import hachoir_editor
10 8
11import mat 9import mat
12 10
@@ -24,19 +22,14 @@ FIELD = object()
24class GenericParser(object): 22class GenericParser(object):
25 """ Parent class of all parsers 23 """ Parent class of all parsers
26 """ 24 """
27 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 25 def __init__(self, filename, mime, backup, is_writable, **kwargs):
28 self.filename = '' 26 self.filename = ''
29 self.parser = parser
30 self.mime = mime 27 self.mime = mime
31 self.backup = backup 28 self.backup = backup
32 self.is_writable = is_writable 29 self.is_writable = is_writable
33 self.editor = hachoir_editor.createEditor(parser) 30 self.filename = filename
34 try:
35 self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
36 except TypeError: # get rid of "decoding Unicode is not supported"
37 self.filename = filename
38 self.basename = os.path.basename(filename) 31 self.basename = os.path.basename(filename)
39 self.output = hachoir_core.cmd_line.unicodeFilename(tempfile.mkstemp()[1]) 32 self.output = tempfile.mkstemp()[1]
40 33
41 def __del__(self): 34 def __del__(self):
42 """ Remove tempfile if it was not used 35 """ Remove tempfile if it was not used
@@ -48,74 +41,11 @@ class GenericParser(object):
48 """ 41 """
49 Check if the file is clean from harmful metadatas 42 Check if the file is clean from harmful metadatas
50 """ 43 """
51 for field in self.editor: 44 raise NotImplementedError
52 if self._should_remove(field):
53 return self._is_clean(self.editor)
54 return True
55
56 def _is_clean(self, fieldset):
57 """ Helper method of the `is_clean` one """
58 for field in fieldset:
59 remove = self._should_remove(field)
60 if remove is True:
61 return False
62 if remove is FIELD:
63 if not self._is_clean(field):
64 return False
65 return True
66 45
67 def remove_all(self): 46 def remove_all(self):
68 """ Remove all compromising fields 47 """ Remove all compromising fields
69 """ 48 """
70 state = self._remove_all(self.editor)
71 hachoir_core.field.writeIntoFile(self.editor, self.output)
72 self.do_backup()
73 return state
74
75 def _remove_all(self, fieldset):
76 """ Recursive way to handle tree metadatas
77 """
78 try:
79 for field in fieldset:
80 remove = self._should_remove(field)
81 if remove is True:
82 self._remove(fieldset, field.name)
83 if remove is FIELD:
84 self._remove_all(field)
85 return True
86 except:
87 return False
88
89 @staticmethod
90 def _remove(fieldset, field):
91 """ Delete the given field
92 """
93 del fieldset[field]
94
95 def get_meta(self):
96 """ Return a dict with all the meta of the file
97 """
98 metadata = {}
99 self._get_meta(self.editor, metadata)
100 return metadata
101
102 def _get_meta(self, fieldset, metadata):
103 """ Recursive way to handle tree metadatas
104 """
105 for field in fieldset:
106 remove = self._should_remove(field)
107 if remove:
108 try:
109 metadata[field.name] = field.value
110 except:
111 metadata[field.name] = 'harmful content'
112 if remove is FIELD:
113 self._get_meta(field, None)
114
115 def _should_remove(self, key):
116 """ Return True if the field is compromising
117 abstract method
118 """
119 raise NotImplementedError 49 raise NotImplementedError
120 50
121 def create_backup_copy(self): 51 def create_backup_copy(self):
diff --git a/libmat/strippers.py b/libmat/strippers.py
index 3aca04f..5920c41 100644
--- a/libmat/strippers.py
+++ b/libmat/strippers.py
@@ -2,8 +2,7 @@
2""" 2"""
3 3
4import archive 4import archive
5import audio 5import mutagenstripper
6import images
7import logging 6import logging
8import mat 7import mat
9import misc 8import misc
@@ -15,7 +14,6 @@ STRIPPERS = {
15 'application/x-bzip2': archive.Bzip2Stripper, 14 'application/x-bzip2': archive.Bzip2Stripper,
16 'application/x-gzip': archive.GzipStripper, 15 'application/x-gzip': archive.GzipStripper,
17 'application/zip': archive.ZipStripper, 16 'application/zip': archive.ZipStripper,
18 'audio/mpeg': audio.MpegAudioStripper,
19 'application/x-bittorrent': misc.TorrentStripper, 17 'application/x-bittorrent': misc.TorrentStripper,
20 'application/torrent': misc.TorrentStripper, 18 'application/torrent': misc.TorrentStripper,
21 'application/opendocument': office.OpenDocumentStripper, 19 'application/opendocument': office.OpenDocumentStripper,
@@ -52,11 +50,11 @@ if pdfSupport:
52# audio format support with mutagen-python 50# audio format support with mutagen-python
53try: 51try:
54 import mutagen 52 import mutagen
55 STRIPPERS['audio/x-flac'] = audio.FlacStripper 53 STRIPPERS['audio/x-flac'] = mutagenstripper.FlacStripper
56 STRIPPERS['audio/flac'] = audio.FlacStripper 54 STRIPPERS['audio/flac'] = mutagenstripper.FlacStripper
57 STRIPPERS['audio/vorbis'] = audio.OggStripper 55 STRIPPERS['audio/vorbis'] = mutagenstripper.OggStripper
58 STRIPPERS['audio/ogg'] = audio.OggStripper 56 STRIPPERS['audio/ogg'] = mutagenstripper.OggStripper
59 STRIPPERS['audio/mpeg'] = audio.MpegAudioStripper 57 STRIPPERS['audio/mpeg'] = mutagenstripper.MpegAudioStripper
60except ImportError: 58except ImportError:
61 logging.info('Unable to import python-mutagen: limited audio format support') 59 logging.info('Unable to import python-mutagen: limited audio format support')
62 60
@@ -67,7 +65,5 @@ try:
67 STRIPPERS['image/jpeg'] = exiftool.JpegStripper 65 STRIPPERS['image/jpeg'] = exiftool.JpegStripper
68 STRIPPERS['image/png'] = exiftool.PngStripper 66 STRIPPERS['image/png'] = exiftool.PngStripper
69 STRIPPERS['image/tiff'] = exiftool.TiffStripper 67 STRIPPERS['image/tiff'] = exiftool.TiffStripper
70except OSError: # if exiftool is not installed, use hachoir instead 68except OSError:
71 logging.info('Unable to find exiftool: limited images support') 69 logging.info('Unable to find exiftool: limited images support')
72 STRIPPERS['image/jpeg'] = images.JpegStripper
73 STRIPPERS['image/png'] = images.PngStripper