summaryrefslogtreecommitdiff
path: root/libmat
diff options
context:
space:
mode:
Diffstat (limited to 'libmat')
-rw-r--r--libmat/archive.py13
-rw-r--r--libmat/audio.py53
-rw-r--r--libmat/exiftool.py4
-rw-r--r--libmat/hachoir_editor/__init__.py8
-rw-r--r--libmat/hachoir_editor/field.py69
-rw-r--r--libmat/hachoir_editor/fieldset.py352
-rw-r--r--libmat/hachoir_editor/typed_field.py268
-rw-r--r--libmat/images.py52
-rw-r--r--libmat/mat.py21
-rw-r--r--libmat/misc.py4
-rw-r--r--libmat/mutagenstripper.py66
-rw-r--r--libmat/office.py4
-rw-r--r--libmat/parser.py78
-rw-r--r--libmat/strippers.py18
14 files changed, 89 insertions, 921 deletions
diff --git a/libmat/archive.py b/libmat/archive.py
index ad9fdc9..2e14538 100644
--- a/libmat/archive.py
+++ b/libmat/archive.py
@@ -20,9 +20,8 @@ class GenericArchiveStripper(parser.GenericParser):
20 """ Represent a generic archive 20 """ Represent a generic archive
21 """ 21 """
22 22
23 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 23 def __init__(self, filename, mime, backup, is_writable, **kwargs):
24 super(GenericArchiveStripper, self).__init__(filename, 24 super(GenericArchiveStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
25 parser, mime, backup, is_writable, **kwargs)
26 self.compression = '' 25 self.compression = ''
27 self.add2archive = kwargs['add2archive'] 26 self.add2archive = kwargs['add2archive']
28 self.tempdir = tempfile.mkdtemp() 27 self.tempdir = tempfile.mkdtemp()
@@ -354,8 +353,8 @@ class GzipStripper(TarStripper):
354 """ Represent a tar.gz archive 353 """ Represent a tar.gz archive
355 """ 354 """
356 355
357 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 356 def __init__(self, filename, mime, backup, is_writable, **kwargs):
358 super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 357 super(GzipStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
359 self.compression = ':gz' 358 self.compression = ':gz'
360 359
361 360
@@ -363,6 +362,6 @@ class Bzip2Stripper(TarStripper):
363 """ Represent a tar.bz2 archive 362 """ Represent a tar.bz2 archive
364 """ 363 """
365 364
366 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 365 def __init__(self, filename, mime, backup, is_writable, **kwargs):
367 super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 366 super(Bzip2Stripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
368 self.compression = ':bz2' 367 self.compression = ':bz2'
diff --git a/libmat/audio.py b/libmat/audio.py
deleted file mode 100644
index 2747dc1..0000000
--- a/libmat/audio.py
+++ /dev/null
@@ -1,53 +0,0 @@
1""" Care about audio fileformat
2"""
3
4try:
5 from mutagen.flac import FLAC
6 from mutagen.oggvorbis import OggVorbis
7except ImportError:
8 pass
9
10import parser
11import mutagenstripper
12
13
14class MpegAudioStripper(parser.GenericParser):
15 """ Represent mpeg audio file (mp3, ...)
16 """
17 def _should_remove(self, field):
18 return field.name in ("id3v1", "id3v2")
19
20
21class OggStripper(mutagenstripper.MutagenStripper):
22 """ Represent an ogg vorbis file
23 """
24 def _create_mfile(self):
25 self.mfile = OggVorbis(self.filename)
26
27
28class FlacStripper(mutagenstripper.MutagenStripper):
29 """ Represent a Flac audio file
30 """
31 def _create_mfile(self):
32 self.mfile = FLAC(self.filename)
33
34 def remove_all(self):
35 """ Remove the "metadata" block from the file
36 """
37 super(FlacStripper, self).remove_all()
38 self.mfile.clear_pictures()
39 self.mfile.save()
40 return True
41
42 def is_clean(self):
43 """ Check if the "metadata" block is present in the file
44 """
45 return super(FlacStripper, self).is_clean() and not self.mfile.pictures
46
47 def get_meta(self):
48 """ Return the content of the metadata block if present
49 """
50 metadata = super(FlacStripper, self).get_meta()
51 if self.mfile.pictures:
52 metadata['picture:'] = 'yes'
53 return metadata
diff --git a/libmat/exiftool.py b/libmat/exiftool.py
index 07ef06b..ef81ed3 100644
--- a/libmat/exiftool.py
+++ b/libmat/exiftool.py
@@ -9,8 +9,8 @@ class ExiftoolStripper(parser.GenericParser):
9 """ A generic stripper class using exiftool as backend 9 """ A generic stripper class using exiftool as backend
10 """ 10 """
11 11
12 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 12 def __init__(self, filename, mime, backup, is_writable, **kwargs):
13 super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 13 super(ExiftoolStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
14 self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time', 14 self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time',
15 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type', 15 'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type',
16 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'} 16 'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'}
diff --git a/libmat/hachoir_editor/__init__.py b/libmat/hachoir_editor/__init__.py
deleted file mode 100644
index 1835676..0000000
--- a/libmat/hachoir_editor/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
1from field import (
2 EditorError, FakeField)
3from typed_field import (
4 EditableField, EditableBits, EditableBytes,
5 EditableInteger, EditableString,
6 createEditableField)
7from fieldset import EditableFieldSet, NewFieldSet, createEditor
8
diff --git a/libmat/hachoir_editor/field.py b/libmat/hachoir_editor/field.py
deleted file mode 100644
index 6b1efe3..0000000
--- a/libmat/hachoir_editor/field.py
+++ /dev/null
@@ -1,69 +0,0 @@
1from hachoir_core.error import HachoirError
2from hachoir_core.field import joinPath, MissingField
3
4class EditorError(HachoirError):
5 pass
6
7class FakeField(object):
8 """
9 This class have API looks similar to Field API, but objects don't contain
10 any value: all values are _computed_ by parent methods.
11
12 Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc").
13 """
14 is_field_set = False
15
16 def __init__(self, parent, name):
17 self._parent = parent
18 self._name = name
19
20 def _getPath(self):
21 return joinPath(self._parent.path, self._name)
22 path = property(_getPath)
23
24 def _getName(self):
25 return self._name
26 name = property(_getName)
27
28 def _getAddress(self):
29 return self._parent._getFieldAddress(self._name)
30 address = property(_getAddress)
31
32 def _getSize(self):
33 return self._parent.input[self._name].size
34 size = property(_getSize)
35
36 def _getValue(self):
37 return self._parent.input[self._name].value
38 value = property(_getValue)
39
40 def createDisplay(self):
41 # TODO: Returns new value if field is altered
42 return self._parent.input[self._name].display
43 display = property(createDisplay)
44
45 def _getParent(self):
46 return self._parent
47 parent = property(_getParent)
48
49 def hasValue(self):
50 return self._parent.input[self._name].hasValue()
51
52 def __getitem__(self, key):
53 # TODO: Implement this function!
54 raise MissingField(self, key)
55
56 def _isAltered(self):
57 return False
58 is_altered = property(_isAltered)
59
60 def writeInto(self, output):
61 size = self.size
62 addr = self._parent._getFieldInputAddress(self._name)
63 input = self._parent.input
64 stream = input.stream
65 if size % 8:
66 output.copyBitsFrom(stream, addr, size, input.endian)
67 else:
68 output.copyBytesFrom(stream, addr, size//8)
69
diff --git a/libmat/hachoir_editor/fieldset.py b/libmat/hachoir_editor/fieldset.py
deleted file mode 100644
index b7c9b07..0000000
--- a/libmat/hachoir_editor/fieldset.py
+++ /dev/null
@@ -1,352 +0,0 @@
1from hachoir_core.dict import UniqKeyError
2from hachoir_core.field import MissingField, Float32, Float64, FakeArray
3from hachoir_core.compatibility import any
4from hachoir_core.i18n import _
5from typed_field import createEditableField
6from field import EditorError
7from collections import deque # Python 2.4
8import weakref # Python 2.1
9import struct
10
11class EditableFieldSet(object):
12 MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors
13 is_field_set = True
14
15 def __init__(self, parent, fieldset):
16 self._parent = parent
17 self.input = fieldset # original FieldSet
18 self._fields = {} # cache of editable fields
19 self._deleted = set() # Names of deleted fields
20 self._inserted = {} # Inserted field (name => list of field,
21 # where name is the name after)
22
23 def array(self, key):
24 # FIXME: Use cache?
25 return FakeArray(self, key)
26
27 def _getParent(self):
28 return self._parent
29 parent = property(_getParent)
30
31 def _isAltered(self):
32 if self._inserted:
33 return True
34 if self._deleted:
35 return True
36 return any(field.is_altered for field in self._fields.itervalues())
37 is_altered = property(_isAltered)
38
39 def reset(self):
40 """
41 Reset the field set and the input field set.
42 """
43 for key, field in self._fields.iteritems():
44 if not field.is_altered:
45 del self._fields[key]
46 self.input.reset()
47
48 def __len__(self):
49 return len(self.input) \
50 - len(self._deleted) \
51 + sum( len(new) for new in self._inserted.itervalues() )
52
53 def __iter__(self):
54 for field in self.input:
55 name = field.name
56 if name in self._inserted:
57 for newfield in self._inserted[name]:
58 yield weakref.proxy(newfield)
59 if name not in self._deleted:
60 yield self[name]
61 if None in self._inserted:
62 for newfield in self._inserted[None]:
63 yield weakref.proxy(newfield)
64
65 def insertBefore(self, name, *new_fields):
66 self._insert(name, new_fields, False)
67
68 def insertAfter(self, name, *new_fields):
69 self._insert(name, new_fields, True)
70
71 def insert(self, *new_fields):
72 self._insert(None, new_fields, True)
73
74 def _insert(self, key, new_fields, next):
75 """
76 key is the name of the field before which new_fields
77 will be inserted. If next is True, the fields will be inserted
78 _after_ this field.
79 """
80 # Set unique field name
81 for field in new_fields:
82 if field._name.endswith("[]"):
83 self.input.setUniqueFieldName(field)
84
85 # Check that there is no duplicate in inserted fields
86 new_names = list(field.name for field in new_fields)
87 names_set = set(new_names)
88 if len(names_set) != len(new_fields):
89 duplicates = (name for name in names_set if 1 < new_names.count(name))
90 raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates))
91
92 # Check that field names are not in input
93 if self.input: # Write special version for NewFieldSet?
94 for name in new_names:
95 if name in self.input and name not in self._deleted:
96 raise UniqKeyError(_("Field name '%s' already exists") % name)
97
98 # Check that field names are not in inserted fields
99 for fields in self._inserted.itervalues():
100 for field in fields:
101 if field.name in new_names:
102 raise UniqKeyError(_("Field name '%s' already exists") % field.name)
103
104 # Input have already inserted field?
105 if key in self._inserted:
106 if next:
107 self._inserted[key].extend( reversed(new_fields) )
108 else:
109 self._inserted[key].extendleft( reversed(new_fields) )
110 return
111
112 # Whould like to insert in inserted fields?
113 if key:
114 for fields in self._inserted.itervalues():
115 names = [item.name for item in fields]
116 try:
117 pos = names.index(key)
118 except ValueError:
119 continue
120 if 0 <= pos:
121 if next:
122 pos += 1
123 fields.rotate(-pos)
124 fields.extendleft( reversed(new_fields) )
125 fields.rotate(pos)
126 return
127
128 # Get next field. Use None if we are at the end.
129 if next:
130 index = self.input[key].index + 1
131 try:
132 key = self.input[index].name
133 except IndexError:
134 key = None
135
136 # Check that field names are not in input
137 if key not in self.input:
138 raise MissingField(self, key)
139
140 # Insert in original input
141 self._inserted[key]= deque(new_fields)
142
143 def _getDescription(self):
144 return self.input.description
145 description = property(_getDescription)
146
147 def _getStream(self):
148 # FIXME: This property is maybe a bad idea since address may be differents
149 return self.input.stream
150 stream = property(_getStream)
151
152 def _getName(self):
153 return self.input.name
154 name = property(_getName)
155
156 def _getEndian(self):
157 return self.input.endian
158 endian = property(_getEndian)
159
160 def _getAddress(self):
161 if self._parent:
162 return self._parent._getFieldAddress(self.name)
163 else:
164 return 0
165 address = property(_getAddress)
166
167 def _getAbsoluteAddress(self):
168 address = self.address
169 current = self._parent
170 while current:
171 address += current.address
172 current = current._parent
173 return address
174 absolute_address = property(_getAbsoluteAddress)
175
176 def hasValue(self):
177 return False
178# return self._parent.input[self.name].hasValue()
179
180 def _getSize(self):
181 if self.is_altered:
182 return sum(field.size for field in self)
183 else:
184 return self.input.size
185 size = property(_getSize)
186
187 def _getPath(self):
188 return self.input.path
189 path = property(_getPath)
190
191 def _getOriginalField(self, name):
192 assert name in self.input
193 return self.input[name]
194
195 def _getFieldInputAddress(self, name):
196 """
197 Absolute address of a field from the input field set.
198 """
199 assert name in self.input
200 return self.input[name].absolute_address
201
202 def _getFieldAddress(self, name):
203 """
204 Compute relative address of a field. The operation takes care of
205 deleted and resized fields.
206 """
207 #assert name not in self._deleted
208 addr = 0
209 for field in self:
210 if field.name == name:
211 return addr
212 addr += field.size
213 raise MissingField(self, name)
214
215 def _getItemByPath(self, path):
216 if not path[0]:
217 path = path[1:]
218 field = self
219 for name in path:
220 field = field[name]
221 return field
222
223 def __contains__(self, name):
224 try:
225 field = self[name]
226 return (field is not None)
227 except MissingField:
228 return False
229
230 def __getitem__(self, key):
231 """
232 Create a weak reference to an editable field (EditableField) for the
233 field with specified name. If the field is removed later, using the
234 editable field will raise a weakref.ReferenceError exception.
235
236 May raise a MissingField error if the field doesn't exist in original
237 field set or it has been deleted.
238 """
239 if "/" in key:
240 return self._getItemByPath(key.split("/"))
241 if isinstance(key, (int, long)):
242 raise EditorError("Integer index are not supported")
243
244 if (key in self._deleted) or (key not in self.input):
245 raise MissingField(self, key)
246 if key not in self._fields:
247 field = self.input[key]
248 if field.is_field_set:
249 self._fields[key] = createEditableFieldSet(self, field)
250 else:
251 self._fields[key] = createEditableField(self, field)
252 return weakref.proxy(self._fields[key])
253
254 def __delitem__(self, name):
255 """
256 Remove a field from the field set. May raise an MissingField exception
257 if the field has already been deleted.
258 """
259 parts = name.partition('/')
260 if parts[2]:
261 fieldset = self[parts[0]]
262 del fieldset[parts[2]]
263 return
264 if name in self._deleted:
265 raise MissingField(self, name)
266 self._deleted.add(name)
267 if name in self._fields:
268 del self._fields[name]
269
270 def writeInto(self, output):
271 """
272 Write the content if this field set into the output stream
273 (OutputStream).
274 """
275 if not self.is_altered:
276 # Not altered: just copy bits/bytes
277 input = self.input
278 if input.size % 8:
279 output.copyBitsFrom(input.stream,
280 input.absolute_address, input.size, input.endian)
281 else:
282 output.copyBytesFrom(input.stream,
283 input.absolute_address, input.size//8)
284 else:
285 # Altered: call writeInto() method of each field
286 realaddr = 0
287 for field in self:
288 field.writeInto(output)
289 realaddr += field.size
290
291 def _getValue(self):
292 raise EditorError('Field set "%s" has no value' % self.path)
293 def _setValue(self, value):
294 raise EditorError('Field set "%s" value is read only' % self.path)
295 value = property(_getValue, _setValue, "Value of field")
296
297class EditableFloat(EditableFieldSet):
298 _value = None
299
300 def _isAltered(self):
301 return (self._value is not None)
302 is_altered = property(_isAltered)
303
304 def writeInto(self, output):
305 if self._value is not None:
306 self._write(output)
307 else:
308 EditableFieldSet.writeInto(self, output)
309
310 def _write(self, output):
311 format = self.input.struct_format
312 raw = struct.pack(format, self._value)
313 output.writeBytes(raw)
314
315 def _setValue(self, value):
316 self.parent._is_altered = True
317 self._value = value
318 value = property(EditableFieldSet._getValue, _setValue)
319
320def createEditableFieldSet(parent, field):
321 cls = field.__class__
322 # FIXME: Support Float80
323 if cls in (Float32, Float64):
324 return EditableFloat(parent, field)
325 else:
326 return EditableFieldSet(parent, field)
327
328class NewFieldSet(EditableFieldSet):
329 def __init__(self, parent, name):
330 EditableFieldSet.__init__(self, parent, None)
331 self._name = name
332 self._endian = parent.endian
333
334 def __iter__(self):
335 if None in self._inserted:
336 return iter(self._inserted[None])
337 else:
338 raise StopIteration()
339
340 def _getName(self):
341 return self._name
342 name = property(_getName)
343
344 def _getEndian(self):
345 return self._endian
346 endian = property(_getEndian)
347
348 is_altered = property(lambda self: True)
349
350def createEditor(fieldset):
351 return EditableFieldSet(None, fieldset)
352
diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py
deleted file mode 100644
index 606d39b..0000000
--- a/libmat/hachoir_editor/typed_field.py
+++ /dev/null
@@ -1,268 +0,0 @@
1from hachoir_core.field import (
2 RawBits, Bit, Bits, PaddingBits,
3 RawBytes, Bytes, PaddingBytes,
4 GenericString, Character,
5 isInteger, isString)
6from field import FakeField
7
8
9class EditableField(FakeField):
10 """
11 Pure virtual class used to write editable field class.
12 """
13
14 _is_altered = False
15
16 def __init__(self, parent, name, value=None):
17 FakeField.__init__(self, parent, name)
18 self._value = value
19
20 def _isAltered(self):
21 return self._is_altered
22
23 is_altered = property(_isAltered)
24
25 def hasValue(self):
26 return True
27
28 def _computeSize(self):
29 raise NotImplementedError()
30
31 def _getValue(self):
32 return self._value
33
34 def _setValue(self, value):
35 self._value = value
36
37 def _propGetValue(self):
38 if self._value is not None:
39 return self._getValue()
40 else:
41 return FakeField._getValue(self)
42
43 def _propSetValue(self, value):
44 self._setValue(value)
45 self._is_altered = True
46
47 value = property(_propGetValue, _propSetValue)
48
49 def _getSize(self):
50 if self._value is not None:
51 return self._computeSize()
52 else:
53 return FakeField._getSize(self)
54
55 size = property(_getSize)
56
57 def _write(self, output):
58 raise NotImplementedError()
59
60 def writeInto(self, output):
61 if self._is_altered:
62 self._write(output)
63 else:
64 return FakeField.writeInto(self, output)
65
66
67class EditableFixedField(EditableField):
68 """
69 Editable field with fixed size.
70 """
71
72 def __init__(self, parent, name, value=None, size=None):
73 EditableField.__init__(self, parent, name, value)
74 if size is not None:
75 self._size = size
76 else:
77 self._size = self._parent._getOriginalField(self._name).size
78
79 def _getSize(self):
80 return self._size
81
82 size = property(_getSize)
83
84
85class EditableBits(EditableFixedField):
86 def __init__(self, parent, name, *args):
87 if args:
88 if len(args) != 2:
89 raise TypeError(
90 "Wrong argument count, EditableBits constructor prototype is: "
91 "(parent, name, [size, value])")
92 size = args[0]
93 value = args[1]
94 assert isinstance(value, (int, long))
95 else:
96 size = None
97 value = None
98 EditableFixedField.__init__(self, parent, name, value, size)
99 if args:
100 self._setValue(args[1])
101 self._is_altered = True
102
103 def _setValue(self, value):
104 if not (0 <= value < (1 << self._size)):
105 raise ValueError("Invalid value, must be in range %s..%s"
106 % (0, (1 << self._size) - 1))
107 self._value = value
108
109 def _write(self, output):
110 output.writeBits(self._size, self._value, self._parent.endian)
111
112
113class EditableBytes(EditableField):
114 def _setValue(self, value):
115 if not value: raise ValueError(
116 "Unable to set empty string to a EditableBytes field")
117 self._value = value
118
119 def _computeSize(self):
120 return len(self._value) * 8
121
122 def _write(self, output):
123 output.writeBytes(self._value)
124
125
126class EditableString(EditableField):
127 MAX_SIZE = {
128 "Pascal8": (1 << 8) - 1,
129 "Pascal16": (1 << 16) - 1,
130 "Pascal32": (1 << 32) - 1,
131 }
132
133 def __init__(self, parent, name, *args, **kw):
134 if len(args) == 2:
135 value = args[1]
136 assert isinstance(value, str) # TODO: support Unicode
137 elif not args:
138 value = None
139 else:
140 raise TypeError(
141 "Wrong argument count, EditableString constructor prototype is:"
142 "(parent, name, [format, value])")
143 EditableField.__init__(self, parent, name, value)
144 if len(args) == 2:
145 self._charset = kw.get('charset', None)
146 self._format = args[0]
147 if self._format in GenericString.PASCAL_FORMATS:
148 self._prefix_size = GenericString.PASCAL_FORMATS[self._format]
149 else:
150 self._prefix_size = 0
151 self._suffix_str = GenericString.staticSuffixStr(
152 self._format, self._charset, self._parent.endian)
153 self._is_altered = True
154 else:
155 orig = self._parent._getOriginalField(name)
156 self._charset = orig.charset
157 self._format = orig.format
158 self._prefix_size = orig.content_offset
159 self._suffix_str = orig.suffix_str
160
161 def _setValue(self, value):
162 size = len(value)
163 if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size:
164 raise ValueError("String is too big")
165 self._value = value
166
167 def _computeSize(self):
168 return (self._prefix_size + len(self._value) + len(self._suffix_str)) * 8
169
170 def _write(self, output):
171 if self._format in GenericString.SUFFIX_FORMAT:
172 output.writeBytes(self._value)
173 output.writeBytes(self._suffix_str)
174 elif self._format == "fixed":
175 output.writeBytes(self._value)
176 else:
177 assert self._format in GenericString.PASCAL_FORMATS
178 size = GenericString.PASCAL_FORMATS[self._format]
179 output.writeInteger(len(self._value), False, size, self._parent.endian)
180 output.writeBytes(self._value)
181
182
183class EditableCharacter(EditableFixedField):
184 def __init__(self, parent, name, *args):
185 if args:
186 if len(args) != 3:
187 raise TypeError(
188 "Wrong argument count, EditableCharacter "
189 "constructor prototype is: (parent, name, [value])")
190 value = args[0]
191 if not isinstance(value, str) or len(value) != 1:
192 raise TypeError("EditableCharacter needs a character")
193 else:
194 value = None
195 EditableFixedField.__init__(self, parent, name, value, 8)
196 if args:
197 self._is_altered = True
198
199 def _setValue(self, value):
200 if not isinstance(value, str) or len(value) != 1:
201 raise TypeError("EditableCharacter needs a character")
202 self._value = value
203
204 def _write(self, output):
205 output.writeBytes(self._value)
206
207
208class EditableInteger(EditableFixedField):
209 VALID_VALUE_SIGNED = {
210 8: (-(1 << 8), (1 << 8) - 1),
211 16: (-(1 << 15), (1 << 15) - 1),
212 32: (-(1 << 31), (1 << 31) - 1),
213 }
214 VALID_VALUE_UNSIGNED = {
215 8: (0, (1 << 8) - 1),
216 16: (0, (1 << 16) - 1),
217 32: (0, (1 << 32) - 1)
218 }
219
220 def __init__(self, parent, name, *args):
221 if args:
222 if len(args) != 3:
223 raise TypeError(
224 "Wrong argument count, EditableInteger constructor prototype is: "
225 "(parent, name, [signed, size, value])")
226 size = args[1]
227 value = args[2]
228 assert isinstance(value, (int, long))
229 else:
230 size = None
231 value = None
232 EditableFixedField.__init__(self, parent, name, value, size)
233 if args:
234 self._signed = args[0]
235 self._is_altered = True
236 else:
237 self._signed = self._parent._getOriginalField(self._name).signed
238
239 def _setValue(self, value):
240 if self._signed:
241 valid = self.VALID_VALUE_SIGNED
242 else:
243 valid = self.VALID_VALUE_UNSIGNED
244 minval, maxval = valid[self._size]
245 if not (minval <= value <= maxval):
246 raise ValueError("Invalid value, must be in range %s..%s"
247 % (minval, maxval))
248 self._value = value
249
250 def _write(self, output):
251 output.writeInteger(
252 self.value, self._signed, self._size // 8, self._parent.endian)
253
254
255def createEditableField(fieldset, field):
256 if isInteger(field):
257 cls = EditableInteger
258 elif isString(field):
259 cls = EditableString
260 elif field.__class__ in (RawBytes, Bytes, PaddingBytes):
261 cls = EditableBytes
262 elif field.__class__ in (RawBits, Bits, Bit, PaddingBits):
263 cls = EditableBits
264 elif field.__class__ == Character:
265 cls = EditableCharacter
266 else:
267 cls = FakeField
268 return cls(fieldset, field.name)
diff --git a/libmat/images.py b/libmat/images.py
deleted file mode 100644
index 813b0fd..0000000
--- a/libmat/images.py
+++ /dev/null
@@ -1,52 +0,0 @@
1""" Takes care about pictures formats
2
3References:
4 - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm
5 - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html
6 - PNG: http://www.w3.org/TR/PNG-Chunks.html
7"""
8
9import parser
10
11
12class JpegStripper(parser.GenericParser):
13 """ Represents a jpeg file.
14 Custom Huffman and Quantization tables
15 are stripped: they may leak
16 some info, and the quality loss is minor.
17 """
18 def _should_remove(self, field):
19 """ Return True if the field is compromising
20 """
21 field_list = frozenset([
22 'start_image', # start of the image
23 'app0', # JFIF data
24 'start_frame', # specify width, height, number of components
25 'start_scan', # specify which slice of data the top-to-bottom scan contains
26 'data', # actual data
27 'end_image']) # end of the image
28 if field.name in field_list:
29 return False
30 elif field.name.startswith('quantization['): # custom Quant. tables
31 return False
32 elif field.name.startswith('huffman['): # custom Huffman tables
33 return False
34 return True
35
36
37class PngStripper(parser.GenericParser):
38 """ Represents a png file
39 """
40 def _should_remove(self, field):
41 """ Return True if the field is compromising
42 """
43 field_list = frozenset([
44 'id',
45 'header', # PNG header
46 'physical', # the intended pixel size or aspect ratio
47 'end']) # end of the image
48 if field.name in field_list:
49 return False
50 elif field.name.startswith('data['): # data
51 return False
52 return True
diff --git a/libmat/mat.py b/libmat/mat.py
index 42357d6..2634cc3 100644
--- a/libmat/mat.py
+++ b/libmat/mat.py
@@ -10,9 +10,6 @@ import platform
10import subprocess 10import subprocess
11import xml.sax 11import xml.sax
12 12
13import hachoir_core.cmd_line
14import hachoir_parser
15
16import libmat.exceptions 13import libmat.exceptions
17 14
18__version__ = '0.5.4' 15__version__ = '0.5.4'
@@ -20,12 +17,10 @@ __author__ = 'jvoisin'
20 17
21# Silence 18# Silence
22LOGGING_LEVEL = logging.CRITICAL 19LOGGING_LEVEL = logging.CRITICAL
23hachoir_core.config.quiet = True
24fname = '' 20fname = ''
25 21
26# Verbose 22# Verbose
27# LOGGING_LEVEL = logging.DEBUG 23# LOGGING_LEVEL = logging.DEBUG
28# hachoir_core.config.quiet = False
29# logname = 'report.log' 24# logname = 'report.log'
30 25
31logging.basicConfig(filename=fname, level=LOGGING_LEVEL) 26logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
@@ -155,22 +150,10 @@ def create_class_file(name, backup, **kwargs):
155 elif not os.access(name, os.R_OK): # check read permissions 150 elif not os.access(name, os.R_OK): # check read permissions
156 logging.error('%s is is not readable', name) 151 logging.error('%s is is not readable', name)
157 return None 152 return None
158 elif not os.path.getsize(name): # check if the file is not empty (hachoir crash on empty files)
159 logging.error('%s is empty', name)
160 return None
161
162 try:
163 filename = hachoir_core.cmd_line.unicodeFilename(name)
164 except TypeError: # get rid of "decoding Unicode is not supported"
165 filename = name
166
167 parser = hachoir_parser.createParser(filename)
168 if not parser:
169 logging.info('Unable to parse %s with hachoir', filename)
170 153
171 mime = mimetypes.guess_type(name)[0] 154 mime = mimetypes.guess_type(name)[0]
172 if not mime: 155 if not mime:
173 logging.info('Unable to find mimetype of %s', filename) 156 logging.info('Unable to find mimetype of %s', name)
174 return None 157 return None
175 158
176 if mime.startswith('application/vnd.oasis.opendocument'): 159 if mime.startswith('application/vnd.oasis.opendocument'):
@@ -186,4 +169,4 @@ def create_class_file(name, backup, **kwargs):
186 logging.info('Don\'t have stripper for %s format', mime) 169 logging.info('Don\'t have stripper for %s format', mime)
187 return None 170 return None
188 171
189 return stripper_class(filename, parser, mime, backup, is_writable, **kwargs) 172 return stripper_class(name, mime, backup, is_writable, **kwargs)
diff --git a/libmat/misc.py b/libmat/misc.py
index a55b8ed..cc480e5 100644
--- a/libmat/misc.py
+++ b/libmat/misc.py
@@ -11,8 +11,8 @@ class TorrentStripper(parser.GenericParser):
11 of the bencode lib from Petru Paler 11 of the bencode lib from Petru Paler
12 """ 12 """
13 13
14 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 14 def __init__(self, filename, mime, backup, is_writable, **kwargs):
15 super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 15 super(TorrentStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
16 self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', 16 self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces',
17 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) 17 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash'])
18 18
diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py
index 0f9520a..692c56f 100644
--- a/libmat/mutagenstripper.py
+++ b/libmat/mutagenstripper.py
@@ -3,11 +3,15 @@
3 3
4import parser 4import parser
5 5
6from mutagen.flac import FLAC
7from mutagen.oggvorbis import OggVorbis
8from mutagen.mp3 import MP3
9
6 10
7class MutagenStripper(parser.GenericParser): 11class MutagenStripper(parser.GenericParser):
8 """ Parser using the (awesome) mutagen library. """ 12 """ Parser using the (awesome) mutagen library. """
9 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 13 def __init__(self, filename, mime, backup, is_writable, **kwargs):
10 super(MutagenStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 14 super(MutagenStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
11 self.mfile = None # This will be instanciated in self._create_mfile() 15 self.mfile = None # This will be instanciated in self._create_mfile()
12 self._create_mfile() 16 self._create_mfile()
13 17
@@ -36,3 +40,61 @@ class MutagenStripper(parser.GenericParser):
36 for key, value in self.mfile.tags: 40 for key, value in self.mfile.tags:
37 metadata[key] = value 41 metadata[key] = value
38 return metadata 42 return metadata
43
44
45class MpegAudioStripper(MutagenStripper):
46 """ Represent a mp3 vorbis file
47 """
48 def _create_mfile(self):
49 self.mfile = MP3(self.filename)
50
51 def get_meta(self):
52 """
53 Return the content of the metadata block is present
54 """
55 metadata = {}
56 if self.mfile.tags:
57 for key in self.mfile.tags.keys():
58 meta = self.mfile.tags[key]
59 try: # Sometimes, the field has a human-redable description
60 desc = meta.desc
61 except AttributeError:
62 desc = key
63 text = meta.text[0]
64 metadata[desc] = text
65 return metadata
66
67
68class OggStripper(MutagenStripper):
69 """ Represent an ogg vorbis file
70 """
71 def _create_mfile(self):
72 self.mfile = OggVorbis(self.filename)
73
74
75class FlacStripper(MutagenStripper):
76 """ Represent a Flac audio file
77 """
78 def _create_mfile(self):
79 self.mfile = FLAC(self.filename)
80
81 def remove_all(self):
82 """ Remove the "metadata" block from the file
83 """
84 super(FlacStripper, self).remove_all()
85 self.mfile.clear_pictures()
86 self.mfile.save()
87 return True
88
89 def is_clean(self):
90 """ Check if the "metadata" block is present in the file
91 """
92 return super(FlacStripper, self).is_clean() and not self.mfile.pictures
93
94 def get_meta(self):
95 """ Return the content of the metadata block if present
96 """
97 metadata = super(FlacStripper, self).get_meta()
98 if self.mfile.pictures:
99 metadata['picture:'] = 'yes'
100 return metadata
diff --git a/libmat/office.py b/libmat/office.py
index c585cb6..47cd622 100644
--- a/libmat/office.py
+++ b/libmat/office.py
@@ -110,8 +110,8 @@ class PdfStripper(parser.GenericParser):
110 """ Represent a PDF file 110 """ Represent a PDF file
111 """ 111 """
112 112
113 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 113 def __init__(self, filename, mime, backup, is_writable, **kwargs):
114 super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 114 super(PdfStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
115 self.uri = 'file://' + os.path.abspath(self.filename) 115 self.uri = 'file://' + os.path.abspath(self.filename)
116 self.password = None 116 self.password = None
117 try: 117 try:
diff --git a/libmat/parser.py b/libmat/parser.py
index 8e10ae9..2a82a25 100644
--- a/libmat/parser.py
+++ b/libmat/parser.py
@@ -5,8 +5,6 @@ import os
5import shutil 5import shutil
6import tempfile 6import tempfile
7 7
8import hachoir_core
9import hachoir_editor
10 8
11import mat 9import mat
12 10
@@ -24,19 +22,14 @@ FIELD = object()
24class GenericParser(object): 22class GenericParser(object):
25 """ Parent class of all parsers 23 """ Parent class of all parsers
26 """ 24 """
27 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 25 def __init__(self, filename, mime, backup, is_writable, **kwargs):
28 self.filename = '' 26 self.filename = ''
29 self.parser = parser
30 self.mime = mime 27 self.mime = mime
31 self.backup = backup 28 self.backup = backup
32 self.is_writable = is_writable 29 self.is_writable = is_writable
33 self.editor = hachoir_editor.createEditor(parser) 30 self.filename = filename
34 try:
35 self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
36 except TypeError: # get rid of "decoding Unicode is not supported"
37 self.filename = filename
38 self.basename = os.path.basename(filename) 31 self.basename = os.path.basename(filename)
39 self.output = hachoir_core.cmd_line.unicodeFilename(tempfile.mkstemp()[1]) 32 self.output = tempfile.mkstemp()[1]
40 33
41 def __del__(self): 34 def __del__(self):
42 """ Remove tempfile if it was not used 35 """ Remove tempfile if it was not used
@@ -48,74 +41,11 @@ class GenericParser(object):
48 """ 41 """
49 Check if the file is clean from harmful metadatas 42 Check if the file is clean from harmful metadatas
50 """ 43 """
51 for field in self.editor: 44 raise NotImplementedError
52 if self._should_remove(field):
53 return self._is_clean(self.editor)
54 return True
55
56 def _is_clean(self, fieldset):
57 """ Helper method of the `is_clean` one """
58 for field in fieldset:
59 remove = self._should_remove(field)
60 if remove is True:
61 return False
62 if remove is FIELD:
63 if not self._is_clean(field):
64 return False
65 return True
66 45
67 def remove_all(self): 46 def remove_all(self):
68 """ Remove all compromising fields 47 """ Remove all compromising fields
69 """ 48 """
70 state = self._remove_all(self.editor)
71 hachoir_core.field.writeIntoFile(self.editor, self.output)
72 self.do_backup()
73 return state
74
75 def _remove_all(self, fieldset):
76 """ Recursive way to handle tree metadatas
77 """
78 try:
79 for field in fieldset:
80 remove = self._should_remove(field)
81 if remove is True:
82 self._remove(fieldset, field.name)
83 if remove is FIELD:
84 self._remove_all(field)
85 return True
86 except:
87 return False
88
89 @staticmethod
90 def _remove(fieldset, field):
91 """ Delete the given field
92 """
93 del fieldset[field]
94
95 def get_meta(self):
96 """ Return a dict with all the meta of the file
97 """
98 metadata = {}
99 self._get_meta(self.editor, metadata)
100 return metadata
101
102 def _get_meta(self, fieldset, metadata):
103 """ Recursive way to handle tree metadatas
104 """
105 for field in fieldset:
106 remove = self._should_remove(field)
107 if remove:
108 try:
109 metadata[field.name] = field.value
110 except:
111 metadata[field.name] = 'harmful content'
112 if remove is FIELD:
113 self._get_meta(field, None)
114
115 def _should_remove(self, key):
116 """ Return True if the field is compromising
117 abstract method
118 """
119 raise NotImplementedError 49 raise NotImplementedError
120 50
121 def create_backup_copy(self): 51 def create_backup_copy(self):
diff --git a/libmat/strippers.py b/libmat/strippers.py
index 3aca04f..5920c41 100644
--- a/libmat/strippers.py
+++ b/libmat/strippers.py
@@ -2,8 +2,7 @@
2""" 2"""
3 3
4import archive 4import archive
5import audio 5import mutagenstripper
6import images
7import logging 6import logging
8import mat 7import mat
9import misc 8import misc
@@ -15,7 +14,6 @@ STRIPPERS = {
15 'application/x-bzip2': archive.Bzip2Stripper, 14 'application/x-bzip2': archive.Bzip2Stripper,
16 'application/x-gzip': archive.GzipStripper, 15 'application/x-gzip': archive.GzipStripper,
17 'application/zip': archive.ZipStripper, 16 'application/zip': archive.ZipStripper,
18 'audio/mpeg': audio.MpegAudioStripper,
19 'application/x-bittorrent': misc.TorrentStripper, 17 'application/x-bittorrent': misc.TorrentStripper,
20 'application/torrent': misc.TorrentStripper, 18 'application/torrent': misc.TorrentStripper,
21 'application/opendocument': office.OpenDocumentStripper, 19 'application/opendocument': office.OpenDocumentStripper,
@@ -52,11 +50,11 @@ if pdfSupport:
52# audio format support with mutagen-python 50# audio format support with mutagen-python
53try: 51try:
54 import mutagen 52 import mutagen
55 STRIPPERS['audio/x-flac'] = audio.FlacStripper 53 STRIPPERS['audio/x-flac'] = mutagenstripper.FlacStripper
56 STRIPPERS['audio/flac'] = audio.FlacStripper 54 STRIPPERS['audio/flac'] = mutagenstripper.FlacStripper
57 STRIPPERS['audio/vorbis'] = audio.OggStripper 55 STRIPPERS['audio/vorbis'] = mutagenstripper.OggStripper
58 STRIPPERS['audio/ogg'] = audio.OggStripper 56 STRIPPERS['audio/ogg'] = mutagenstripper.OggStripper
59 STRIPPERS['audio/mpeg'] = audio.MpegAudioStripper 57 STRIPPERS['audio/mpeg'] = mutagenstripper.MpegAudioStripper
60except ImportError: 58except ImportError:
61 logging.info('Unable to import python-mutagen: limited audio format support') 59 logging.info('Unable to import python-mutagen: limited audio format support')
62 60
@@ -67,7 +65,5 @@ try:
67 STRIPPERS['image/jpeg'] = exiftool.JpegStripper 65 STRIPPERS['image/jpeg'] = exiftool.JpegStripper
68 STRIPPERS['image/png'] = exiftool.PngStripper 66 STRIPPERS['image/png'] = exiftool.PngStripper
69 STRIPPERS['image/tiff'] = exiftool.TiffStripper 67 STRIPPERS['image/tiff'] = exiftool.TiffStripper
70except OSError: # if exiftool is not installed, use hachoir instead 68except OSError:
71 logging.info('Unable to find exiftool: limited images support') 69 logging.info('Unable to find exiftool: limited images support')
72 STRIPPERS['image/jpeg'] = images.JpegStripper
73 STRIPPERS['image/png'] = images.PngStripper