From c308cf7daaa4fa46377e2df0f2e9a397981e19b2 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 10 Jun 2011 01:29:29 +0200 Subject: The current version is (mostly) working --- lib/check/images/jpg.py | 17 -- lib/hachoir_editor/__init__.py | 8 - lib/hachoir_editor/field.py | 69 -------- lib/hachoir_editor/fieldset.py | 346 -------------------------------------- lib/hachoir_editor/typed_field.py | 253 ---------------------------- lib/mat.py | 104 ------------ lib/strippers.py | 3 - lib/test.py | 70 -------- 8 files changed, 870 deletions(-) delete mode 100644 lib/check/images/jpg.py delete mode 100644 lib/hachoir_editor/__init__.py delete mode 100644 lib/hachoir_editor/field.py delete mode 100644 lib/hachoir_editor/fieldset.py delete mode 100644 lib/hachoir_editor/typed_field.py delete mode 100644 lib/mat.py delete mode 100644 lib/strippers.py delete mode 100644 lib/test.py (limited to 'lib') diff --git a/lib/check/images/jpg.py b/lib/check/images/jpg.py deleted file mode 100644 index 7f29587..0000000 --- a/lib/check/images/jpg.py +++ /dev/null @@ -1,17 +0,0 @@ -import hachoir_core.error -import hachoir_core.cmd_line -import hachoir_parser -import hachoir_metadata -import sys -import mat - - -class JpegStripper(file): - def checkField(self, field): - print(field.description) - if field.name.startswith("comment"): - return True - return field.name in ("photoshop", "exif", "adobe") - return False - - diff --git a/lib/hachoir_editor/__init__.py b/lib/hachoir_editor/__init__.py deleted file mode 100644 index b106278..0000000 --- a/lib/hachoir_editor/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from hachoir_editor.field import ( - EditorError, FakeField) -from hachoir_editor.typed_field import ( - EditableField, EditableBits, EditableBytes, - EditableInteger, EditableString, - createEditableField) -from hachoir_editor.fieldset import EditableFieldSet, NewFieldSet, createEditor - diff --git a/lib/hachoir_editor/field.py b/lib/hachoir_editor/field.py deleted file mode 100644 index 6b1efe3..0000000 --- a/lib/hachoir_editor/field.py +++ /dev/null @@ -1,69 +0,0 @@ -from hachoir_core.error import HachoirError -from hachoir_core.field import joinPath, MissingField - -class EditorError(HachoirError): - pass - -class FakeField(object): - """ - This class have API looks similar to Field API, but objects don't contain - any value: all values are _computed_ by parent methods. - - Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). - """ - is_field_set = False - - def __init__(self, parent, name): - self._parent = parent - self._name = name - - def _getPath(self): - return joinPath(self._parent.path, self._name) - path = property(_getPath) - - def _getName(self): - return self._name - name = property(_getName) - - def _getAddress(self): - return self._parent._getFieldAddress(self._name) - address = property(_getAddress) - - def _getSize(self): - return self._parent.input[self._name].size - size = property(_getSize) - - def _getValue(self): - return self._parent.input[self._name].value - value = property(_getValue) - - def createDisplay(self): - # TODO: Returns new value if field is altered - return self._parent.input[self._name].display - display = property(createDisplay) - - def _getParent(self): - return self._parent - parent = property(_getParent) - - def hasValue(self): - return self._parent.input[self._name].hasValue() - - def __getitem__(self, key): - # TODO: Implement this function! - raise MissingField(self, key) - - def _isAltered(self): - return False - is_altered = property(_isAltered) - - def writeInto(self, output): - size = self.size - addr = self._parent._getFieldInputAddress(self._name) - input = self._parent.input - stream = input.stream - if size % 8: - output.copyBitsFrom(stream, addr, size, input.endian) - else: - output.copyBytesFrom(stream, addr, size//8) - diff --git a/lib/hachoir_editor/fieldset.py b/lib/hachoir_editor/fieldset.py deleted file mode 100644 index 1669b5a..0000000 --- a/lib/hachoir_editor/fieldset.py +++ /dev/null @@ -1,346 +0,0 @@ -from hachoir_core.dict import UniqKeyError -from hachoir_core.field import MissingField, Float32, Float64, FakeArray -from hachoir_core.compatibility import any -from hachoir_core.i18n import _ -from hachoir_editor import createEditableField, EditorError -from collections import deque # Python 2.4 -import weakref # Python 2.1 -import struct - -class EditableFieldSet(object): - MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors - is_field_set = True - - def __init__(self, parent, fieldset): - self._parent = parent - self.input = fieldset # original FieldSet - self._fields = {} # cache of editable fields - self._deleted = set() # Names of deleted fields - self._inserted = {} # Inserted field (name => list of field, - # where name is the name after) - - def array(self, key): - # FIXME: Use cache? - return FakeArray(self, key) - - def _getParent(self): - return self._parent - parent = property(_getParent) - - def _isAltered(self): - if self._inserted: - return True - if self._deleted: - return True - return any(field.is_altered for field in self._fields.itervalues()) - is_altered = property(_isAltered) - - def reset(self): - """ - Reset the field set and the input field set. - """ - for key, field in self._fields.iteritems(): - if not field.is_altered: - del self._fields[key] - self.input.reset() - - def __len__(self): - return len(self.input) \ - - len(self._deleted) \ - + sum( len(new) for new in self._inserted.itervalues() ) - - def __iter__(self): - for field in self.input: - name = field.name - if name in self._inserted: - for newfield in self._inserted[name]: - yield weakref.proxy(newfield) - if name not in self._deleted: - yield self[name] - if None in self._inserted: - for newfield in self._inserted[None]: - yield weakref.proxy(newfield) - - def insertBefore(self, name, *new_fields): - self._insert(name, new_fields, False) - - def insertAfter(self, name, *new_fields): - self._insert(name, new_fields, True) - - def insert(self, *new_fields): - self._insert(None, new_fields, True) - - def _insert(self, key, new_fields, next): - """ - key is the name of the field before which new_fields - will be inserted. If next is True, the fields will be inserted - _after_ this field. - """ - # Set unique field name - for field in new_fields: - if field._name.endswith("[]"): - self.input.setUniqueFieldName(field) - - # Check that there is no duplicate in inserted fields - new_names = list(field.name for field in new_fields) - names_set = set(new_names) - if len(names_set) != len(new_fields): - duplicates = (name for name in names_set if 1 < new_names.count(name)) - raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates)) - - # Check that field names are not in input - if self.input: # Write special version for NewFieldSet? - for name in new_names: - if name in self.input and name not in self._deleted: - raise UniqKeyError(_("Field name '%s' already exists") % name) - - # Check that field names are not in inserted fields - for fields in self._inserted.itervalues(): - for field in fields: - if field.name in new_names: - raise UniqKeyError(_("Field name '%s' already exists") % field.name) - - # Input have already inserted field? - if key in self._inserted: - if next: - self._inserted[key].extend( reversed(new_fields) ) - else: - self._inserted[key].extendleft( reversed(new_fields) ) - return - - # Whould like to insert in inserted fields? - if key: - for fields in self._inserted.itervalues(): - names = [item.name for item in fields] - try: - pos = names.index(key) - except ValueError: - continue - if 0 <= pos: - if next: - pos += 1 - fields.rotate(-pos) - fields.extendleft( reversed(new_fields) ) - fields.rotate(pos) - return - - # Get next field. Use None if we are at the end. - if next: - index = self.input[key].index + 1 - try: - key = self.input[index].name - except IndexError: - key = None - - # Check that field names are not in input - if key not in self.input: - raise MissingField(self, key) - - # Insert in original input - self._inserted[key]= deque(new_fields) - - def _getDescription(self): - return self.input.description - description = property(_getDescription) - - def _getStream(self): - # FIXME: This property is maybe a bad idea since address may be differents - return self.input.stream - stream = property(_getStream) - - def _getName(self): - return self.input.name - name = property(_getName) - - def _getEndian(self): - return self.input.endian - endian = property(_getEndian) - - def _getAddress(self): - if self._parent: - return self._parent._getFieldAddress(self.name) - else: - return 0 - address = property(_getAddress) - - def _getAbsoluteAddress(self): - address = self.address - current = self._parent - while current: - address += current.address - current = current._parent - return address - absolute_address = property(_getAbsoluteAddress) - - def hasValue(self): - return False -# return self._parent.input[self.name].hasValue() - - def _getSize(self): - if self.is_altered: - return sum(field.size for field in self) - else: - return self.input.size - size = property(_getSize) - - def _getPath(self): - return self.input.path - path = property(_getPath) - - def _getOriginalField(self, name): - assert name in self.input - return self.input[name] - - def _getFieldInputAddress(self, name): - """ - Absolute address of a field from the input field set. - """ - assert name in self.input - return self.input[name].absolute_address - - def _getFieldAddress(self, name): - """ - Compute relative address of a field. The operation takes care of - deleted and resized fields. - """ - #assert name not in self._deleted - addr = 0 - for field in self: - if field.name == name: - return addr - addr += field.size - raise MissingField(self, name) - - def _getItemByPath(self, path): - if not path[0]: - path = path[1:] - field = self - for name in path: - field = field[name] - return field - - def __contains__(self, name): - try: - field = self[name] - return (field is not None) - except MissingField: - return False - - def __getitem__(self, key): - """ - Create a weak reference to an editable field (EditableField) for the - field with specified name. If the field is removed later, using the - editable field will raise a weakref.ReferenceError exception. - - May raise a MissingField error if the field doesn't exist in original - field set or it has been deleted. - """ - if "/" in key: - return self._getItemByPath(key.split("/")) - if isinstance(key, (int, long)): - raise EditorError("Integer index are not supported") - - if (key in self._deleted) or (key not in self.input): - raise MissingField(self, key) - if key not in self._fields: - field = self.input[key] - if field.is_field_set: - self._fields[key] = createEditableFieldSet(self, field) - else: - self._fields[key] = createEditableField(self, field) - return weakref.proxy(self._fields[key]) - - def __delitem__(self, name): - """ - Remove a field from the field set. May raise an MissingField exception - if the field has already been deleted. - """ - if name in self._deleted: - raise MissingField(self, name) - self._deleted.add(name) - if name in self._fields: - del self._fields[name] - - def writeInto(self, output): - """ - Write the content if this field set into the output stream - (OutputStream). - """ - if not self.is_altered: - # Not altered: just copy bits/bytes - input = self.input - if input.size % 8: - output.copyBitsFrom(input.stream, - input.absolute_address, input.size, input.endian) - else: - output.copyBytesFrom(input.stream, - input.absolute_address, input.size//8) - else: - # Altered: call writeInto() method of each field - realaddr = 0 - for field in self: - field.writeInto(output) - realaddr += field.size - - def _getValue(self): - raise EditorError('Field set "%s" has no value' % self.path) - def _setValue(self, value): - raise EditorError('Field set "%s" value is read only' % self.path) - value = property(_getValue, _setValue, "Value of field") - -class EditableFloat(EditableFieldSet): - _value = None - - def _isAltered(self): - return (self._value is not None) - is_altered = property(_isAltered) - - def writeInto(self, output): - if self._value is not None: - self._write(output) - else: - EditableFieldSet.writeInto(self, output) - - def _write(self, output): - format = self.input.struct_format - raw = struct.pack(format, self._value) - output.writeBytes(raw) - - def _setValue(self, value): - self.parent._is_altered = True - self._value = value - value = property(EditableFieldSet._getValue, _setValue) - -def createEditableFieldSet(parent, field): - cls = field.__class__ - # FIXME: Support Float80 - if cls in (Float32, Float64): - return EditableFloat(parent, field) - else: - return EditableFieldSet(parent, field) - -class NewFieldSet(EditableFieldSet): - def __init__(self, parent, name): - EditableFieldSet.__init__(self, parent, None) - self._name = name - self._endian = parent.endian - - def __iter__(self): - if None in self._inserted: - return iter(self._inserted[None]) - else: - raise StopIteration() - - def _getName(self): - return self._name - name = property(_getName) - - def _getEndian(self): - return self._endian - endian = property(_getEndian) - - is_altered = property(lambda self: True) - -def createEditor(fieldset): - return EditableFieldSet(None, fieldset) - diff --git a/lib/hachoir_editor/typed_field.py b/lib/hachoir_editor/typed_field.py deleted file mode 100644 index 4abc989..0000000 --- a/lib/hachoir_editor/typed_field.py +++ /dev/null @@ -1,253 +0,0 @@ -from hachoir_core.field import ( - RawBits, Bit, Bits, PaddingBits, - RawBytes, Bytes, PaddingBytes, - GenericString, Character, - isInteger, isString) -from hachoir_editor import FakeField - -class EditableField(FakeField): - """ - Pure virtual class used to write editable field class. - """ - - _is_altered = False - def __init__(self, parent, name, value=None): - FakeField.__init__(self, parent, name) - self._value = value - - def _isAltered(self): - return self._is_altered - is_altered = property(_isAltered) - - def hasValue(self): - return True - - def _computeSize(self): - raise NotImplementedError() - def _getValue(self): - return self._value - def _setValue(self, value): - self._value = value - - def _propGetValue(self): - if self._value is not None: - return self._getValue() - else: - return FakeField._getValue(self) - def _propSetValue(self, value): - self._setValue(value) - self._is_altered = True - value = property(_propGetValue, _propSetValue) - - def _getSize(self): - if self._value is not None: - return self._computeSize() - else: - return FakeField._getSize(self) - size = property(_getSize) - - def _write(self, output): - raise NotImplementedError() - - def writeInto(self, output): - if self._is_altered: - self._write(output) - else: - return FakeField.writeInto(self, output) - -class EditableFixedField(EditableField): - """ - Editable field with fixed size. - """ - - def __init__(self, parent, name, value=None, size=None): - EditableField.__init__(self, parent, name, value) - if size is not None: - self._size = size - else: - self._size = self._parent._getOriginalField(self._name).size - - def _getSize(self): - return self._size - size = property(_getSize) - -class EditableBits(EditableFixedField): - def __init__(self, parent, name, *args): - if args: - if len(args) != 2: - raise TypeError( - "Wrong argument count, EditableBits constructor prototype is: " - "(parent, name, [size, value])") - size = args[0] - value = args[1] - assert isinstance(value, (int, long)) - else: - size = None - value = None - EditableFixedField.__init__(self, parent, name, value, size) - if args: - self._setValue(args[1]) - self._is_altered = True - - def _setValue(self, value): - if not(0 <= value < (1 << self._size)): - raise ValueError("Invalid value, must be in range %s..%s" - % (0, (1 << self._size) - 1)) - self._value = value - - def _write(self, output): - output.writeBits(self._size, self._value, self._parent.endian) - -class EditableBytes(EditableField): - def _setValue(self, value): - if not value: raise ValueError( - "Unable to set empty string to a EditableBytes field") - self._value = value - - def _computeSize(self): - return len(self._value) * 8 - - def _write(self, output): - output.writeBytes(self._value) - -class EditableString(EditableField): - MAX_SIZE = { - "Pascal8": (1 << 8)-1, - "Pascal16": (1 << 16)-1, - "Pascal32": (1 << 32)-1, - } - - def __init__(self, parent, name, *args, **kw): - if len(args) == 2: - value = args[1] - assert isinstance(value, str) # TODO: support Unicode - elif not args: - value = None - else: - raise TypeError( - "Wrong argument count, EditableString constructor prototype is:" - "(parent, name, [format, value])") - EditableField.__init__(self, parent, name, value) - if len(args) == 2: - self._charset = kw.get('charset', None) - self._format = args[0] - if self._format in GenericString.PASCAL_FORMATS: - self._prefix_size = GenericString.PASCAL_FORMATS[self._format] - else: - self._prefix_size = 0 - self._suffix_str = GenericString.staticSuffixStr( - self._format, self._charset, self._parent.endian) - self._is_altered = True - else: - orig = self._parent._getOriginalField(name) - self._charset = orig.charset - self._format = orig.format - self._prefix_size = orig.content_offset - self._suffix_str = orig.suffix_str - - def _setValue(self, value): - size = len(value) - if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size: - raise ValueError("String is too big") - self._value = value - - def _computeSize(self): - return (self._prefix_size + len(self._value) + len(self._suffix_str))*8 - - def _write(self, output): - if self._format in GenericString.SUFFIX_FORMAT: - output.writeBytes(self._value) - output.writeBytes(self._suffix_str) - elif self._format == "fixed": - output.writeBytes(self._value) - else: - assert self._format in GenericString.PASCAL_FORMATS - size = GenericString.PASCAL_FORMATS[self._format] - output.writeInteger(len(self._value), False, size, self._parent.endian) - output.writeBytes(self._value) - -class EditableCharacter(EditableFixedField): - def __init__(self, parent, name, *args): - if args: - if len(args) != 3: - raise TypeError( - "Wrong argument count, EditableCharacter " - "constructor prototype is: (parent, name, [value])") - value = args[0] - if not isinstance(value, str) or len(value) != 1: - raise TypeError("EditableCharacter needs a character") - else: - value = None - EditableFixedField.__init__(self, parent, name, value, 8) - if args: - self._is_altered = True - - def _setValue(self, value): - if not isinstance(value, str) or len(value) != 1: - raise TypeError("EditableCharacter needs a character") - self._value = value - - def _write(self, output): - output.writeBytes(self._value) - -class EditableInteger(EditableFixedField): - VALID_VALUE_SIGNED = { - 8: (-(1 << 8), (1 << 8)-1), - 16: (-(1 << 15), (1 << 15)-1), - 32: (-(1 << 31), (1 << 31)-1), - } - VALID_VALUE_UNSIGNED = { - 8: (0, (1 << 8)-1), - 16: (0, (1 << 16)-1), - 32: (0, (1 << 32)-1) - } - - def __init__(self, parent, name, *args): - if args: - if len(args) != 3: - raise TypeError( - "Wrong argument count, EditableInteger constructor prototype is: " - "(parent, name, [signed, size, value])") - size = args[1] - value = args[2] - assert isinstance(value, (int, long)) - else: - size = None - value = None - EditableFixedField.__init__(self, parent, name, value, size) - if args: - self._signed = args[0] - self._is_altered = True - else: - self._signed = self._parent._getOriginalField(self._name).signed - - def _setValue(self, value): - if self._signed: - valid = self.VALID_VALUE_SIGNED - else: - valid = self.VALID_VALUE_UNSIGNED - minval, maxval = valid[self._size] - if not(minval <= value <= maxval): - raise ValueError("Invalid value, must be in range %s..%s" - % (minval, maxval)) - self._value = value - - def _write(self, output): - output.writeInteger( - self.value, self._signed, self._size//8, self._parent.endian) - -def createEditableField(fieldset, field): - if isInteger(field): - cls = EditableInteger - elif isString(field): - cls = EditableString - elif field.__class__ in (RawBytes, Bytes, PaddingBytes): - cls = EditableBytes - elif field.__class__ in (RawBits, Bits, Bit, PaddingBits): - cls = EditableBits - elif field.__class__ == Character: - cls = EditableCharacter - else: - cls = FakeField - return cls(fieldset, field.name) - diff --git a/lib/mat.py b/lib/mat.py deleted file mode 100644 index d22c9ab..0000000 --- a/lib/mat.py +++ /dev/null @@ -1,104 +0,0 @@ -import hachoir_core.error -import hachoir_core.cmd_line -import hachoir_parser -import hachoir_metadata - -from strippers import * - -from hachoir_editor import (createEditor, - NewFieldSet, EditableInteger, EditableBytes) - -import hachoir_editor - -import sys - -__version__ = "0.1" -__author__ = "jvoisin" - - -class file(): - def __init__(self, filename): - self.metadata = {} - self.clean = False - self.editor = createEditor(self.parser) - self.filename = filename - self.filename, self.realname = hachoir_core.cmd_line.unicodeFilename( - self.filename), self.filename - self.parser = hachoir_parser.createParser(self.filename, self.realname) - - if not self.parser: - print("Unable to parse file : sorry") - sys.exit(1) - - try: - self.meta = hachoir_metadata.extractMetadata(self.parser) - except hachoir_core.error.HachoirError, err: - print "Metadata extraction error: %s" % unicode(err) - self.data = None - - if not self.meta: - print "Unable to extract metadata" - sys.exit(1) - - def is_clean(self): - ''' - Return true if the file is clean from any compromizing meta - ''' - return self.clean - - def remove_all(self): - ''' - Remove all the files that are compromizing - ''' - stripEditor(self.editor, self.realname, level, not(values.quiet)) - for key, field in metadata: - if should_remove(key): - remove(self, key) - - def remove(self, field): - ''' - Remove the given file - ''' - del editor[field] - return True - - - def get_meta(self): - '''return a dict with all the meta of the file''' - #FIXME : sooooooooooo dirty ! - for title in self.meta: - if title.values != []: #if the field is not empty - value = "" - for item in title.values: - value = item.text - self.metadata[title.key] = value - return self.metadata - - def should_remove(self, field): - ''' - return True if the field is compromizing - abstract method - ''' - raise NotImplementedError() - -def stripEditor(editor, filename, realname, level, verbose): - ''' - Assign a stripper to an editor - ''' - cls = editor.input.__class__ - try: - stripper_cls = strippers[cls] - except KeyError: - print "Don't have stripper for file type: %s" % editor.description - return False - stripper = stripper_cls(editor, level, verbose) - - if stripper(): - output = FileOutputStream(filename, realname) - editor.writeInto(output) - - else: - print _("Stripper doesn't touch the file") - return True - -file(sys.argv[1]).get_meta() diff --git a/lib/strippers.py b/lib/strippers.py deleted file mode 100644 index 70d0fc7..0000000 --- a/lib/strippers.py +++ /dev/null @@ -1,3 +0,0 @@ -strippers = { - JpegFile: JpegStripper, -} diff --git a/lib/test.py b/lib/test.py deleted file mode 100644 index b1ff2a3..0000000 --- a/lib/test.py +++ /dev/null @@ -1,70 +0,0 @@ -import mat -import unittest -import shutil -import glob -import tempfile - -FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) - -class MATTest(unittest.TestCase): - def setUp(self): - '''create working copy of the clean and the dirty file in the TMP dir''' - self.file_list = [] - self.tmpdir = tempfile.mkdtemp() - - for clean, dirty in FILE_LIST: - shutil.copy2(clean, self.tmpdir + clean) - shutil.copy2(dirty, self.tmpdir + dirty) - self.file_list.append((self.tmpdir + clean, self.tmpdir + dirty)) - - def tearDown(self): - '''Remove the tmp folder''' - shutil.rmtree(self.tmpdir) - -class Test_Remove(MATTest): - def test_remove(self): - '''make sure that the lib remove all compromizing meta''' - for clean, dirty in self.file_list: - mat.file(dirty).remove_all() - self.assertTrue(mat.file(dirty).is_clean()) - - def test_remove_empty(self): - '''Test removal with clean files''' - for clean, dirty in self.file_list: - mat.file(clean).remove_all() - self.assertTrue(mat.file(clean).is_clean()) - - -class Test_List(MATTest): - def test_list(self): - '''check if get_meta returns all the expected meta''' - for clean, dirty in self.file_list: - meta_list = dict() #FIXME - self.assertDictEqual(mat.file(dirty).get_meta(), meta_list) - - def testlist_list_empty(self): - '''check that a listing of a clean file return an empty dict''' - for clean, dirty in self.file_list: - self.assertEqual(mat.file(clean).get_meta(), None) - - -class Test_isClean(MATTest): - def test_clean(self): - '''test is_clean on clean files''' - for clean, dirty in self.file_list: - print "e" - self.assertTrue(mat.file(clean).is_clean()) - - def test_clean(self): - '''test is_clean on dirty files''' - for clean, dirty in self.file_list: - self.assertFalse(mat.file(dirty).is_clean()) - - -if __name__ == '__main__': - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(Test_Remove)) - suite.addTest(unittest.makeSuite(Test_List)) - suite.addTest(unittest.makeSuite(Test_isClean)) - unittest.TextTestRunner(verbosity=2).run(suite) - -- cgit v1.3