From c308cf7daaa4fa46377e2df0f2e9a397981e19b2 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 10 Jun 2011 01:29:29 +0200 Subject: The current version is (mostly) working --- README | 0 clean.jpg | Bin 0 -> 83261 bytes cli.py | 77 ++++++++- clitest.py | 54 ++++++ dirty.jpg | Bin 0 -> 83261 bytes hachoir_editor/__init__.py | 8 + hachoir_editor/__init__.pyc | Bin 0 -> 603 bytes hachoir_editor/field.py | 69 ++++++++ hachoir_editor/field.pyc | Bin 0 -> 3690 bytes hachoir_editor/fieldset.py | 346 ++++++++++++++++++++++++++++++++++++++ hachoir_editor/fieldset.pyc | Bin 0 -> 14654 bytes hachoir_editor/typed_field.py | 253 ++++++++++++++++++++++++++++ hachoir_editor/typed_field.pyc | Bin 0 -> 11745 bytes lib/check/images/jpg.py | 17 -- lib/hachoir_editor/__init__.py | 8 - lib/hachoir_editor/field.py | 69 -------- lib/hachoir_editor/fieldset.py | 346 -------------------------------------- lib/hachoir_editor/typed_field.py | 253 ---------------------------- lib/mat.py | 104 ------------ lib/strippers.py | 3 - lib/test.py | 70 -------- libtest.py | 56 ++++++ mat.py | 117 +++++++++++++ test.py | 29 ++++ 24 files changed, 1008 insertions(+), 871 deletions(-) delete mode 100644 README create mode 100644 clean.jpg create mode 100644 clitest.py create mode 100644 dirty.jpg create mode 100644 hachoir_editor/__init__.py create mode 100644 hachoir_editor/__init__.pyc create mode 100644 hachoir_editor/field.py create mode 100644 hachoir_editor/field.pyc create mode 100644 hachoir_editor/fieldset.py create mode 100644 hachoir_editor/fieldset.pyc create mode 100644 hachoir_editor/typed_field.py create mode 100644 hachoir_editor/typed_field.pyc delete mode 100644 lib/check/images/jpg.py delete mode 100644 lib/hachoir_editor/__init__.py delete mode 100644 lib/hachoir_editor/field.py delete mode 100644 lib/hachoir_editor/fieldset.py delete mode 100644 lib/hachoir_editor/typed_field.py delete mode 100644 lib/mat.py delete mode 100644 lib/strippers.py delete mode 100644 lib/test.py create mode 100644 libtest.py create mode 100644 mat.py create mode 100644 test.py diff --git a/README b/README deleted file mode 100644 index e69de29..0000000 diff --git a/clean.jpg b/clean.jpg new file mode 100644 index 0000000..09c9161 Binary files /dev/null and b/clean.jpg differ diff --git a/cli.py b/cli.py index d249917..fedb40c 100644 --- a/cli.py +++ b/cli.py @@ -3,7 +3,82 @@ Metadata anonymisation toolkit """ -import lib import sys +import mat +import argparse +__version__ = "0.1" +def parsing(): + ''' + Parse the arguments, + and returns a dict + ''' + parser = argparse.ArgumentParser(version=__version__, + description="Metadata Anonymisation Toolkit - CLI %s" % __version__) + + #list and check clean are mutually exclusives + group = parser.add_mutually_exclusive_group() + + #list meta + group.add_argument('--print-meta', '-p', action="store_true", default=False, + dest='just_list', help='List all the meta of a file,\ + without removing them') + + #check if the file is clean + group.add_argument('--check-clean', '-c', action="store_true", + default=False, dest='just_check', + help='Check if a file is clean of harmfull metadatas') + + #list of files to process + parser.add_argument('filelist', action="store", type=str, nargs="+", + metavar='file', help='File(s) to process') + + return parser.parse_args() + +def list_meta(class_file, filename): + ''' + Print all the meta of "filename" on stdout + ''' + print("[+] File %s :" % filename) + for key, item in class_file.get_meta().iteritems(): + print("\t%s : %s" % (key, item) ) + +def is_clean(class_file, filename): + ''' + Say if "filename" is clean or not + ''' + if class_file.is_clean(): + print("[+] %s is clean" % filename) + else: + print("[+] %s is not clean" % filename) + +def clean_meta(class_file, filename): + ''' + Clean the file "filename" + ''' + print("[+] Cleaning %s" % filename) + if class_file.is_clean(): + print("%s is already clean" % filename) + else: + class_file.remove_all() + print("%s cleaned !" % filename) + +def main(): + args = parsing() + + #func receive the function correponding to the options given as parameters + if args.just_list is True: #only print metadatas + func = list_meta + elif args.just_check is True: #only check if the file is clean + func = is_clean + else: #clean the file + func = clean_meta + + for filename in args.filelist: + class_file = mat.create_class_file(filename) + func(class_file, filename) + print("\n") + +if __name__ == '__main__': + main() diff --git a/clitest.py b/clitest.py new file mode 100644 index 0000000..00955ae --- /dev/null +++ b/clitest.py @@ -0,0 +1,54 @@ +import cli +import unittest +import test + +import shlex +import subprocess + +class Test_Remove_cli(test.MATTest): + def test_remove(self): + '''make sure that the cli remove all compromizing meta''' + for clean, dirty in self.file_list: + subprocess.call("cli.py %s" dirty) + self.assertTrue(mat.file(dirty).is_clean()) + + def test_remove_empty(self): + '''Test removal with clean files''' + for clean, dirty in self.file_list: + subprocess.call("cli.py %s" clean) + self.assertTrue(mat.file(dirty).is_clean()) + + +class Test_List_cli(test.MATTest): + def test_list(self): + '''check if get_meta returns all the expected meta''' + for clean, dirty in self.file_list: + meta_list = dict("fixme":"please",) #FIXME + self.assertDictEqual(mat.file(dirty).get_meta(), meta_list) + + def testlist_list_empty(self): + '''check that a listing of a clean file return an empty dict''' + for clean, dirty in self.file_list: + self.assertEqual(mat.file(clean).get_meta(), None) + + +class Test_isClean_cli(test.MATTest): + def test_clean(self): + '''test is_clean on clean files''' + for clean, dirty in self.file_list: + print "e" + self.assertTrue(mat.file(clean).is_clean()) + + def test_clean(self): + '''test is_clean on dirty files''' + for clean, dirty in self.file_list: + self.assertFalse(mat.file(dirty).is_clean()) + + +if __name__ == '__main__': + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test_Remove)) + suite.addTest(unittest.makeSuite(Test_List)) + suite.addTest(unittest.makeSuite(Test_isClean)) + unittest.TextTestRunner(verbosity=2).run(suite) + diff --git a/dirty.jpg b/dirty.jpg new file mode 100644 index 0000000..09c9161 Binary files /dev/null and b/dirty.jpg differ diff --git a/hachoir_editor/__init__.py b/hachoir_editor/__init__.py new file mode 100644 index 0000000..b106278 --- /dev/null +++ b/hachoir_editor/__init__.py @@ -0,0 +1,8 @@ +from hachoir_editor.field import ( + EditorError, FakeField) +from hachoir_editor.typed_field import ( + EditableField, EditableBits, EditableBytes, + EditableInteger, EditableString, + createEditableField) +from hachoir_editor.fieldset import EditableFieldSet, NewFieldSet, createEditor + diff --git a/hachoir_editor/__init__.pyc b/hachoir_editor/__init__.pyc new file mode 100644 index 0000000..23f4a8e Binary files /dev/null and b/hachoir_editor/__init__.pyc differ diff --git a/hachoir_editor/field.py b/hachoir_editor/field.py new file mode 100644 index 0000000..6b1efe3 --- /dev/null +++ b/hachoir_editor/field.py @@ -0,0 +1,69 @@ +from hachoir_core.error import HachoirError +from hachoir_core.field import joinPath, MissingField + +class EditorError(HachoirError): + pass + +class FakeField(object): + """ + This class have API looks similar to Field API, but objects don't contain + any value: all values are _computed_ by parent methods. + + Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). + """ + is_field_set = False + + def __init__(self, parent, name): + self._parent = parent + self._name = name + + def _getPath(self): + return joinPath(self._parent.path, self._name) + path = property(_getPath) + + def _getName(self): + return self._name + name = property(_getName) + + def _getAddress(self): + return self._parent._getFieldAddress(self._name) + address = property(_getAddress) + + def _getSize(self): + return self._parent.input[self._name].size + size = property(_getSize) + + def _getValue(self): + return self._parent.input[self._name].value + value = property(_getValue) + + def createDisplay(self): + # TODO: Returns new value if field is altered + return self._parent.input[self._name].display + display = property(createDisplay) + + def _getParent(self): + return self._parent + parent = property(_getParent) + + def hasValue(self): + return self._parent.input[self._name].hasValue() + + def __getitem__(self, key): + # TODO: Implement this function! + raise MissingField(self, key) + + def _isAltered(self): + return False + is_altered = property(_isAltered) + + def writeInto(self, output): + size = self.size + addr = self._parent._getFieldInputAddress(self._name) + input = self._parent.input + stream = input.stream + if size % 8: + output.copyBitsFrom(stream, addr, size, input.endian) + else: + output.copyBytesFrom(stream, addr, size//8) + diff --git a/hachoir_editor/field.pyc b/hachoir_editor/field.pyc new file mode 100644 index 0000000..ff7b91d Binary files /dev/null and b/hachoir_editor/field.pyc differ diff --git a/hachoir_editor/fieldset.py b/hachoir_editor/fieldset.py new file mode 100644 index 0000000..1669b5a --- /dev/null +++ b/hachoir_editor/fieldset.py @@ -0,0 +1,346 @@ +from hachoir_core.dict import UniqKeyError +from hachoir_core.field import MissingField, Float32, Float64, FakeArray +from hachoir_core.compatibility import any +from hachoir_core.i18n import _ +from hachoir_editor import createEditableField, EditorError +from collections import deque # Python 2.4 +import weakref # Python 2.1 +import struct + +class EditableFieldSet(object): + MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors + is_field_set = True + + def __init__(self, parent, fieldset): + self._parent = parent + self.input = fieldset # original FieldSet + self._fields = {} # cache of editable fields + self._deleted = set() # Names of deleted fields + self._inserted = {} # Inserted field (name => list of field, + # where name is the name after) + + def array(self, key): + # FIXME: Use cache? + return FakeArray(self, key) + + def _getParent(self): + return self._parent + parent = property(_getParent) + + def _isAltered(self): + if self._inserted: + return True + if self._deleted: + return True + return any(field.is_altered for field in self._fields.itervalues()) + is_altered = property(_isAltered) + + def reset(self): + """ + Reset the field set and the input field set. + """ + for key, field in self._fields.iteritems(): + if not field.is_altered: + del self._fields[key] + self.input.reset() + + def __len__(self): + return len(self.input) \ + - len(self._deleted) \ + + sum( len(new) for new in self._inserted.itervalues() ) + + def __iter__(self): + for field in self.input: + name = field.name + if name in self._inserted: + for newfield in self._inserted[name]: + yield weakref.proxy(newfield) + if name not in self._deleted: + yield self[name] + if None in self._inserted: + for newfield in self._inserted[None]: + yield weakref.proxy(newfield) + + def insertBefore(self, name, *new_fields): + self._insert(name, new_fields, False) + + def insertAfter(self, name, *new_fields): + self._insert(name, new_fields, True) + + def insert(self, *new_fields): + self._insert(None, new_fields, True) + + def _insert(self, key, new_fields, next): + """ + key is the name of the field before which new_fields + will be inserted. If next is True, the fields will be inserted + _after_ this field. + """ + # Set unique field name + for field in new_fields: + if field._name.endswith("[]"): + self.input.setUniqueFieldName(field) + + # Check that there is no duplicate in inserted fields + new_names = list(field.name for field in new_fields) + names_set = set(new_names) + if len(names_set) != len(new_fields): + duplicates = (name for name in names_set if 1 < new_names.count(name)) + raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates)) + + # Check that field names are not in input + if self.input: # Write special version for NewFieldSet? + for name in new_names: + if name in self.input and name not in self._deleted: + raise UniqKeyError(_("Field name '%s' already exists") % name) + + # Check that field names are not in inserted fields + for fields in self._inserted.itervalues(): + for field in fields: + if field.name in new_names: + raise UniqKeyError(_("Field name '%s' already exists") % field.name) + + # Input have already inserted field? + if key in self._inserted: + if next: + self._inserted[key].extend( reversed(new_fields) ) + else: + self._inserted[key].extendleft( reversed(new_fields) ) + return + + # Whould like to insert in inserted fields? + if key: + for fields in self._inserted.itervalues(): + names = [item.name for item in fields] + try: + pos = names.index(key) + except ValueError: + continue + if 0 <= pos: + if next: + pos += 1 + fields.rotate(-pos) + fields.extendleft( reversed(new_fields) ) + fields.rotate(pos) + return + + # Get next field. Use None if we are at the end. + if next: + index = self.input[key].index + 1 + try: + key = self.input[index].name + except IndexError: + key = None + + # Check that field names are not in input + if key not in self.input: + raise MissingField(self, key) + + # Insert in original input + self._inserted[key]= deque(new_fields) + + def _getDescription(self): + return self.input.description + description = property(_getDescription) + + def _getStream(self): + # FIXME: This property is maybe a bad idea since address may be differents + return self.input.stream + stream = property(_getStream) + + def _getName(self): + return self.input.name + name = property(_getName) + + def _getEndian(self): + return self.input.endian + endian = property(_getEndian) + + def _getAddress(self): + if self._parent: + return self._parent._getFieldAddress(self.name) + else: + return 0 + address = property(_getAddress) + + def _getAbsoluteAddress(self): + address = self.address + current = self._parent + while current: + address += current.address + current = current._parent + return address + absolute_address = property(_getAbsoluteAddress) + + def hasValue(self): + return False +# return self._parent.input[self.name].hasValue() + + def _getSize(self): + if self.is_altered: + return sum(field.size for field in self) + else: + return self.input.size + size = property(_getSize) + + def _getPath(self): + return self.input.path + path = property(_getPath) + + def _getOriginalField(self, name): + assert name in self.input + return self.input[name] + + def _getFieldInputAddress(self, name): + """ + Absolute address of a field from the input field set. + """ + assert name in self.input + return self.input[name].absolute_address + + def _getFieldAddress(self, name): + """ + Compute relative address of a field. The operation takes care of + deleted and resized fields. + """ + #assert name not in self._deleted + addr = 0 + for field in self: + if field.name == name: + return addr + addr += field.size + raise MissingField(self, name) + + def _getItemByPath(self, path): + if not path[0]: + path = path[1:] + field = self + for name in path: + field = field[name] + return field + + def __contains__(self, name): + try: + field = self[name] + return (field is not None) + except MissingField: + return False + + def __getitem__(self, key): + """ + Create a weak reference to an editable field (EditableField) for the + field with specified name. If the field is removed later, using the + editable field will raise a weakref.ReferenceError exception. + + May raise a MissingField error if the field doesn't exist in original + field set or it has been deleted. + """ + if "/" in key: + return self._getItemByPath(key.split("/")) + if isinstance(key, (int, long)): + raise EditorError("Integer index are not supported") + + if (key in self._deleted) or (key not in self.input): + raise MissingField(self, key) + if key not in self._fields: + field = self.input[key] + if field.is_field_set: + self._fields[key] = createEditableFieldSet(self, field) + else: + self._fields[key] = createEditableField(self, field) + return weakref.proxy(self._fields[key]) + + def __delitem__(self, name): + """ + Remove a field from the field set. May raise an MissingField exception + if the field has already been deleted. + """ + if name in self._deleted: + raise MissingField(self, name) + self._deleted.add(name) + if name in self._fields: + del self._fields[name] + + def writeInto(self, output): + """ + Write the content if this field set into the output stream + (OutputStream). + """ + if not self.is_altered: + # Not altered: just copy bits/bytes + input = self.input + if input.size % 8: + output.copyBitsFrom(input.stream, + input.absolute_address, input.size, input.endian) + else: + output.copyBytesFrom(input.stream, + input.absolute_address, input.size//8) + else: + # Altered: call writeInto() method of each field + realaddr = 0 + for field in self: + field.writeInto(output) + realaddr += field.size + + def _getValue(self): + raise EditorError('Field set "%s" has no value' % self.path) + def _setValue(self, value): + raise EditorError('Field set "%s" value is read only' % self.path) + value = property(_getValue, _setValue, "Value of field") + +class EditableFloat(EditableFieldSet): + _value = None + + def _isAltered(self): + return (self._value is not None) + is_altered = property(_isAltered) + + def writeInto(self, output): + if self._value is not None: + self._write(output) + else: + EditableFieldSet.writeInto(self, output) + + def _write(self, output): + format = self.input.struct_format + raw = struct.pack(format, self._value) + output.writeBytes(raw) + + def _setValue(self, value): + self.parent._is_altered = True + self._value = value + value = property(EditableFieldSet._getValue, _setValue) + +def createEditableFieldSet(parent, field): + cls = field.__class__ + # FIXME: Support Float80 + if cls in (Float32, Float64): + return EditableFloat(parent, field) + else: + return EditableFieldSet(parent, field) + +class NewFieldSet(EditableFieldSet): + def __init__(self, parent, name): + EditableFieldSet.__init__(self, parent, None) + self._name = name + self._endian = parent.endian + + def __iter__(self): + if None in self._inserted: + return iter(self._inserted[None]) + else: + raise StopIteration() + + def _getName(self): + return self._name + name = property(_getName) + + def _getEndian(self): + return self._endian + endian = property(_getEndian) + + is_altered = property(lambda self: True) + +def createEditor(fieldset): + return EditableFieldSet(None, fieldset) + diff --git a/hachoir_editor/fieldset.pyc b/hachoir_editor/fieldset.pyc new file mode 100644 index 0000000..f4e37fa Binary files /dev/null and b/hachoir_editor/fieldset.pyc differ diff --git a/hachoir_editor/typed_field.py b/hachoir_editor/typed_field.py new file mode 100644 index 0000000..4abc989 --- /dev/null +++ b/hachoir_editor/typed_field.py @@ -0,0 +1,253 @@ +from hachoir_core.field import ( + RawBits, Bit, Bits, PaddingBits, + RawBytes, Bytes, PaddingBytes, + GenericString, Character, + isInteger, isString) +from hachoir_editor import FakeField + +class EditableField(FakeField): + """ + Pure virtual class used to write editable field class. + """ + + _is_altered = False + def __init__(self, parent, name, value=None): + FakeField.__init__(self, parent, name) + self._value = value + + def _isAltered(self): + return self._is_altered + is_altered = property(_isAltered) + + def hasValue(self): + return True + + def _computeSize(self): + raise NotImplementedError() + def _getValue(self): + return self._value + def _setValue(self, value): + self._value = value + + def _propGetValue(self): + if self._value is not None: + return self._getValue() + else: + return FakeField._getValue(self) + def _propSetValue(self, value): + self._setValue(value) + self._is_altered = True + value = property(_propGetValue, _propSetValue) + + def _getSize(self): + if self._value is not None: + return self._computeSize() + else: + return FakeField._getSize(self) + size = property(_getSize) + + def _write(self, output): + raise NotImplementedError() + + def writeInto(self, output): + if self._is_altered: + self._write(output) + else: + return FakeField.writeInto(self, output) + +class EditableFixedField(EditableField): + """ + Editable field with fixed size. + """ + + def __init__(self, parent, name, value=None, size=None): + EditableField.__init__(self, parent, name, value) + if size is not None: + self._size = size + else: + self._size = self._parent._getOriginalField(self._name).size + + def _getSize(self): + return self._size + size = property(_getSize) + +class EditableBits(EditableFixedField): + def __init__(self, parent, name, *args): + if args: + if len(args) != 2: + raise TypeError( + "Wrong argument count, EditableBits constructor prototype is: " + "(parent, name, [size, value])") + size = args[0] + value = args[1] + assert isinstance(value, (int, long)) + else: + size = None + value = None + EditableFixedField.__init__(self, parent, name, value, size) + if args: + self._setValue(args[1]) + self._is_altered = True + + def _setValue(self, value): + if not(0 <= value < (1 << self._size)): + raise ValueError("Invalid value, must be in range %s..%s" + % (0, (1 << self._size) - 1)) + self._value = value + + def _write(self, output): + output.writeBits(self._size, self._value, self._parent.endian) + +class EditableBytes(EditableField): + def _setValue(self, value): + if not value: raise ValueError( + "Unable to set empty string to a EditableBytes field") + self._value = value + + def _computeSize(self): + return len(self._value) * 8 + + def _write(self, output): + output.writeBytes(self._value) + +class EditableString(EditableField): + MAX_SIZE = { + "Pascal8": (1 << 8)-1, + "Pascal16": (1 << 16)-1, + "Pascal32": (1 << 32)-1, + } + + def __init__(self, parent, name, *args, **kw): + if len(args) == 2: + value = args[1] + assert isinstance(value, str) # TODO: support Unicode + elif not args: + value = None + else: + raise TypeError( + "Wrong argument count, EditableString constructor prototype is:" + "(parent, name, [format, value])") + EditableField.__init__(self, parent, name, value) + if len(args) == 2: + self._charset = kw.get('charset', None) + self._format = args[0] + if self._format in GenericString.PASCAL_FORMATS: + self._prefix_size = GenericString.PASCAL_FORMATS[self._format] + else: + self._prefix_size = 0 + self._suffix_str = GenericString.staticSuffixStr( + self._format, self._charset, self._parent.endian) + self._is_altered = True + else: + orig = self._parent._getOriginalField(name) + self._charset = orig.charset + self._format = orig.format + self._prefix_size = orig.content_offset + self._suffix_str = orig.suffix_str + + def _setValue(self, value): + size = len(value) + if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size: + raise ValueError("String is too big") + self._value = value + + def _computeSize(self): + return (self._prefix_size + len(self._value) + len(self._suffix_str))*8 + + def _write(self, output): + if self._format in GenericString.SUFFIX_FORMAT: + output.writeBytes(self._value) + output.writeBytes(self._suffix_str) + elif self._format == "fixed": + output.writeBytes(self._value) + else: + assert self._format in GenericString.PASCAL_FORMATS + size = GenericString.PASCAL_FORMATS[self._format] + output.writeInteger(len(self._value), False, size, self._parent.endian) + output.writeBytes(self._value) + +class EditableCharacter(EditableFixedField): + def __init__(self, parent, name, *args): + if args: + if len(args) != 3: + raise TypeError( + "Wrong argument count, EditableCharacter " + "constructor prototype is: (parent, name, [value])") + value = args[0] + if not isinstance(value, str) or len(value) != 1: + raise TypeError("EditableCharacter needs a character") + else: + value = None + EditableFixedField.__init__(self, parent, name, value, 8) + if args: + self._is_altered = True + + def _setValue(self, value): + if not isinstance(value, str) or len(value) != 1: + raise TypeError("EditableCharacter needs a character") + self._value = value + + def _write(self, output): + output.writeBytes(self._value) + +class EditableInteger(EditableFixedField): + VALID_VALUE_SIGNED = { + 8: (-(1 << 8), (1 << 8)-1), + 16: (-(1 << 15), (1 << 15)-1), + 32: (-(1 << 31), (1 << 31)-1), + } + VALID_VALUE_UNSIGNED = { + 8: (0, (1 << 8)-1), + 16: (0, (1 << 16)-1), + 32: (0, (1 << 32)-1) + } + + def __init__(self, parent, name, *args): + if args: + if len(args) != 3: + raise TypeError( + "Wrong argument count, EditableInteger constructor prototype is: " + "(parent, name, [signed, size, value])") + size = args[1] + value = args[2] + assert isinstance(value, (int, long)) + else: + size = None + value = None + EditableFixedField.__init__(self, parent, name, value, size) + if args: + self._signed = args[0] + self._is_altered = True + else: + self._signed = self._parent._getOriginalField(self._name).signed + + def _setValue(self, value): + if self._signed: + valid = self.VALID_VALUE_SIGNED + else: + valid = self.VALID_VALUE_UNSIGNED + minval, maxval = valid[self._size] + if not(minval <= value <= maxval): + raise ValueError("Invalid value, must be in range %s..%s" + % (minval, maxval)) + self._value = value + + def _write(self, output): + output.writeInteger( + self.value, self._signed, self._size//8, self._parent.endian) + +def createEditableField(fieldset, field): + if isInteger(field): + cls = EditableInteger + elif isString(field): + cls = EditableString + elif field.__class__ in (RawBytes, Bytes, PaddingBytes): + cls = EditableBytes + elif field.__class__ in (RawBits, Bits, Bit, PaddingBits): + cls = EditableBits + elif field.__class__ == Character: + cls = EditableCharacter + else: + cls = FakeField + return cls(fieldset, field.name) + diff --git a/hachoir_editor/typed_field.pyc b/hachoir_editor/typed_field.pyc new file mode 100644 index 0000000..3d442ed Binary files /dev/null and b/hachoir_editor/typed_field.pyc differ diff --git a/lib/check/images/jpg.py b/lib/check/images/jpg.py deleted file mode 100644 index 7f29587..0000000 --- a/lib/check/images/jpg.py +++ /dev/null @@ -1,17 +0,0 @@ -import hachoir_core.error -import hachoir_core.cmd_line -import hachoir_parser -import hachoir_metadata -import sys -import mat - - -class JpegStripper(file): - def checkField(self, field): - print(field.description) - if field.name.startswith("comment"): - return True - return field.name in ("photoshop", "exif", "adobe") - return False - - diff --git a/lib/hachoir_editor/__init__.py b/lib/hachoir_editor/__init__.py deleted file mode 100644 index b106278..0000000 --- a/lib/hachoir_editor/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from hachoir_editor.field import ( - EditorError, FakeField) -from hachoir_editor.typed_field import ( - EditableField, EditableBits, EditableBytes, - EditableInteger, EditableString, - createEditableField) -from hachoir_editor.fieldset import EditableFieldSet, NewFieldSet, createEditor - diff --git a/lib/hachoir_editor/field.py b/lib/hachoir_editor/field.py deleted file mode 100644 index 6b1efe3..0000000 --- a/lib/hachoir_editor/field.py +++ /dev/null @@ -1,69 +0,0 @@ -from hachoir_core.error import HachoirError -from hachoir_core.field import joinPath, MissingField - -class EditorError(HachoirError): - pass - -class FakeField(object): - """ - This class have API looks similar to Field API, but objects don't contain - any value: all values are _computed_ by parent methods. - - Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). - """ - is_field_set = False - - def __init__(self, parent, name): - self._parent = parent - self._name = name - - def _getPath(self): - return joinPath(self._parent.path, self._name) - path = property(_getPath) - - def _getName(self): - return self._name - name = property(_getName) - - def _getAddress(self): - return self._parent._getFieldAddress(self._name) - address = property(_getAddress) - - def _getSize(self): - return self._parent.input[self._name].size - size = property(_getSize) - - def _getValue(self): - return self._parent.input[self._name].value - value = property(_getValue) - - def createDisplay(self): - # TODO: Returns new value if field is altered - return self._parent.input[self._name].display - display = property(createDisplay) - - def _getParent(self): - return self._parent - parent = property(_getParent) - - def hasValue(self): - return self._parent.input[self._name].hasValue() - - def __getitem__(self, key): - # TODO: Implement this function! - raise MissingField(self, key) - - def _isAltered(self): - return False - is_altered = property(_isAltered) - - def writeInto(self, output): - size = self.size - addr = self._parent._getFieldInputAddress(self._name) - input = self._parent.input - stream = input.stream - if size % 8: - output.copyBitsFrom(stream, addr, size, input.endian) - else: - output.copyBytesFrom(stream, addr, size//8) - diff --git a/lib/hachoir_editor/fieldset.py b/lib/hachoir_editor/fieldset.py deleted file mode 100644 index 1669b5a..0000000 --- a/lib/hachoir_editor/fieldset.py +++ /dev/null @@ -1,346 +0,0 @@ -from hachoir_core.dict import UniqKeyError -from hachoir_core.field import MissingField, Float32, Float64, FakeArray -from hachoir_core.compatibility import any -from hachoir_core.i18n import _ -from hachoir_editor import createEditableField, EditorError -from collections import deque # Python 2.4 -import weakref # Python 2.1 -import struct - -class EditableFieldSet(object): - MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors - is_field_set = True - - def __init__(self, parent, fieldset): - self._parent = parent - self.input = fieldset # original FieldSet - self._fields = {} # cache of editable fields - self._deleted = set() # Names of deleted fields - self._inserted = {} # Inserted field (name => list of field, - # where name is the name after) - - def array(self, key): - # FIXME: Use cache? - return FakeArray(self, key) - - def _getParent(self): - return self._parent - parent = property(_getParent) - - def _isAltered(self): - if self._inserted: - return True - if self._deleted: - return True - return any(field.is_altered for field in self._fields.itervalues()) - is_altered = property(_isAltered) - - def reset(self): - """ - Reset the field set and the input field set. - """ - for key, field in self._fields.iteritems(): - if not field.is_altered: - del self._fields[key] - self.input.reset() - - def __len__(self): - return len(self.input) \ - - len(self._deleted) \ - + sum( len(new) for new in self._inserted.itervalues() ) - - def __iter__(self): - for field in self.input: - name = field.name - if name in self._inserted: - for newfield in self._inserted[name]: - yield weakref.proxy(newfield) - if name not in self._deleted: - yield self[name] - if None in self._inserted: - for newfield in self._inserted[None]: - yield weakref.proxy(newfield) - - def insertBefore(self, name, *new_fields): - self._insert(name, new_fields, False) - - def insertAfter(self, name, *new_fields): - self._insert(name, new_fields, True) - - def insert(self, *new_fields): - self._insert(None, new_fields, True) - - def _insert(self, key, new_fields, next): - """ - key is the name of the field before which new_fields - will be inserted. If next is True, the fields will be inserted - _after_ this field. - """ - # Set unique field name - for field in new_fields: - if field._name.endswith("[]"): - self.input.setUniqueFieldName(field) - - # Check that there is no duplicate in inserted fields - new_names = list(field.name for field in new_fields) - names_set = set(new_names) - if len(names_set) != len(new_fields): - duplicates = (name for name in names_set if 1 < new_names.count(name)) - raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates)) - - # Check that field names are not in input - if self.input: # Write special version for NewFieldSet? - for name in new_names: - if name in self.input and name not in self._deleted: - raise UniqKeyError(_("Field name '%s' already exists") % name) - - # Check that field names are not in inserted fields - for fields in self._inserted.itervalues(): - for field in fields: - if field.name in new_names: - raise UniqKeyError(_("Field name '%s' already exists") % field.name) - - # Input have already inserted field? - if key in self._inserted: - if next: - self._inserted[key].extend( reversed(new_fields) ) - else: - self._inserted[key].extendleft( reversed(new_fields) ) - return - - # Whould like to insert in inserted fields? - if key: - for fields in self._inserted.itervalues(): - names = [item.name for item in fields] - try: - pos = names.index(key) - except ValueError: - continue - if 0 <= pos: - if next: - pos += 1 - fields.rotate(-pos) - fields.extendleft( reversed(new_fields) ) - fields.rotate(pos) - return - - # Get next field. Use None if we are at the end. - if next: - index = self.input[key].index + 1 - try: - key = self.input[index].name - except IndexError: - key = None - - # Check that field names are not in input - if key not in self.input: - raise MissingField(self, key) - - # Insert in original input - self._inserted[key]= deque(new_fields) - - def _getDescription(self): - return self.input.description - description = property(_getDescription) - - def _getStream(self): - # FIXME: This property is maybe a bad idea since address may be differents - return self.input.stream - stream = property(_getStream) - - def _getName(self): - return self.input.name - name = property(_getName) - - def _getEndian(self): - return self.input.endian - endian = property(_getEndian) - - def _getAddress(self): - if self._parent: - return self._parent._getFieldAddress(self.name) - else: - return 0 - address = property(_getAddress) - - def _getAbsoluteAddress(self): - address = self.address - current = self._parent - while current: - address += current.address - current = current._parent - return address - absolute_address = property(_getAbsoluteAddress) - - def hasValue(self): - return False -# return self._parent.input[self.name].hasValue() - - def _getSize(self): - if self.is_altered: - return sum(field.size for field in self) - else: - return self.input.size - size = property(_getSize) - - def _getPath(self): - return self.input.path - path = property(_getPath) - - def _getOriginalField(self, name): - assert name in self.input - return self.input[name] - - def _getFieldInputAddress(self, name): - """ - Absolute address of a field from the input field set. - """ - assert name in self.input - return self.input[name].absolute_address - - def _getFieldAddress(self, name): - """ - Compute relative address of a field. The operation takes care of - deleted and resized fields. - """ - #assert name not in self._deleted - addr = 0 - for field in self: - if field.name == name: - return addr - addr += field.size - raise MissingField(self, name) - - def _getItemByPath(self, path): - if not path[0]: - path = path[1:] - field = self - for name in path: - field = field[name] - return field - - def __contains__(self, name): - try: - field = self[name] - return (field is not None) - except MissingField: - return False - - def __getitem__(self, key): - """ - Create a weak reference to an editable field (EditableField) for the - field with specified name. If the field is removed later, using the - editable field will raise a weakref.ReferenceError exception. - - May raise a MissingField error if the field doesn't exist in original - field set or it has been deleted. - """ - if "/" in key: - return self._getItemByPath(key.split("/")) - if isinstance(key, (int, long)): - raise EditorError("Integer index are not supported") - - if (key in self._deleted) or (key not in self.input): - raise MissingField(self, key) - if key not in self._fields: - field = self.input[key] - if field.is_field_set: - self._fields[key] = createEditableFieldSet(self, field) - else: - self._fields[key] = createEditableField(self, field) - return weakref.proxy(self._fields[key]) - - def __delitem__(self, name): - """ - Remove a field from the field set. May raise an MissingField exception - if the field has already been deleted. - """ - if name in self._deleted: - raise MissingField(self, name) - self._deleted.add(name) - if name in self._fields: - del self._fields[name] - - def writeInto(self, output): - """ - Write the content if this field set into the output stream - (OutputStream). - """ - if not self.is_altered: - # Not altered: just copy bits/bytes - input = self.input - if input.size % 8: - output.copyBitsFrom(input.stream, - input.absolute_address, input.size, input.endian) - else: - output.copyBytesFrom(input.stream, - input.absolute_address, input.size//8) - else: - # Altered: call writeInto() method of each field - realaddr = 0 - for field in self: - field.writeInto(output) - realaddr += field.size - - def _getValue(self): - raise EditorError('Field set "%s" has no value' % self.path) - def _setValue(self, value): - raise EditorError('Field set "%s" value is read only' % self.path) - value = property(_getValue, _setValue, "Value of field") - -class EditableFloat(EditableFieldSet): - _value = None - - def _isAltered(self): - return (self._value is not None) - is_altered = property(_isAltered) - - def writeInto(self, output): - if self._value is not None: - self._write(output) - else: - EditableFieldSet.writeInto(self, output) - - def _write(self, output): - format = self.input.struct_format - raw = struct.pack(format, self._value) - output.writeBytes(raw) - - def _setValue(self, value): - self.parent._is_altered = True - self._value = value - value = property(EditableFieldSet._getValue, _setValue) - -def createEditableFieldSet(parent, field): - cls = field.__class__ - # FIXME: Support Float80 - if cls in (Float32, Float64): - return EditableFloat(parent, field) - else: - return EditableFieldSet(parent, field) - -class NewFieldSet(EditableFieldSet): - def __init__(self, parent, name): - EditableFieldSet.__init__(self, parent, None) - self._name = name - self._endian = parent.endian - - def __iter__(self): - if None in self._inserted: - return iter(self._inserted[None]) - else: - raise StopIteration() - - def _getName(self): - return self._name - name = property(_getName) - - def _getEndian(self): - return self._endian - endian = property(_getEndian) - - is_altered = property(lambda self: True) - -def createEditor(fieldset): - return EditableFieldSet(None, fieldset) - diff --git a/lib/hachoir_editor/typed_field.py b/lib/hachoir_editor/typed_field.py deleted file mode 100644 index 4abc989..0000000 --- a/lib/hachoir_editor/typed_field.py +++ /dev/null @@ -1,253 +0,0 @@ -from hachoir_core.field import ( - RawBits, Bit, Bits, PaddingBits, - RawBytes, Bytes, PaddingBytes, - GenericString, Character, - isInteger, isString) -from hachoir_editor import FakeField - -class EditableField(FakeField): - """ - Pure virtual class used to write editable field class. - """ - - _is_altered = False - def __init__(self, parent, name, value=None): - FakeField.__init__(self, parent, name) - self._value = value - - def _isAltered(self): - return self._is_altered - is_altered = property(_isAltered) - - def hasValue(self): - return True - - def _computeSize(self): - raise NotImplementedError() - def _getValue(self): - return self._value - def _setValue(self, value): - self._value = value - - def _propGetValue(self): - if self._value is not None: - return self._getValue() - else: - return FakeField._getValue(self) - def _propSetValue(self, value): - self._setValue(value) - self._is_altered = True - value = property(_propGetValue, _propSetValue) - - def _getSize(self): - if self._value is not None: - return self._computeSize() - else: - return FakeField._getSize(self) - size = property(_getSize) - - def _write(self, output): - raise NotImplementedError() - - def writeInto(self, output): - if self._is_altered: - self._write(output) - else: - return FakeField.writeInto(self, output) - -class EditableFixedField(EditableField): - """ - Editable field with fixed size. - """ - - def __init__(self, parent, name, value=None, size=None): - EditableField.__init__(self, parent, name, value) - if size is not None: - self._size = size - else: - self._size = self._parent._getOriginalField(self._name).size - - def _getSize(self): - return self._size - size = property(_getSize) - -class EditableBits(EditableFixedField): - def __init__(self, parent, name, *args): - if args: - if len(args) != 2: - raise TypeError( - "Wrong argument count, EditableBits constructor prototype is: " - "(parent, name, [size, value])") - size = args[0] - value = args[1] - assert isinstance(value, (int, long)) - else: - size = None - value = None - EditableFixedField.__init__(self, parent, name, value, size) - if args: - self._setValue(args[1]) - self._is_altered = True - - def _setValue(self, value): - if not(0 <= value < (1 << self._size)): - raise ValueError("Invalid value, must be in range %s..%s" - % (0, (1 << self._size) - 1)) - self._value = value - - def _write(self, output): - output.writeBits(self._size, self._value, self._parent.endian) - -class EditableBytes(EditableField): - def _setValue(self, value): - if not value: raise ValueError( - "Unable to set empty string to a EditableBytes field") - self._value = value - - def _computeSize(self): - return len(self._value) * 8 - - def _write(self, output): - output.writeBytes(self._value) - -class EditableString(EditableField): - MAX_SIZE = { - "Pascal8": (1 << 8)-1, - "Pascal16": (1 << 16)-1, - "Pascal32": (1 << 32)-1, - } - - def __init__(self, parent, name, *args, **kw): - if len(args) == 2: - value = args[1] - assert isinstance(value, str) # TODO: support Unicode - elif not args: - value = None - else: - raise TypeError( - "Wrong argument count, EditableString constructor prototype is:" - "(parent, name, [format, value])") - EditableField.__init__(self, parent, name, value) - if len(args) == 2: - self._charset = kw.get('charset', None) - self._format = args[0] - if self._format in GenericString.PASCAL_FORMATS: - self._prefix_size = GenericString.PASCAL_FORMATS[self._format] - else: - self._prefix_size = 0 - self._suffix_str = GenericString.staticSuffixStr( - self._format, self._charset, self._parent.endian) - self._is_altered = True - else: - orig = self._parent._getOriginalField(name) - self._charset = orig.charset - self._format = orig.format - self._prefix_size = orig.content_offset - self._suffix_str = orig.suffix_str - - def _setValue(self, value): - size = len(value) - if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size: - raise ValueError("String is too big") - self._value = value - - def _computeSize(self): - return (self._prefix_size + len(self._value) + len(self._suffix_str))*8 - - def _write(self, output): - if self._format in GenericString.SUFFIX_FORMAT: - output.writeBytes(self._value) - output.writeBytes(self._suffix_str) - elif self._format == "fixed": - output.writeBytes(self._value) - else: - assert self._format in GenericString.PASCAL_FORMATS - size = GenericString.PASCAL_FORMATS[self._format] - output.writeInteger(len(self._value), False, size, self._parent.endian) - output.writeBytes(self._value) - -class EditableCharacter(EditableFixedField): - def __init__(self, parent, name, *args): - if args: - if len(args) != 3: - raise TypeError( - "Wrong argument count, EditableCharacter " - "constructor prototype is: (parent, name, [value])") - value = args[0] - if not isinstance(value, str) or len(value) != 1: - raise TypeError("EditableCharacter needs a character") - else: - value = None - EditableFixedField.__init__(self, parent, name, value, 8) - if args: - self._is_altered = True - - def _setValue(self, value): - if not isinstance(value, str) or len(value) != 1: - raise TypeError("EditableCharacter needs a character") - self._value = value - - def _write(self, output): - output.writeBytes(self._value) - -class EditableInteger(EditableFixedField): - VALID_VALUE_SIGNED = { - 8: (-(1 << 8), (1 << 8)-1), - 16: (-(1 << 15), (1 << 15)-1), - 32: (-(1 << 31), (1 << 31)-1), - } - VALID_VALUE_UNSIGNED = { - 8: (0, (1 << 8)-1), - 16: (0, (1 << 16)-1), - 32: (0, (1 << 32)-1) - } - - def __init__(self, parent, name, *args): - if args: - if len(args) != 3: - raise TypeError( - "Wrong argument count, EditableInteger constructor prototype is: " - "(parent, name, [signed, size, value])") - size = args[1] - value = args[2] - assert isinstance(value, (int, long)) - else: - size = None - value = None - EditableFixedField.__init__(self, parent, name, value, size) - if args: - self._signed = args[0] - self._is_altered = True - else: - self._signed = self._parent._getOriginalField(self._name).signed - - def _setValue(self, value): - if self._signed: - valid = self.VALID_VALUE_SIGNED - else: - valid = self.VALID_VALUE_UNSIGNED - minval, maxval = valid[self._size] - if not(minval <= value <= maxval): - raise ValueError("Invalid value, must be in range %s..%s" - % (minval, maxval)) - self._value = value - - def _write(self, output): - output.writeInteger( - self.value, self._signed, self._size//8, self._parent.endian) - -def createEditableField(fieldset, field): - if isInteger(field): - cls = EditableInteger - elif isString(field): - cls = EditableString - elif field.__class__ in (RawBytes, Bytes, PaddingBytes): - cls = EditableBytes - elif field.__class__ in (RawBits, Bits, Bit, PaddingBits): - cls = EditableBits - elif field.__class__ == Character: - cls = EditableCharacter - else: - cls = FakeField - return cls(fieldset, field.name) - diff --git a/lib/mat.py b/lib/mat.py deleted file mode 100644 index d22c9ab..0000000 --- a/lib/mat.py +++ /dev/null @@ -1,104 +0,0 @@ -import hachoir_core.error -import hachoir_core.cmd_line -import hachoir_parser -import hachoir_metadata - -from strippers import * - -from hachoir_editor import (createEditor, - NewFieldSet, EditableInteger, EditableBytes) - -import hachoir_editor - -import sys - -__version__ = "0.1" -__author__ = "jvoisin" - - -class file(): - def __init__(self, filename): - self.metadata = {} - self.clean = False - self.editor = createEditor(self.parser) - self.filename = filename - self.filename, self.realname = hachoir_core.cmd_line.unicodeFilename( - self.filename), self.filename - self.parser = hachoir_parser.createParser(self.filename, self.realname) - - if not self.parser: - print("Unable to parse file : sorry") - sys.exit(1) - - try: - self.meta = hachoir_metadata.extractMetadata(self.parser) - except hachoir_core.error.HachoirError, err: - print "Metadata extraction error: %s" % unicode(err) - self.data = None - - if not self.meta: - print "Unable to extract metadata" - sys.exit(1) - - def is_clean(self): - ''' - Return true if the file is clean from any compromizing meta - ''' - return self.clean - - def remove_all(self): - ''' - Remove all the files that are compromizing - ''' - stripEditor(self.editor, self.realname, level, not(values.quiet)) - for key, field in metadata: - if should_remove(key): - remove(self, key) - - def remove(self, field): - ''' - Remove the given file - ''' - del editor[field] - return True - - - def get_meta(self): - '''return a dict with all the meta of the file''' - #FIXME : sooooooooooo dirty ! - for title in self.meta: - if title.values != []: #if the field is not empty - value = "" - for item in title.values: - value = item.text - self.metadata[title.key] = value - return self.metadata - - def should_remove(self, field): - ''' - return True if the field is compromizing - abstract method - ''' - raise NotImplementedError() - -def stripEditor(editor, filename, realname, level, verbose): - ''' - Assign a stripper to an editor - ''' - cls = editor.input.__class__ - try: - stripper_cls = strippers[cls] - except KeyError: - print "Don't have stripper for file type: %s" % editor.description - return False - stripper = stripper_cls(editor, level, verbose) - - if stripper(): - output = FileOutputStream(filename, realname) - editor.writeInto(output) - - else: - print _("Stripper doesn't touch the file") - return True - -file(sys.argv[1]).get_meta() diff --git a/lib/strippers.py b/lib/strippers.py deleted file mode 100644 index 70d0fc7..0000000 --- a/lib/strippers.py +++ /dev/null @@ -1,3 +0,0 @@ -strippers = { - JpegFile: JpegStripper, -} diff --git a/lib/test.py b/lib/test.py deleted file mode 100644 index b1ff2a3..0000000 --- a/lib/test.py +++ /dev/null @@ -1,70 +0,0 @@ -import mat -import unittest -import shutil -import glob -import tempfile - -FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) - -class MATTest(unittest.TestCase): - def setUp(self): - '''create working copy of the clean and the dirty file in the TMP dir''' - self.file_list = [] - self.tmpdir = tempfile.mkdtemp() - - for clean, dirty in FILE_LIST: - shutil.copy2(clean, self.tmpdir + clean) - shutil.copy2(dirty, self.tmpdir + dirty) - self.file_list.append((self.tmpdir + clean, self.tmpdir + dirty)) - - def tearDown(self): - '''Remove the tmp folder''' - shutil.rmtree(self.tmpdir) - -class Test_Remove(MATTest): - def test_remove(self): - '''make sure that the lib remove all compromizing meta''' - for clean, dirty in self.file_list: - mat.file(dirty).remove_all() - self.assertTrue(mat.file(dirty).is_clean()) - - def test_remove_empty(self): - '''Test removal with clean files''' - for clean, dirty in self.file_list: - mat.file(clean).remove_all() - self.assertTrue(mat.file(clean).is_clean()) - - -class Test_List(MATTest): - def test_list(self): - '''check if get_meta returns all the expected meta''' - for clean, dirty in self.file_list: - meta_list = dict() #FIXME - self.assertDictEqual(mat.file(dirty).get_meta(), meta_list) - - def testlist_list_empty(self): - '''check that a listing of a clean file return an empty dict''' - for clean, dirty in self.file_list: - self.assertEqual(mat.file(clean).get_meta(), None) - - -class Test_isClean(MATTest): - def test_clean(self): - '''test is_clean on clean files''' - for clean, dirty in self.file_list: - print "e" - self.assertTrue(mat.file(clean).is_clean()) - - def test_clean(self): - '''test is_clean on dirty files''' - for clean, dirty in self.file_list: - self.assertFalse(mat.file(dirty).is_clean()) - - -if __name__ == '__main__': - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(Test_Remove)) - suite.addTest(unittest.makeSuite(Test_List)) - suite.addTest(unittest.makeSuite(Test_isClean)) - unittest.TextTestRunner(verbosity=2).run(suite) - diff --git a/libtest.py b/libtest.py new file mode 100644 index 0000000..191a981 --- /dev/null +++ b/libtest.py @@ -0,0 +1,56 @@ +import mat +import unittest +import test + +class Test_Remove_lib(test.MATTest): + def test_remove(self): + '''make sure that the lib remove all compromizing meta''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(dirty) + current_file.remove_all() + self.assertTrue(current_file.is_clean()) + + def test_remove_empty(self): + '''Test removal with clean files''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(clean) + current_file.remove_all() + self.assertTrue(current_file.is_clean()) + + +class Test_List_lib(test.MATTest): + def test_list(self): + '''check if get_meta returns all the expected meta''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(dirty) + meta_list = dict({"fixme":"please"},) + self.assertDictEqual(current_file.get_meta(), meta_list) + + def testlist_list_empty(self): + '''check that a listing of a clean file return an empty dict''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(clean) + self.assertEqual(current_file.get_meta(), dict()) #dirty, isn't it ? + + +class Test_isClean_lib(test.MATTest): + def test_clean(self): + '''test is_clean on clean files''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(dirty) + self.assertTrue(current_file.is_clean()) + + def test_clean(self): + '''test is_clean on dirty files''' + for clean, dirty in self.file_list: + current_file = mat.create_class_file(clean) + self.assertFalse(current_file.is_clean()) + + +if __name__ == '__main__': + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test_Remove_lib)) + suite.addTest(unittest.makeSuite(Test_List_lib)) + suite.addTest(unittest.makeSuite(Test_isClean_lib)) + unittest.TextTestRunner(verbosity=2).run(suite) + diff --git a/mat.py b/mat.py new file mode 100644 index 0000000..8a3afcd --- /dev/null +++ b/mat.py @@ -0,0 +1,117 @@ +import hachoir_core.error +import hachoir_core.cmd_line +import hachoir_parser +import hachoir_metadata +import hachoir_editor + +import sys +import os +import hachoir_parser.image + +__version__ = "0.1" +__author__ = "jvoisin" + + +class file(): + def __init__(self, realname, filename, parser, editor): + self.meta = {} + self.clean = False + self.filename = filename + self.realname = realname + self.parser = parser + self.editor = editor + self.meta = self.__fill_meta() + + def __fill_meta(self): + metadata = {} + try: + meta = hachoir_metadata.extractMetadata(self.parser) + except hachoir_core.error.HachoirError, err: + print("Metadata extraction error: %s" % err) + + if not meta: + print("Unable to extract metadata from the file %s" % self.filename) + sys.exit(1) + + for title in meta: + #fixme i'm so dirty + if title.values != []: #if the field is not empty + value = "" + for item in title.values: + value = item.text + metadata[title.key] = value + return metadata + + def is_clean(self): + ''' + Return true if the file is clean from any compromizing meta + ''' + return self.clean + + def remove_all(self): + ''' + Remove all the files that are compromizing + ''' + for key, field in self.meta.iteritems(): + if self._should_remove(key): + print "BLEH" #DEBUG + #__remove(self, key) + #self.clean = True + + def __remove(self, field): + ''' + Remove the given file + ''' + del self.editor[field] + + + def get_meta(self): + ''' + return a dict with all the meta of the file + ''' + return self.meta + + def _should_remove(self, field): + ''' + return True if the field is compromizing + abstract method + ''' + raise NotImplementedError() + +class JpegStripper(file): + def _should_remove(self, field): + return False + +strippers = { + hachoir_parser.image.JpegFile: JpegStripper, +} + +def create_class_file(name): + ''' + return a $FILETYPEStripper() class, + corresponding to the filetype of the given file + ''' + if not(os.path.isfile(name)): #check if the file exist + print("Error: %s is not a valid file" % name) + sys.exit(1) + + filename = "" + realname = name + filename = hachoir_core.cmd_line.unicodeFilename(name) + parser = hachoir_parser.createParser(filename, realname) + if not parser: + print("Unable to parse the file %s : sorry" % filename) + sys.exit(1) + + editor = hachoir_editor.createEditor(parser) + try: + '''this part is a little tricky : + stripper_class will receice the name of the class $FILETYPEStripper, + (which herits from the "file" class), based on the editor + of given file (name) + ''' + stripper_class = strippers[editor.input.__class__] + except KeyError: + print("Don't have stripper for file type: %s" % editor.description) + sys.exit(1) + return stripper_class(realname, filename, parser, editor) diff --git a/test.py b/test.py new file mode 100644 index 0000000..3138be7 --- /dev/null +++ b/test.py @@ -0,0 +1,29 @@ +''' + Class for the testing suite : + - get the list of all test files + - create a copy of them on start + - remove the copy on end +''' + +import shutil +import glob +import tempfile +import unittest +import mat + +FILE_LIST = zip(glob.glob('clean*'), glob.glob('dirty*')) + +class MATTest(unittest.TestCase): + def setUp(self): + '''create working copy of the clean and the dirty file in the TMP dir''' + self.file_list = [] + self.tmpdir = tempfile.mkdtemp() + + for clean, dirty in FILE_LIST: + shutil.copy2(clean, self.tmpdir + clean) + shutil.copy2(dirty, self.tmpdir + dirty) + self.file_list.append((self.tmpdir + clean, self.tmpdir + dirty)) + + def tearDown(self): + '''Remove the tmp folder''' + shutil.rmtree(self.tmpdir) -- cgit v1.3