From af36529554c39a2eefcc2c8723715e2d25b401b8 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 8 Jun 2014 13:39:18 +0200 Subject: Rename the MAT folder to libmat. This commit fixes some issues for dump operating systems who doesn't handle capitalization. --- MANIFEST.in | 2 +- MAT/__init__.py | 1 - MAT/archive.py | 335 --------------------------------- MAT/audio.py | 53 ------ MAT/bencode/__init__.py | 1 - MAT/bencode/bencode.py | 143 -------------- MAT/exceptions.py | 14 -- MAT/exiftool.py | 78 -------- MAT/hachoir_editor/__init__.py | 8 - MAT/hachoir_editor/field.py | 69 ------- MAT/hachoir_editor/fieldset.py | 352 ----------------------------------- MAT/hachoir_editor/typed_field.py | 253 ------------------------- MAT/images.py | 52 ------ MAT/mat.py | 186 ------------------ MAT/misc.py | 76 -------- MAT/mutagenstripper.py | 33 ---- MAT/office.py | 191 ------------------- MAT/parser.py | 135 -------------- MAT/strippers.py | 70 ------- RELEASE | 4 +- libmat/__init__.py | 1 + libmat/archive.py | 335 +++++++++++++++++++++++++++++++++ libmat/audio.py | 53 ++++++ libmat/bencode/__init__.py | 1 + libmat/bencode/bencode.py | 143 ++++++++++++++ libmat/exceptions.py | 14 ++ libmat/exiftool.py | 78 ++++++++ libmat/hachoir_editor/__init__.py | 8 + libmat/hachoir_editor/field.py | 69 +++++++ libmat/hachoir_editor/fieldset.py | 352 +++++++++++++++++++++++++++++++++++ libmat/hachoir_editor/typed_field.py | 253 +++++++++++++++++++++++++ libmat/images.py | 52 ++++++ libmat/mat.py | 186 ++++++++++++++++++ libmat/misc.py | 76 ++++++++ libmat/mutagenstripper.py | 33 ++++ libmat/office.py | 191 +++++++++++++++++++ libmat/parser.py | 135 ++++++++++++++ libmat/strippers.py | 70 +++++++ mat | 6 +- mat-gui | 8 +- nautilus/nautilus-mat.py | 8 +- setup.py | 2 +- test/clitest.py | 2 +- test/libtest.py | 44 ++--- 44 files changed, 2088 insertions(+), 2088 deletions(-) delete mode 100644 MAT/__init__.py delete mode 100644 MAT/archive.py delete mode 100644 MAT/audio.py delete mode 100644 
MAT/bencode/__init__.py delete mode 100644 MAT/bencode/bencode.py delete mode 100644 MAT/exceptions.py delete mode 100644 MAT/exiftool.py delete mode 100644 MAT/hachoir_editor/__init__.py delete mode 100644 MAT/hachoir_editor/field.py delete mode 100644 MAT/hachoir_editor/fieldset.py delete mode 100644 MAT/hachoir_editor/typed_field.py delete mode 100644 MAT/images.py delete mode 100644 MAT/mat.py delete mode 100644 MAT/misc.py delete mode 100644 MAT/mutagenstripper.py delete mode 100644 MAT/office.py delete mode 100644 MAT/parser.py delete mode 100644 MAT/strippers.py create mode 100644 libmat/__init__.py create mode 100644 libmat/archive.py create mode 100644 libmat/audio.py create mode 100644 libmat/bencode/__init__.py create mode 100644 libmat/bencode/bencode.py create mode 100644 libmat/exceptions.py create mode 100644 libmat/exiftool.py create mode 100644 libmat/hachoir_editor/__init__.py create mode 100644 libmat/hachoir_editor/field.py create mode 100644 libmat/hachoir_editor/fieldset.py create mode 100644 libmat/hachoir_editor/typed_field.py create mode 100644 libmat/images.py create mode 100644 libmat/mat.py create mode 100644 libmat/misc.py create mode 100644 libmat/mutagenstripper.py create mode 100644 libmat/office.py create mode 100644 libmat/parser.py create mode 100644 libmat/strippers.py diff --git a/MANIFEST.in b/MANIFEST.in index 57c97d0..fb0af66 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ include README README.security LICENSE mat mat-gui include TODO mat.desktop include mat.1 mat-gui.1 recursive-include data * -recursive-include MAT *.py +recursive-include libmat *.py recursive-include test *.py clean.* dirty.* recursive-include po * recursive-include nautilus *.py diff --git a/MAT/__init__.py b/MAT/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/MAT/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/MAT/archive.py b/MAT/archive.py deleted file mode 100644 index d483dcc..0000000 --- a/MAT/archive.py +++ 
/dev/null @@ -1,335 +0,0 @@ -''' Take care of archives formats -''' - -import datetime -import logging -import os -import shutil -import stat -import tarfile -import tempfile -import zipfile - -import mat -import parser - -# Zip files do not support dates older than 01/01/1980 -ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) -ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) - - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds() - - -class GenericArchiveStripper(parser.GenericParser): - ''' Represent a generic archive - ''' - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(GenericArchiveStripper, self).__init__(filename, - parser, mime, backup, is_writable, **kwargs) - self.compression = '' - self.add2archive = kwargs['add2archive'] - self.tempdir = tempfile.mkdtemp() - - def __del__(self): - ''' Remove the files inside the temp dir, - then remove the temp dir - ''' - for root, dirs, files in os.walk(self.tempdir): - for item in files: - path_file = os.path.join(root, item) - mat.secure_remove(path_file) - shutil.rmtree(self.tempdir) - - def is_clean(self, list_unsupported=False): - ''' Virtual method to check for harmul metadata - ''' - raise NotImplementedError - - def list_unsupported(self): - ''' Get a list of every non-supported files present in the archive - ''' - return self.is_clean(list_unsupported=True) - - def remove_all(self): - ''' Virtual method to remove all metadata - ''' - raise NotImplementedError - - -class ZipStripper(GenericArchiveStripper): - ''' Represent a zip file - ''' - def __is_zipfile_clean(self, fileinfo): - ''' Check if a ZipInfo object is clean of metadata added - by zip itself, independently of the corresponding file metadata - ''' - if fileinfo.comment != '': - return False - elif fileinfo.date_time != ZIP_EPOCH: - return False - elif fileinfo.create_system != 3: # 3 is UNIX - return False - return True - - def is_clean(self, list_unsupported=False): - ''' Check if the given file is clean from 
harmful metadata - When list_unsupported is True, the method returns a list - of all non-supported/archives files contained in the - archive. - ''' - ret_list = [] - zipin = zipfile.ZipFile(self.filename, 'r') - if zipin.comment != '' and not list_unsupported: - logging.debug('%s has a comment' % self.filename) - return False - for item in zipin.infolist(): - zipin.extract(item, self.tempdir) - path = os.path.join(self.tempdir, item.filename) - if not self.__is_zipfile_clean(item) and not list_unsupported: - logging.debug('%s from %s has compromising zipinfo' % - (item.filename, self.filename)) - return False - if os.path.isfile(path): - cfile = mat.create_class_file(path, False, add2archive=self.add2archive) - if cfile is not None: - if not cfile.is_clean(): - logging.debug('%s from %s has metadata' % (item.filename, self.filename)) - if not list_unsupported: - return False - else: - logging.info('%s\'s fileformat is not supported or harmless.' - % item.filename) - basename, ext = os.path.splitext(path) - if os.path.basename(item.filename) not in ('mimetype', '.rels'): - if ext not in parser.NOMETA: - if not list_unsupported: - return False - ret_list.append(item.filename) - zipin.close() - if list_unsupported: - return ret_list - return True - - def get_meta(self): - ''' Return all the metadata of a zip archive''' - zipin = zipfile.ZipFile(self.filename, 'r') - metadata = {} - if zipin.comment != '': - metadata['comment'] = zipin.comment - for item in zipin.infolist(): - zipinfo_meta = self.__get_zipinfo_meta(item) - if zipinfo_meta != {}: # zipinfo metadata - metadata[item.filename + "'s zipinfo"] = str(zipinfo_meta) - zipin.extract(item, self.tempdir) - path = os.path.join(self.tempdir, item.filename) - if os.path.isfile(path): - cfile = mat.create_class_file(path, False, add2archive=self.add2archive) - if cfile is not None: - cfile_meta = cfile.get_meta() - if cfile_meta != {}: - metadata[item.filename] = str(cfile_meta) - else: - logging.info('%s\'s 
fileformat is not supported or harmless' - % item.filename) - zipin.close() - return metadata - - def __get_zipinfo_meta(self, zipinfo): - ''' Return all the metadata of a ZipInfo - ''' - metadata = {} - if zipinfo.comment != '': - metadata['comment'] = zipinfo.comment - if zipinfo.date_time != ZIP_EPOCH: - metadata['modified'] = zipinfo.date_time - if zipinfo.create_system != 3: # 3 is UNIX - metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown" - return metadata - - def remove_all(self, whitelist=[], beginning_blacklist=[], ending_blacklist=[]): - ''' Remove all metadata from a zip archive, even thoses - added by Python's zipfile itself. It will not add - files starting with "begining_blacklist", or ending with - "ending_blacklist". This method also add files present in - whitelist to the archive. - ''' - zipin = zipfile.ZipFile(self.filename, 'r') - zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) - for item in zipin.infolist(): - zipin.extract(item, self.tempdir) - path = os.path.join(self.tempdir, item.filename) - - beginning = any((True for f in beginning_blacklist if item.filename.startswith(f))) - ending = any((True for f in ending_blacklist if item.filename.endswith(f))) - - if os.path.isfile(path) and not beginning and not ending: - cfile = mat.create_class_file(path, False, add2archive=self.add2archive) - if cfile is not None: - # Handle read-only files inside archive - old_stat = os.stat(path).st_mode - os.chmod(path, old_stat|stat.S_IWUSR) - cfile.remove_all() - os.chmod(path, old_stat) - logging.debug('Processing %s from %s' % (item.filename, self.filename)) - elif item.filename not in whitelist: - logging.info('%s\'s format is not supported or harmless' % item.filename) - basename, ext = os.path.splitext(path) - if not (self.add2archive or ext in parser.NOMETA): - continue - os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) - zipout.write(path, item.filename) - zipin.close() - zipout.close() - - logging.info('%s 
processed' % self.filename) - self.do_backup() - return True - - -class TarStripper(GenericArchiveStripper): - ''' Represent a tarfile archive - ''' - def _remove(self, current_file): - ''' Remove the meta added by tarfile itself to the file - ''' - current_file.mtime = 0 - current_file.uid = 0 - current_file.gid = 0 - current_file.uname = '' - current_file.gname = '' - return current_file - - def remove_all(self, whitelist=[]): - ''' Remove all harmful metadata from the tarfile. - The method will also add every files matching - whitelist in the produced archive. - ''' - tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8') - tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8') - for item in tarin.getmembers(): - tarin.extract(item, self.tempdir) - if item.isfile(): - path = os.path.join(self.tempdir, item.name) - cfile = mat.create_class_file(path, False, add2archive=self.add2archive) - if cfile is not None: - # Handle read-only files inside archive - old_stat = os.stat(path).st_mode - os.chmod(path, old_stat|stat.S_IWUSR) - cfile.remove_all() - os.chmod(path, old_stat) - elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA: - logging.debug('%s\' format is either not supported or harmless' % item.name) - elif item.name in whitelist: - logging.debug('%s is not supported, but MAT was told to add it anyway.' 
- % item.name) - else: # Don't add the file to the archive - logging.debug('%s will not be added' % item.name) - continue - tarout.add(path, item.name, filter=self._remove) - tarin.close() - tarout.close() - self.do_backup() - return True - - def is_file_clean(self, current_file): - ''' Check metadatas added by tarfile - ''' - if current_file.mtime != 0: - return False - elif current_file.uid != 0: - return False - elif current_file.gid != 0: - return False - elif current_file.uname != '': - return False - elif current_file.gname != '': - return False - return True - - def is_clean(self, list_unsupported=False): - ''' Check if the file is clean from harmful metadatas - When list_unsupported is True, the method returns a list - of all non-supported/archives files contained in the - archive. - ''' - ret_list = [] - tarin = tarfile.open(self.filename, 'r' + self.compression) - for item in tarin.getmembers(): - if not self.is_file_clean(item) and not list_unsupported: - logging.debug('%s from %s has compromising tarinfo' % - (item.name, self.filename)) - return False - tarin.extract(item, self.tempdir) - path = os.path.join(self.tempdir, item.name) - if item.isfile(): - cfile = mat.create_class_file(path, False, add2archive=self.add2archive) - if cfile is not None: - if not cfile.is_clean(): - logging.debug('%s from %s has metadata' % - (item.name.decode("utf8"), self.filename)) - if not list_unsupported: - return False - # Nested archives are treated like unsupported files - elif isinstance(cfile, GenericArchiveStripper): - ret_list.append(item.name) - else: - logging.error('%s\'s format is not supported or harmless' % item.name) - if os.path.splitext(path)[1] not in parser.NOMETA: - if not list_unsupported: - return False - ret_list.append(item.name) - tarin.close() - if list_unsupported: - return ret_list - return True - - def get_meta(self): - ''' Return a dict with all the meta of the tarfile - ''' - tarin = tarfile.open(self.filename, 'r' + self.compression) - 
metadata = {} - for item in tarin.getmembers(): - current_meta = {} - if item.isfile(): - tarin.extract(item, self.tempdir) - path = os.path.join(self.tempdir, item.name) - class_file = mat.create_class_file(path, False, add2archive=self.add2archive) - if class_file is not None: - meta = class_file.get_meta() - if meta: - current_meta['file'] = str(meta) - else: - logging.error('%s\'s format is not supported or harmless' % item.name) - - if not self.is_file_clean(item): # if there is meta - current_meta['mtime'] = item.mtime - current_meta['uid'] = item.uid - current_meta['gid'] = item.gid - current_meta['uname'] = item.uname - current_meta['gname'] = item.gname - metadata[item.name] = str(current_meta) - tarin.close() - return metadata - - -class TerminalZipStripper(ZipStripper): - ''' Represent a terminal level archive. - This type of archive can not contain nested archives. - It is used for formats like docx, which are basically - ziped xml. - ''' - - -class GzipStripper(TarStripper): - ''' Represent a tar.gz archive - ''' - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.compression = ':gz' - - -class Bzip2Stripper(TarStripper): - ''' Represent a tar.bz2 archive - ''' - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.compression = ':bz2' diff --git a/MAT/audio.py b/MAT/audio.py deleted file mode 100644 index dae9d75..0000000 --- a/MAT/audio.py +++ /dev/null @@ -1,53 +0,0 @@ -''' Care about audio fileformat -''' - -try: - from mutagen.flac import FLAC - from mutagen.oggvorbis import OggVorbis -except ImportError: - pass - -import parser -import mutagenstripper - - -class MpegAudioStripper(parser.GenericParser): - ''' Represent mpeg audio file (mp3, ...) 
- ''' - def _should_remove(self, field): - return field.name in ("id3v1", "id3v2") - - -class OggStripper(mutagenstripper.MutagenStripper): - ''' Represent an ogg vorbis file - ''' - def _create_mfile(self): - self.mfile = OggVorbis(self.filename) - - -class FlacStripper(mutagenstripper.MutagenStripper): - ''' Represent a Flac audio file - ''' - def _create_mfile(self): - self.mfile = FLAC(self.filename) - - def remove_all(self): - ''' Remove the "metadata" block from the file - ''' - super(FlacStripper, self).remove_all() - self.mfile.clear_pictures() - self.mfile.save() - return True - - def is_clean(self): - ''' Check if the "metadata" block is present in the file - ''' - return super(FlacStripper, self).is_clean() and not self.mfile.pictures - - def get_meta(self): - ''' Return the content of the metadata block if present - ''' - metadata = super(FlacStripper, self).get_meta() - if self.mfile.pictures: - metadata['picture:'] = 'yes' - return metadata diff --git a/MAT/bencode/__init__.py b/MAT/bencode/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/MAT/bencode/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/MAT/bencode/bencode.py b/MAT/bencode/bencode.py deleted file mode 100644 index a0cc99a..0000000 --- a/MAT/bencode/bencode.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright 2007 by Petru Paler -# Copyright 2011 by Julien (jvoisin) Voisin -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# - -''' - A quick (and also nice) lib to bencode/bdecode torrent files -''' - - -class BTFailure(Exception): - '''Custom Exception''' - pass - - -class Bencached(object): - '''Custom type : cached string''' - __slots__ = ['bencoded'] - - def __init__(self, string): - self.bencoded = string - - -def decode_int(x, f): - '''decode an int''' - f += 1 - newf = x.index('e', f) - if x[f:f+1] == '-0': - raise ValueError - elif x[f] == '0' and newf != f + 1: - raise ValueError - return int(x[f:newf]), newf + 1 - - -def decode_string(x, f): - '''decode a string''' - colon = x.index(':', f) - if x[f] == '0' and colon != f + 1: - raise ValueError - n = int(x[f:colon]) - colon += 1 - return x[colon:colon + n], colon + n - - -def decode_list(x, f): - '''decode a list''' - result = [] - f += 1 - while x[f] != 'e': - v, f = DECODE_FUNC[x[f]](x, f) - result.append(v) - return result, f + 1 - - -def decode_dict(x, f): - '''decode a dict''' - result = {} - f += 1 - while x[f] != 'e': - k, f = decode_string(x, f) - result[k], f = DECODE_FUNC[x[f]](x, f) - return result, f + 1 - - -def encode_bool(x, r): - '''bencode a boolean''' - encode_int(1 if r else 0, r) - - -def encode_int(x, r): - '''bencode an integer/float''' - r.extend(('i', str(x), 'e')) - - -def encode_list(x, r): - '''bencode a list/tuple''' - r.append('l') - [ENCODE_FUNC[type(item)](item, r) for item in x] - r.append('e') - - -def encode_dict(x, result): - '''bencode a dict''' - result.append('d') - ilist = list(x.items()) - ilist.sort() - 
for k, v in ilist: - result.extend((str(len(k)), ':', k)) - ENCODE_FUNC[type(v)](v, result) - result.append('e') - - -DECODE_FUNC = {str(x):decode_string for x in range(9)} -DECODE_FUNC['l'] = decode_list -DECODE_FUNC['d'] = decode_dict -DECODE_FUNC['i'] = decode_int - - -ENCODE_FUNC = {} -ENCODE_FUNC[Bencached] = lambda x, r: r.append(x.bencoded) -ENCODE_FUNC[int] = encode_int -ENCODE_FUNC[int] = encode_int -ENCODE_FUNC[bytes] = lambda x, r: r.extend((str(len(x)), ':', x)) -ENCODE_FUNC[list] = encode_list -ENCODE_FUNC[tuple] = encode_list -ENCODE_FUNC[dict] = encode_dict -ENCODE_FUNC[bool] = encode_bool - - -def bencode(string): - '''bencode $string''' - table = [] - ENCODE_FUNC[type(string)](string, table) - return ''.join(table) - - -def bdecode(string): - '''decode $string''' - try: - result, lenght = DECODE_FUNC[string[0]](string, 0) - except (IndexError, KeyError, ValueError): - raise BTFailure('Not a valid bencoded string') - if lenght != len(string): - raise BTFailure('Invalid bencoded value (data after valid prefix)') - return result diff --git a/MAT/exceptions.py b/MAT/exceptions.py deleted file mode 100644 index 47da15c..0000000 --- a/MAT/exceptions.py +++ /dev/null @@ -1,14 +0,0 @@ -''' Base exceptions for MAT -''' - - -class UnableToRemoveFile(Exception): - '''This exception is raised when a file could not be removed - ''' - pass - -class UnableToWriteFile(Exception): - '''This exception is raised when a file - can could not be chmod +w - ''' - pass diff --git a/MAT/exiftool.py b/MAT/exiftool.py deleted file mode 100644 index 9e38f04..0000000 --- a/MAT/exiftool.py +++ /dev/null @@ -1,78 +0,0 @@ -''' Care about images with help of the amazing (perl) library Exiftool. 
-''' - -import parser -import subprocess - - -class ExiftoolStripper(parser.GenericParser): - ''' A generic stripper class using exiftool as backend - ''' - - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.allowed = set(['ExifTool Version Number', 'File Name', 'Directory', - 'File Size', 'File Modification Date/Time', 'File Access Date/Time', 'File Permissions', - 'File Type', 'MIME Type', 'Image Width', 'Image Height', - 'Image Size', 'File Inode Change Date/Time']) - self._set_allowed() - - def _set_allowed(self): - ''' Virtual method. Set the allowed/harmless list of metadata - ''' - raise NotImplementedError - - def remove_all(self): - ''' Remove all metadata with help of exiftool - ''' - try: - if self.backup: - self.create_backup_copy() - # Note: '-All=' must be followed by a known exiftool option. - subprocess.call(['exiftool', '-m', '-all=', - '-adobe=', '-overwrite_original', self.filename], - stdout=open('/dev/null')) - return True - except: - return False - - def is_clean(self): - ''' Check if the file is clean with the help of exiftool - ''' - return not self.get_meta() - - def get_meta(self): - ''' Return every harmful meta with help of exiftool. 
- Exiftool output looks like this: - field name : value - field name : value - ''' - output = subprocess.Popen(['exiftool', self.filename], - stdout=subprocess.PIPE).communicate()[0] - meta = {} - for i in output.split('\n')[:-1]: # chop last char ('\n') - key = i.split(':')[0].strip() - if key not in self.allowed: - meta[key] = i.split(':')[1].strip() # add the field name to the metadata set - return meta - - -class JpegStripper(ExiftoolStripper): - ''' Care about jpeg files with help - of exiftool - ''' - def _set_allowed(self): - self.allowed.update(['JFIF Version', 'Resolution Unit', - 'X Resolution', 'Y Resolution', 'Encoding Process', - 'Bits Per Sample', 'Color Components', 'Y Cb Cr Sub Sampling']) - - -class PngStripper(ExiftoolStripper): - ''' Care about png files with help - of exiftool - ''' - def _set_allowed(self): - self.allowed.update(['Bit Depth', 'Color Type', - 'Compression', 'Filter', 'Interlace', 'Pixels Per Unit X', - 'Pixels Per Unit Y', 'Pixel Units', 'Significant Bits', - 'Background Color', 'SRGB Rendering']) diff --git a/MAT/hachoir_editor/__init__.py b/MAT/hachoir_editor/__init__.py deleted file mode 100644 index 1835676..0000000 --- a/MAT/hachoir_editor/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from field import ( - EditorError, FakeField) -from typed_field import ( - EditableField, EditableBits, EditableBytes, - EditableInteger, EditableString, - createEditableField) -from fieldset import EditableFieldSet, NewFieldSet, createEditor - diff --git a/MAT/hachoir_editor/field.py b/MAT/hachoir_editor/field.py deleted file mode 100644 index 6b1efe3..0000000 --- a/MAT/hachoir_editor/field.py +++ /dev/null @@ -1,69 +0,0 @@ -from hachoir_core.error import HachoirError -from hachoir_core.field import joinPath, MissingField - -class EditorError(HachoirError): - pass - -class FakeField(object): - """ - This class have API looks similar to Field API, but objects don't contain - any value: all values are _computed_ by parent methods. 
- - Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). - """ - is_field_set = False - - def __init__(self, parent, name): - self._parent = parent - self._name = name - - def _getPath(self): - return joinPath(self._parent.path, self._name) - path = property(_getPath) - - def _getName(self): - return self._name - name = property(_getName) - - def _getAddress(self): - return self._parent._getFieldAddress(self._name) - address = property(_getAddress) - - def _getSize(self): - return self._parent.input[self._name].size - size = property(_getSize) - - def _getValue(self): - return self._parent.input[self._name].value - value = property(_getValue) - - def createDisplay(self): - # TODO: Returns new value if field is altered - return self._parent.input[self._name].display - display = property(createDisplay) - - def _getParent(self): - return self._parent - parent = property(_getParent) - - def hasValue(self): - return self._parent.input[self._name].hasValue() - - def __getitem__(self, key): - # TODO: Implement this function! 
- raise MissingField(self, key) - - def _isAltered(self): - return False - is_altered = property(_isAltered) - - def writeInto(self, output): - size = self.size - addr = self._parent._getFieldInputAddress(self._name) - input = self._parent.input - stream = input.stream - if size % 8: - output.copyBitsFrom(stream, addr, size, input.endian) - else: - output.copyBytesFrom(stream, addr, size//8) - diff --git a/MAT/hachoir_editor/fieldset.py b/MAT/hachoir_editor/fieldset.py deleted file mode 100644 index b7c9b07..0000000 --- a/MAT/hachoir_editor/fieldset.py +++ /dev/null @@ -1,352 +0,0 @@ -from hachoir_core.dict import UniqKeyError -from hachoir_core.field import MissingField, Float32, Float64, FakeArray -from hachoir_core.compatibility import any -from hachoir_core.i18n import _ -from typed_field import createEditableField -from field import EditorError -from collections import deque # Python 2.4 -import weakref # Python 2.1 -import struct - -class EditableFieldSet(object): - MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors - is_field_set = True - - def __init__(self, parent, fieldset): - self._parent = parent - self.input = fieldset # original FieldSet - self._fields = {} # cache of editable fields - self._deleted = set() # Names of deleted fields - self._inserted = {} # Inserted field (name => list of field, - # where name is the name after) - - def array(self, key): - # FIXME: Use cache? - return FakeArray(self, key) - - def _getParent(self): - return self._parent - parent = property(_getParent) - - def _isAltered(self): - if self._inserted: - return True - if self._deleted: - return True - return any(field.is_altered for field in self._fields.itervalues()) - is_altered = property(_isAltered) - - def reset(self): - """ - Reset the field set and the input field set. 
- """ - for key, field in self._fields.iteritems(): - if not field.is_altered: - del self._fields[key] - self.input.reset() - - def __len__(self): - return len(self.input) \ - - len(self._deleted) \ - + sum( len(new) for new in self._inserted.itervalues() ) - - def __iter__(self): - for field in self.input: - name = field.name - if name in self._inserted: - for newfield in self._inserted[name]: - yield weakref.proxy(newfield) - if name not in self._deleted: - yield self[name] - if None in self._inserted: - for newfield in self._inserted[None]: - yield weakref.proxy(newfield) - - def insertBefore(self, name, *new_fields): - self._insert(name, new_fields, False) - - def insertAfter(self, name, *new_fields): - self._insert(name, new_fields, True) - - def insert(self, *new_fields): - self._insert(None, new_fields, True) - - def _insert(self, key, new_fields, next): - """ - key is the name of the field before which new_fields - will be inserted. If next is True, the fields will be inserted - _after_ this field. - """ - # Set unique field name - for field in new_fields: - if field._name.endswith("[]"): - self.input.setUniqueFieldName(field) - - # Check that there is no duplicate in inserted fields - new_names = list(field.name for field in new_fields) - names_set = set(new_names) - if len(names_set) != len(new_fields): - duplicates = (name for name in names_set if 1 < new_names.count(name)) - raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates)) - - # Check that field names are not in input - if self.input: # Write special version for NewFieldSet? 
- for name in new_names: - if name in self.input and name not in self._deleted: - raise UniqKeyError(_("Field name '%s' already exists") % name) - - # Check that field names are not in inserted fields - for fields in self._inserted.itervalues(): - for field in fields: - if field.name in new_names: - raise UniqKeyError(_("Field name '%s' already exists") % field.name) - - # Input have already inserted field? - if key in self._inserted: - if next: - self._inserted[key].extend( reversed(new_fields) ) - else: - self._inserted[key].extendleft( reversed(new_fields) ) - return - - # Whould like to insert in inserted fields? - if key: - for fields in self._inserted.itervalues(): - names = [item.name for item in fields] - try: - pos = names.index(key) - except ValueError: - continue - if 0 <= pos: - if next: - pos += 1 - fields.rotate(-pos) - fields.extendleft( reversed(new_fields) ) - fields.rotate(pos) - return - - # Get next field. Use None if we are at the end. - if next: - index = self.input[key].index + 1 - try: - key = self.input[index].name - except IndexError: - key = None - - # Check that field names are not in input - if key not in self.input: - raise MissingField(self, key) - - # Insert in original input - self._inserted[key]= deque(new_fields) - - def _getDescription(self): - return self.input.description - description = property(_getDescription) - - def _getStream(self): - # FIXME: This property is maybe a bad idea since address may be differents - return self.input.stream - stream = property(_getStream) - - def _getName(self): - return self.input.name - name = property(_getName) - - def _getEndian(self): - return self.input.endian - endian = property(_getEndian) - - def _getAddress(self): - if self._parent: - return self._parent._getFieldAddress(self.name) - else: - return 0 - address = property(_getAddress) - - def _getAbsoluteAddress(self): - address = self.address - current = self._parent - while current: - address += current.address - current = 
current._parent - return address - absolute_address = property(_getAbsoluteAddress) - - def hasValue(self): - return False -# return self._parent.input[self.name].hasValue() - - def _getSize(self): - if self.is_altered: - return sum(field.size for field in self) - else: - return self.input.size - size = property(_getSize) - - def _getPath(self): - return self.input.path - path = property(_getPath) - - def _getOriginalField(self, name): - assert name in self.input - return self.input[name] - - def _getFieldInputAddress(self, name): - """ - Absolute address of a field from the input field set. - """ - assert name in self.input - return self.input[name].absolute_address - - def _getFieldAddress(self, name): - """ - Compute relative address of a field. The operation takes care of - deleted and resized fields. - """ - #assert name not in self._deleted - addr = 0 - for field in self: - if field.name == name: - return addr - addr += field.size - raise MissingField(self, name) - - def _getItemByPath(self, path): - if not path[0]: - path = path[1:] - field = self - for name in path: - field = field[name] - return field - - def __contains__(self, name): - try: - field = self[name] - return (field is not None) - except MissingField: - return False - - def __getitem__(self, key): - """ - Create a weak reference to an editable field (EditableField) for the - field with specified name. If the field is removed later, using the - editable field will raise a weakref.ReferenceError exception. - - May raise a MissingField error if the field doesn't exist in original - field set or it has been deleted. 
- """ - if "/" in key: - return self._getItemByPath(key.split("/")) - if isinstance(key, (int, long)): - raise EditorError("Integer index are not supported") - - if (key in self._deleted) or (key not in self.input): - raise MissingField(self, key) - if key not in self._fields: - field = self.input[key] - if field.is_field_set: - self._fields[key] = createEditableFieldSet(self, field) - else: - self._fields[key] = createEditableField(self, field) - return weakref.proxy(self._fields[key]) - - def __delitem__(self, name): - """ - Remove a field from the field set. May raise an MissingField exception - if the field has already been deleted. - """ - parts = name.partition('/') - if parts[2]: - fieldset = self[parts[0]] - del fieldset[parts[2]] - return - if name in self._deleted: - raise MissingField(self, name) - self._deleted.add(name) - if name in self._fields: - del self._fields[name] - - def writeInto(self, output): - """ - Write the content if this field set into the output stream - (OutputStream). 
- """ - if not self.is_altered: - # Not altered: just copy bits/bytes - input = self.input - if input.size % 8: - output.copyBitsFrom(input.stream, - input.absolute_address, input.size, input.endian) - else: - output.copyBytesFrom(input.stream, - input.absolute_address, input.size//8) - else: - # Altered: call writeInto() method of each field - realaddr = 0 - for field in self: - field.writeInto(output) - realaddr += field.size - - def _getValue(self): - raise EditorError('Field set "%s" has no value' % self.path) - def _setValue(self, value): - raise EditorError('Field set "%s" value is read only' % self.path) - value = property(_getValue, _setValue, "Value of field") - -class EditableFloat(EditableFieldSet): - _value = None - - def _isAltered(self): - return (self._value is not None) - is_altered = property(_isAltered) - - def writeInto(self, output): - if self._value is not None: - self._write(output) - else: - EditableFieldSet.writeInto(self, output) - - def _write(self, output): - format = self.input.struct_format - raw = struct.pack(format, self._value) - output.writeBytes(raw) - - def _setValue(self, value): - self.parent._is_altered = True - self._value = value - value = property(EditableFieldSet._getValue, _setValue) - -def createEditableFieldSet(parent, field): - cls = field.__class__ - # FIXME: Support Float80 - if cls in (Float32, Float64): - return EditableFloat(parent, field) - else: - return EditableFieldSet(parent, field) - -class NewFieldSet(EditableFieldSet): - def __init__(self, parent, name): - EditableFieldSet.__init__(self, parent, None) - self._name = name - self._endian = parent.endian - - def __iter__(self): - if None in self._inserted: - return iter(self._inserted[None]) - else: - raise StopIteration() - - def _getName(self): - return self._name - name = property(_getName) - - def _getEndian(self): - return self._endian - endian = property(_getEndian) - - is_altered = property(lambda self: True) - -def createEditor(fieldset): - return 
EditableFieldSet(None, fieldset) - diff --git a/MAT/hachoir_editor/typed_field.py b/MAT/hachoir_editor/typed_field.py deleted file mode 100644 index 0f0427b..0000000 --- a/MAT/hachoir_editor/typed_field.py +++ /dev/null @@ -1,253 +0,0 @@ -from hachoir_core.field import ( - RawBits, Bit, Bits, PaddingBits, - RawBytes, Bytes, PaddingBytes, - GenericString, Character, - isInteger, isString) -from field import FakeField - -class EditableField(FakeField): - """ - Pure virtual class used to write editable field class. - """ - - _is_altered = False - def __init__(self, parent, name, value=None): - FakeField.__init__(self, parent, name) - self._value = value - - def _isAltered(self): - return self._is_altered - is_altered = property(_isAltered) - - def hasValue(self): - return True - - def _computeSize(self): - raise NotImplementedError() - def _getValue(self): - return self._value - def _setValue(self, value): - self._value = value - - def _propGetValue(self): - if self._value is not None: - return self._getValue() - else: - return FakeField._getValue(self) - def _propSetValue(self, value): - self._setValue(value) - self._is_altered = True - value = property(_propGetValue, _propSetValue) - - def _getSize(self): - if self._value is not None: - return self._computeSize() - else: - return FakeField._getSize(self) - size = property(_getSize) - - def _write(self, output): - raise NotImplementedError() - - def writeInto(self, output): - if self._is_altered: - self._write(output) - else: - return FakeField.writeInto(self, output) - -class EditableFixedField(EditableField): - """ - Editable field with fixed size. 
- """ - - def __init__(self, parent, name, value=None, size=None): - EditableField.__init__(self, parent, name, value) - if size is not None: - self._size = size - else: - self._size = self._parent._getOriginalField(self._name).size - - def _getSize(self): - return self._size - size = property(_getSize) - -class EditableBits(EditableFixedField): - def __init__(self, parent, name, *args): - if args: - if len(args) != 2: - raise TypeError( - "Wrong argument count, EditableBits constructor prototype is: " - "(parent, name, [size, value])") - size = args[0] - value = args[1] - assert isinstance(value, (int, long)) - else: - size = None - value = None - EditableFixedField.__init__(self, parent, name, value, size) - if args: - self._setValue(args[1]) - self._is_altered = True - - def _setValue(self, value): - if not(0 <= value < (1 << self._size)): - raise ValueError("Invalid value, must be in range %s..%s" - % (0, (1 << self._size) - 1)) - self._value = value - - def _write(self, output): - output.writeBits(self._size, self._value, self._parent.endian) - -class EditableBytes(EditableField): - def _setValue(self, value): - if not value: raise ValueError( - "Unable to set empty string to a EditableBytes field") - self._value = value - - def _computeSize(self): - return len(self._value) * 8 - - def _write(self, output): - output.writeBytes(self._value) - -class EditableString(EditableField): - MAX_SIZE = { - "Pascal8": (1 << 8)-1, - "Pascal16": (1 << 16)-1, - "Pascal32": (1 << 32)-1, - } - - def __init__(self, parent, name, *args, **kw): - if len(args) == 2: - value = args[1] - assert isinstance(value, str) # TODO: support Unicode - elif not args: - value = None - else: - raise TypeError( - "Wrong argument count, EditableString constructor prototype is:" - "(parent, name, [format, value])") - EditableField.__init__(self, parent, name, value) - if len(args) == 2: - self._charset = kw.get('charset', None) - self._format = args[0] - if self._format in 
GenericString.PASCAL_FORMATS: - self._prefix_size = GenericString.PASCAL_FORMATS[self._format] - else: - self._prefix_size = 0 - self._suffix_str = GenericString.staticSuffixStr( - self._format, self._charset, self._parent.endian) - self._is_altered = True - else: - orig = self._parent._getOriginalField(name) - self._charset = orig.charset - self._format = orig.format - self._prefix_size = orig.content_offset - self._suffix_str = orig.suffix_str - - def _setValue(self, value): - size = len(value) - if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size: - raise ValueError("String is too big") - self._value = value - - def _computeSize(self): - return (self._prefix_size + len(self._value) + len(self._suffix_str))*8 - - def _write(self, output): - if self._format in GenericString.SUFFIX_FORMAT: - output.writeBytes(self._value) - output.writeBytes(self._suffix_str) - elif self._format == "fixed": - output.writeBytes(self._value) - else: - assert self._format in GenericString.PASCAL_FORMATS - size = GenericString.PASCAL_FORMATS[self._format] - output.writeInteger(len(self._value), False, size, self._parent.endian) - output.writeBytes(self._value) - -class EditableCharacter(EditableFixedField): - def __init__(self, parent, name, *args): - if args: - if len(args) != 3: - raise TypeError( - "Wrong argument count, EditableCharacter " - "constructor prototype is: (parent, name, [value])") - value = args[0] - if not isinstance(value, str) or len(value) != 1: - raise TypeError("EditableCharacter needs a character") - else: - value = None - EditableFixedField.__init__(self, parent, name, value, 8) - if args: - self._is_altered = True - - def _setValue(self, value): - if not isinstance(value, str) or len(value) != 1: - raise TypeError("EditableCharacter needs a character") - self._value = value - - def _write(self, output): - output.writeBytes(self._value) - -class EditableInteger(EditableFixedField): - VALID_VALUE_SIGNED = { - 8: (-(1 << 8), (1 << 8)-1), - 16: 
(-(1 << 15), (1 << 15)-1), - 32: (-(1 << 31), (1 << 31)-1), - } - VALID_VALUE_UNSIGNED = { - 8: (0, (1 << 8)-1), - 16: (0, (1 << 16)-1), - 32: (0, (1 << 32)-1) - } - - def __init__(self, parent, name, *args): - if args: - if len(args) != 3: - raise TypeError( - "Wrong argument count, EditableInteger constructor prototype is: " - "(parent, name, [signed, size, value])") - size = args[1] - value = args[2] - assert isinstance(value, (int, long)) - else: - size = None - value = None - EditableFixedField.__init__(self, parent, name, value, size) - if args: - self._signed = args[0] - self._is_altered = True - else: - self._signed = self._parent._getOriginalField(self._name).signed - - def _setValue(self, value): - if self._signed: - valid = self.VALID_VALUE_SIGNED - else: - valid = self.VALID_VALUE_UNSIGNED - minval, maxval = valid[self._size] - if not(minval <= value <= maxval): - raise ValueError("Invalid value, must be in range %s..%s" - % (minval, maxval)) - self._value = value - - def _write(self, output): - output.writeInteger( - self.value, self._signed, self._size//8, self._parent.endian) - -def createEditableField(fieldset, field): - if isInteger(field): - cls = EditableInteger - elif isString(field): - cls = EditableString - elif field.__class__ in (RawBytes, Bytes, PaddingBytes): - cls = EditableBytes - elif field.__class__ in (RawBits, Bits, Bit, PaddingBits): - cls = EditableBits - elif field.__class__ == Character: - cls = EditableCharacter - else: - cls = FakeField - return cls(fieldset, field.name) - diff --git a/MAT/images.py b/MAT/images.py deleted file mode 100644 index 67c710f..0000000 --- a/MAT/images.py +++ /dev/null @@ -1,52 +0,0 @@ -''' Takes care about pictures formats - -References: - - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm - - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html - - PNG: http://www.w3.org/TR/PNG-Chunks.html -''' - -import parser - - -class 
JpegStripper(parser.GenericParser): - ''' Represents a jpeg file. - Custom Huffman and Quantization tables - are stripped: they may leak - some info, and the quality loss is minor. - ''' - def _should_remove(self, field): - ''' Return True if the field is compromising - ''' - field_list = frozenset([ - 'start_image', # start of the image - 'app0', # JFIF data - 'start_frame', # specify width, height, number of components - 'start_scan', # specify which slice of data the top-to-bottom scan contains - 'data', # actual data - 'end_image']) # end of the image - if field.name in field_list: - return False - elif field.name.startswith('quantization['): # custom Quant. tables - return False - elif field.name.startswith('huffman['): # custom Huffman tables - return False - return True - - -class PngStripper(parser.GenericParser): - ''' Represents a png file - ''' - def _should_remove(self, field): - ''' Return True if the field is compromising - ''' - field_list = frozenset([ - 'id', - 'header', # PNG header - 'physical', # the intended pixel size or aspect ratio - 'end']) # end of the image - if field.name in field_list: - return False - if field.name.startswith('data['): # data - return False - return True diff --git a/MAT/mat.py b/MAT/mat.py deleted file mode 100644 index 5b1fbda..0000000 --- a/MAT/mat.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python - -''' Metadata anonymisation toolkit library -''' - -import logging -import mimetypes -import os -import subprocess -import xml.sax - -import hachoir_core.cmd_line -import hachoir_parser - -import MAT.exceptions - -__version__ = '0.5.2' -__author__ = 'jvoisin' - -#Silence -LOGGING_LEVEL = logging.CRITICAL -hachoir_core.config.quiet = True -fname = '' - -#Verbose -#LOGGING_LEVEL = logging.DEBUG -#hachoir_core.config.quiet = False -#logname = 'report.log' - -logging.basicConfig(filename=fname, level=LOGGING_LEVEL) - -import strippers # this is loaded here because we need LOGGING_LEVEL - - -def get_logo(): - ''' 
Return the path to the logo - ''' - if os.path.isfile('./data/mat.png'): - return './data/mat.png' - elif os.path.isfile('/usr/share/pixmaps/mat.png'): - return '/usr/share/pixmaps/mat.png' - elif os.path.isfile('/usr/local/share/pixmaps/mat.png'): - return '/usr/local/share/pixmaps/mat.png' - - -def get_datadir(): - ''' Return the path to the data directory - ''' - if os.path.isdir('./data/'): - return './data/' - elif os.path.isdir('/usr/local/share/mat/'): - return '/usr/local/share/mat/' - elif os.path.isdir('/usr/share/mat/'): - return '/usr/share/mat/' - - -def list_supported_formats(): - ''' Return a list of all locally supported fileformat. - It parses that FORMATS file, and removes locally - non-supported formats. - ''' - handler = XMLParser() - parser = xml.sax.make_parser() - parser.setContentHandler(handler) - path = os.path.join(get_datadir(), 'FORMATS') - with open(path, 'r') as xmlfile: - parser.parse(xmlfile) - - localy_supported = [] - for item in handler.list: - if item['mimetype'].split(',')[0] in strippers.STRIPPERS: - localy_supported.append(item) - - return localy_supported - - -class XMLParser(xml.sax.handler.ContentHandler): - ''' Parse the supported format xml, and return a corresponding - list of dict - ''' - def __init__(self): - self.dict = {} - self.list = [] - self.content, self.key = '', '' - self.between = False - - def startElement(self, name, attrs): - ''' Called when entering into xml tag - ''' - self.between = True - self.key = name - self.content = '' - - def endElement(self, name): - ''' Called when exiting a xml tag - ''' - if name == 'format': # leaving a fileformat section - self.list.append(self.dict.copy()) - self.dict.clear() - else: - content = self.content.replace('\s', ' ') - self.dict[self.key] = content - self.between = False - - def characters(self, characters): - ''' Concatenate the content between opening and closing tags - ''' - if self.between: - self.content += characters - - -def secure_remove(filename): - ''' 
Securely remove the file - ''' - # I want the file removed, even if it's ro - try: - os.chmod(filename, 220) - except OSError: - logging.error('Unable to add write rights to %s' % filename) - raise MAT.exceptions.UnableToWriteFile - - try: - if not subprocess.call(['shred', '--remove', filename]): - return True - else: - raise OSError - except OSError: - logging.error('Unable to securely remove %s' % filename) - - try: - os.remove(filename) - except OSError: - logging.error('Unable to remove %s' % filename) - raise MAT.exceptions.UnableToRemoveFile - - return True - - -def create_class_file(name, backup, **kwargs): - ''' Return a $FILETYPEStripper() class, - corresponding to the filetype of the given file - ''' - if not os.path.isfile(name): # check if the file exists - logging.error('%s is not a valid file' % name) - return None - - if not os.access(name, os.R_OK): # check read permissions - logging.error('%s is is not readable' % name) - return None - - if not os.path.getsize(name): - #check if the file is not empty (hachoir crash on empty files) - logging.error('%s is empty' % name) - return None - - filename = '' - try: - filename = hachoir_core.cmd_line.unicodeFilename(name) - except TypeError: # get rid of "decoding Unicode is not supported" - filename = name - - parser = hachoir_parser.createParser(filename) - if not parser: - logging.info('Unable to parse %s' % filename) - return None - - mime = parser.mime_type - - if mime == 'application/zip': # some formats are zipped stuff - if mimetypes.guess_type(name)[0]: - mime = mimetypes.guess_type(name)[0] - - if mime.startswith('application/vnd.oasis.opendocument'): - mime = 'application/opendocument' # opendocument fileformat - elif mime.startswith('application/vnd.openxmlformats-officedocument'): - mime = 'application/officeopenxml' # office openxml - - is_writable = os.access(name, os.W_OK) - - try: - stripper_class = strippers.STRIPPERS[mime] - except KeyError: - logging.info('Don\'t have stripper for %s 
format' % mime) - return None - - return stripper_class(filename, parser, mime, backup, is_writable, **kwargs) diff --git a/MAT/misc.py b/MAT/misc.py deleted file mode 100644 index 450f381..0000000 --- a/MAT/misc.py +++ /dev/null @@ -1,76 +0,0 @@ -''' Care about misc formats -''' - -import parser - -from bencode import bencode - - -class TorrentStripper(parser.GenericParser): - ''' Represent a torrent file with the help - of the bencode lib from Petru Paler - ''' - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', - 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) - - def __get_key_recursively(self, dictionary): - ''' Get recursively all keys from a dict and - its subdicts - ''' - for i, j in list(dictionary.items()): - if isinstance(j, dict): - return set([i]).union(self.__get_key_recursively(j)) - return set([i]) - - def is_clean(self): - ''' Check if the file is clean from harmful metadata - ''' - with open(self.filename, 'r') as f: - decoded = bencode.bdecode(f.read()) - return self.fields.issuperset(self.__get_key_recursively(decoded)) - - def __get_meta_recursively(self, dictionary): - ''' Get recursively all harmful metadata - ''' - d = dict() - for i, j in list(dictionary.items()): - if i not in self.fields: - d[i] = j - elif isinstance(j, dict): - d = dict(d.items() + list(self.__get_meta_recursively(j).items())) - return d - - def get_meta(self): - ''' Return a dict with all the meta of the file - ''' - with open(self.filename, 'r') as f: - decoded = bencode.bdecode(f.read()) - return self.__get_meta_recursively(decoded) - - def __remove_all_recursively(self, dictionary): - ''' Remove recursively all compromizing fields - ''' - d = dict() - for i, j in [i for i in list(dictionary.items()) if i in self.fields]: - if 
isinstance(j, dict): - d = dict(list(d.items()) + list(self.__get_meta_recursively(j).items())) - else: - d[i] = j - return d - - def remove_all(self): - ''' Remove all comprimizing fields - ''' - decoded = '' - with open(self.filename, 'r') as f: - decoded = bencode.bdecode(f.read()) - - cleaned = {i: j for i, j in list(decoded.items()) if i in self.fields} - - with open(self.output, 'w') as f: # encode the decoded torrent - f.write(bencode.bencode(cleaned)) # and write it in self.output - - self.do_backup() - return True diff --git a/MAT/mutagenstripper.py b/MAT/mutagenstripper.py deleted file mode 100644 index 403c9a7..0000000 --- a/MAT/mutagenstripper.py +++ /dev/null @@ -1,33 +0,0 @@ -''' Take care of mutagen-supported formats (audio) -''' - -import parser - - -class MutagenStripper(parser.GenericParser): - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(MutagenStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self._create_mfile() - - def _create_mfile(self): - raise NotImplementedError - - def is_clean(self): - return not self.mfile.tags - - def remove_all(self): - if self.backup: - self.create_backup_copy() - self.mfile.delete() - self.mfile.save() - return True - - def get_meta(self): - ''' - Return the content of the metadata block is present - ''' - metadata = {} - if self.mfile.tags: - for key, value in self.mfile.tags: - metadata[key] = value - return metadata diff --git a/MAT/office.py b/MAT/office.py deleted file mode 100644 index 0ca1ff1..0000000 --- a/MAT/office.py +++ /dev/null @@ -1,191 +0,0 @@ -''' Care about office's formats - -''' - -import logging -import os -import shutil -import tempfile -import xml.dom.minidom as minidom -import zipfile - -try: - import cairo - from gi.repository import Poppler -except ImportError: - logging.info('office.py loaded without PDF support') - pass - -import parser -import archive - - -class OpenDocumentStripper(archive.TerminalZipStripper): - ''' 
An open document file is a zip, with xml file into. - The one that interest us is meta.xml - ''' - - def get_meta(self): - ''' Return a dict with all the meta of the file by - trying to read the meta.xml file. - ''' - metadata = super(OpenDocumentStripper, self).get_meta() - zipin = zipfile.ZipFile(self.filename, 'r') - try: - content = zipin.read('meta.xml') - dom1 = minidom.parseString(content) - elements = dom1.getElementsByTagName('office:meta') - for i in elements[0].childNodes: - if i.tagName != 'meta:document-statistic': - nodename = ''.join(i.nodeName.split(':')[1:]) - metadata[nodename] = ''.join([j.data for j in i.childNodes]) - else: - # thank you w3c for not providing a nice - # method to get all attributes of a node - pass - except KeyError: # no meta.xml file found - logging.debug('%s has no opendocument metadata' % self.filename) - zipin.close() - return metadata - - def remove_all(self): - ''' Removes metadata - ''' - return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml']) - - def is_clean(self): - ''' Check if the file is clean from harmful metadatas - ''' - clean_super = super(OpenDocumentStripper, self).is_clean() - if clean_super is False: - return False - - zipin = zipfile.ZipFile(self.filename, 'r') - try: - zipin.getinfo('meta.xml') - except KeyError: # no meta.xml in the file - return True - zipin.close() - return False - - -class OpenXmlStripper(archive.TerminalZipStripper): - ''' Represent an office openxml document, which is like - an opendocument format, with some tricky stuff added. - It contains mostly xml, but can have media blobs, crap, ... - (I don't like this format.) - ''' - def remove_all(self): - return super(OpenXmlStripper, self).remove_all( - beginning_blacklist=('docProps/'), whitelist=('.rels')) - - def is_clean(self): - ''' Check if the file is clean from harmful metadatas. - This implementation is faster than something like - "return this.get_meta() == {}". 
- ''' - clean_super = super(OpenXmlStripper, self).is_clean() - if clean_super is False: - return False - - zipin = zipfile.ZipFile(self.filename, 'r') - for item in zipin.namelist(): - if item.startswith('docProps/'): - return False - zipin.close() - return True - - def get_meta(self): - ''' Return a dict with all the meta of the file - ''' - metadata = super(OpenXmlStripper, self).get_meta() - - zipin = zipfile.ZipFile(self.filename, 'r') - for item in zipin.namelist(): - if item.startswith('docProps/'): - metadata[item] = 'harmful content' - zipin.close() - return metadata - - -class PdfStripper(parser.GenericParser): - ''' Represent a PDF file - ''' - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.uri = 'file://' + os.path.abspath(self.filename) - self.password = None - try: - self.pdf_quality = kwargs['low_pdf_quality'] - except KeyError: - self.pdf_quality = False - - self.meta_list = frozenset(['title', 'author', 'subject', - 'keywords', 'creator', 'producer', 'metadata']) - - def is_clean(self): - ''' Check if the file is clean from harmful metadatas - ''' - document = Poppler.Document.new_from_file(self.uri, self.password) - for key in self.meta_list: - if document.get_property(key): - return False - return True - - def remove_all(self): - ''' Opening the PDF with poppler, then doing a render - on a cairo pdfsurface for each pages. - - http://cairographics.org/documentation/pycairo/2/ - - The use of an intermediate tempfile is necessary because - python-cairo segfaults on unicode. 
- See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 - ''' - document = Poppler.Document.new_from_file(self.uri, self.password) - try: - output = tempfile.mkstemp()[1] - page = document.get_page(0) - # assume that every pages are the same size - page_width, page_height = page.get_size() - surface = cairo.PDFSurface(output, page_width, page_height) - context = cairo.Context(surface) # context draws on the surface - logging.debug('PDF rendering of %s' % self.filename) - for pagenum in range(document.get_n_pages()): - page = document.get_page(pagenum) - context.translate(0, 0) - if self.pdf_quality: - page.render(context) # render the page on context - else: - page.render_for_printing(context) # render the page on context - context.show_page() # draw context on surface - surface.finish() - shutil.move(output, self.output) - except: - logging.error('Something went wrong when cleaning %s.' % self.filename) - return False - - try: - import pdfrw # For now, poppler cannot write meta, so we must use pdfrw - logging.debug('Removing %s\'s superficial metadata' % self.filename) - trailer = pdfrw.PdfReader(self.output) - trailer.Info.Producer = None - trailer.Info.Creator = None - writer = pdfrw.PdfWriter() - writer.trailer = trailer - writer.write(self.output) - self.do_backup() - except: - logging.error('Unable to remove all metadata from %s, please install pdfrw' % self.output) - return False - return True - - def get_meta(self): - ''' Return a dict with all the meta of the file - ''' - document = Poppler.Document.new_from_file(self.uri, self.password) - metadata = {} - for key in self.meta_list: - if document.get_property(key): - metadata[key] = document.get_property(key) - return metadata diff --git a/MAT/parser.py b/MAT/parser.py deleted file mode 100644 index 1765da8..0000000 --- a/MAT/parser.py +++ /dev/null @@ -1,135 +0,0 @@ -''' Parent class of all parser -''' - -import os -import shutil -import tempfile - -import hachoir_core -import hachoir_editor - 
-import mat - -NOMETA = frozenset(( - '.bmp', # "raw" image - '.rdf', # text - '.txt', # plain text - '.xml', # formated text (XML) - '.rels', # openXML formated text -)) - -FIELD = object() - - -class GenericParser(object): - ''' Parent class of all parsers - ''' - def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): - self.filename = '' - self.parser = parser - self.mime = mime - self.backup = backup - self.is_writable = is_writable - self.editor = hachoir_editor.createEditor(parser) - try: - self.filename = hachoir_core.cmd_line.unicodeFilename(filename) - except TypeError: # get rid of "decoding Unicode is not supported" - self.filename = filename - self.basename = os.path.basename(filename) - _, output = tempfile.mkstemp() - self.output = hachoir_core.cmd_line.unicodeFilename(output) - - def __del__(self): - ''' Remove tempfile if it was not used - ''' - if os.path.exists(self.output): - mat.secure_remove(self.output) - - def is_clean(self): - ''' - Check if the file is clean from harmful metadatas - ''' - for field in self.editor: - if self._should_remove(field): - return self._is_clean(self.editor) - return True - - def _is_clean(self, fieldset): - for field in fieldset: - remove = self._should_remove(field) - if remove is True: - return False - if remove is FIELD: - if not self._is_clean(field): - return False - return True - - def remove_all(self): - ''' Remove all compromising fields - ''' - state = self._remove_all(self.editor) - hachoir_core.field.writeIntoFile(self.editor, self.output) - self.do_backup() - return state - - def _remove_all(self, fieldset): - ''' Recursive way to handle tree metadatas - ''' - try: - for field in fieldset: - remove = self._should_remove(field) - if remove is True: - self._remove(fieldset, field.name) - if remove is FIELD: - self._remove_all(field) - return True - except: - return False - - def _remove(self, fieldset, field): - ''' Delete the given field - ''' - del fieldset[field] - - def 
get_meta(self): - ''' Return a dict with all the meta of the file - ''' - metadata = {} - self._get_meta(self.editor, metadata) - return metadata - - def _get_meta(self, fieldset, metadata): - ''' Recursive way to handle tree metadatas - ''' - for field in fieldset: - remove = self._should_remove(field) - if remove: - try: - metadata[field.name] = field.value - except: - metadata[field.name] = 'harmful content' - if remove is FIELD: - self._get_meta(field, None) - - def _should_remove(self, key): - ''' Return True if the field is compromising - abstract method - ''' - raise NotImplementedError - - def create_backup_copy(self): - ''' Create a backup copy - ''' - shutil.copy2(self.filename, self.filename + '.bak') - - def do_backup(self): - ''' Keep a backup of the file if asked. - - The process of double-renaming is not very elegant, - but it greatly simplify new strippers implementation. - ''' - if self.backup: - shutil.move(self.filename, self.filename + '.bak') - else: - mat.secure_remove(self.filename) - shutil.move(self.output, self.filename) diff --git a/MAT/strippers.py b/MAT/strippers.py deleted file mode 100644 index aea98da..0000000 --- a/MAT/strippers.py +++ /dev/null @@ -1,70 +0,0 @@ -''' Manage which fileformat can be processed -''' - -import archive -import audio -import gi -import images -import logging -import mat -import misc -import office -import subprocess - -STRIPPERS = { - 'application/x-tar': archive.TarStripper, - 'application/x-bzip2': archive.Bzip2Stripper, - 'application/x-gzip': archive.GzipStripper, - 'application/zip': archive.ZipStripper, - 'audio/mpeg': audio.MpegAudioStripper, - 'application/x-bittorrent': misc.TorrentStripper, - 'application/opendocument': office.OpenDocumentStripper, - 'application/officeopenxml': office.OpenXmlStripper, -} - -logging.basicConfig(level=mat.LOGGING_LEVEL) - -# PDF support -pdfSupport = True -try: - from gi.repository import Poppler -except ImportError: - logging.info('Unable to import Poppler: no 
PDF support') - pdfSupport = False - -try: - import cairo -except ImportError: - logging.info('Unable to import python-cairo: no PDF support') - pdfSupport = False - -try: - import pdfrw -except ImportError: - logging.info('Unable to import python-pdfrw: no PDf support') - pdfSupport = False - -if pdfSupport: - STRIPPERS['application/x-pdf'] = office.PdfStripper - STRIPPERS['application/pdf'] = office.PdfStripper - - -# audio format support with mutagen-python -try: - import mutagen - STRIPPERS['audio/x-flac'] = audio.FlacStripper - STRIPPERS['audio/vorbis'] = audio.OggStripper - STRIPPERS['audio/mpeg'] = audio.MpegAudioStripper -except ImportError: - logging.info('Unable to import python-mutagen: limited audio format support') - -# exiftool -try: - subprocess.check_output(['exiftool', '-ver']) - import exiftool - STRIPPERS['image/jpeg'] = exiftool.JpegStripper - STRIPPERS['image/png'] = exiftool.PngStripper -except OSError: # if exiftool is not installed, use hachoir instead - logging.info('Unable to find exiftool: limited images support') - STRIPPERS['image/jpeg'] = images.JpegStripper - STRIPPERS['image/png'] = images.PngStripper diff --git a/RELEASE b/RELEASE index 3a34cd7..99e6fcf 100644 --- a/RELEASE +++ b/RELEASE @@ -9,10 +9,10 @@ update changelog: git log -> CHANGELOG update version number (Don't be affraid to use -rc or -beta) - MAT/mat.py -> __version__ = $VERSION + libmat/mat.py -> __version__ = $VERSION commit release changes - git commit CHANGELOG MAT/mat.py + git commit CHANGELOG libmat/mat.py create a tag git tag -s $VERSION diff --git a/libmat/__init__.py b/libmat/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/libmat/__init__.py @@ -0,0 +1 @@ + diff --git a/libmat/archive.py b/libmat/archive.py new file mode 100644 index 0000000..d483dcc --- /dev/null +++ b/libmat/archive.py @@ -0,0 +1,335 @@ +''' Take care of archives formats +''' + +import datetime +import logging +import os +import shutil +import stat +import tarfile 
class GenericArchiveStripper(parser.GenericParser):
    ''' Common base class of the archive strippers.

        Every instance owns a private temporary directory (self.tempdir)
        that is emptied and deleted when the instance is collected.
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        ''' Initialise the generic parser, then the archive-specific state.

            kwargs must contain the key 'add2archive'; a KeyError is
            raised otherwise.
        '''
        base = super(GenericArchiveStripper, self)
        base.__init__(filename, parser, mime, backup, is_writable, **kwargs)
        self.compression = ''
        self.add2archive = kwargs['add2archive']
        self.tempdir = tempfile.mkdtemp()

    def __del__(self):
        ''' Hand every file of the temp dir to mat.secure_remove,
            then delete the temp dir itself
        '''
        for dirpath, _dirnames, filenames in os.walk(self.tempdir):
            for name in filenames:
                mat.secure_remove(os.path.join(dirpath, name))
        shutil.rmtree(self.tempdir)

    def is_clean(self, list_unsupported=False):
        ''' Virtual method: check the archive for harmful metadata
        '''
        raise NotImplementedError

    def list_unsupported(self):
        ''' Return whatever is_clean() returns when it is called
            with list_unsupported=True
        '''
        return self.is_clean(list_unsupported=True)

    def remove_all(self):
        ''' Virtual method: remove all the metadata
        '''
        raise NotImplementedError
+ ''' + ret_list = [] + zipin = zipfile.ZipFile(self.filename, 'r') + if zipin.comment != '' and not list_unsupported: + logging.debug('%s has a comment' % self.filename) + return False + for item in zipin.infolist(): + zipin.extract(item, self.tempdir) + path = os.path.join(self.tempdir, item.filename) + if not self.__is_zipfile_clean(item) and not list_unsupported: + logging.debug('%s from %s has compromising zipinfo' % + (item.filename, self.filename)) + return False + if os.path.isfile(path): + cfile = mat.create_class_file(path, False, add2archive=self.add2archive) + if cfile is not None: + if not cfile.is_clean(): + logging.debug('%s from %s has metadata' % (item.filename, self.filename)) + if not list_unsupported: + return False + else: + logging.info('%s\'s fileformat is not supported or harmless.' + % item.filename) + basename, ext = os.path.splitext(path) + if os.path.basename(item.filename) not in ('mimetype', '.rels'): + if ext not in parser.NOMETA: + if not list_unsupported: + return False + ret_list.append(item.filename) + zipin.close() + if list_unsupported: + return ret_list + return True + + def get_meta(self): + ''' Return all the metadata of a zip archive''' + zipin = zipfile.ZipFile(self.filename, 'r') + metadata = {} + if zipin.comment != '': + metadata['comment'] = zipin.comment + for item in zipin.infolist(): + zipinfo_meta = self.__get_zipinfo_meta(item) + if zipinfo_meta != {}: # zipinfo metadata + metadata[item.filename + "'s zipinfo"] = str(zipinfo_meta) + zipin.extract(item, self.tempdir) + path = os.path.join(self.tempdir, item.filename) + if os.path.isfile(path): + cfile = mat.create_class_file(path, False, add2archive=self.add2archive) + if cfile is not None: + cfile_meta = cfile.get_meta() + if cfile_meta != {}: + metadata[item.filename] = str(cfile_meta) + else: + logging.info('%s\'s fileformat is not supported or harmless' + % item.filename) + zipin.close() + return metadata + + def __get_zipinfo_meta(self, zipinfo): + ''' Return 
all the metadata of a ZipInfo + ''' + metadata = {} + if zipinfo.comment != '': + metadata['comment'] = zipinfo.comment + if zipinfo.date_time != ZIP_EPOCH: + metadata['modified'] = zipinfo.date_time + if zipinfo.create_system != 3: # 3 is UNIX + metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown" + return metadata + + def remove_all(self, whitelist=[], beginning_blacklist=[], ending_blacklist=[]): + ''' Remove all metadata from a zip archive, even thoses + added by Python's zipfile itself. It will not add + files starting with "begining_blacklist", or ending with + "ending_blacklist". This method also add files present in + whitelist to the archive. + ''' + zipin = zipfile.ZipFile(self.filename, 'r') + zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) + for item in zipin.infolist(): + zipin.extract(item, self.tempdir) + path = os.path.join(self.tempdir, item.filename) + + beginning = any((True for f in beginning_blacklist if item.filename.startswith(f))) + ending = any((True for f in ending_blacklist if item.filename.endswith(f))) + + if os.path.isfile(path) and not beginning and not ending: + cfile = mat.create_class_file(path, False, add2archive=self.add2archive) + if cfile is not None: + # Handle read-only files inside archive + old_stat = os.stat(path).st_mode + os.chmod(path, old_stat|stat.S_IWUSR) + cfile.remove_all() + os.chmod(path, old_stat) + logging.debug('Processing %s from %s' % (item.filename, self.filename)) + elif item.filename not in whitelist: + logging.info('%s\'s format is not supported or harmless' % item.filename) + basename, ext = os.path.splitext(path) + if not (self.add2archive or ext in parser.NOMETA): + continue + os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS)) + zipout.write(path, item.filename) + zipin.close() + zipout.close() + + logging.info('%s processed' % self.filename) + self.do_backup() + return True + + +class TarStripper(GenericArchiveStripper): + ''' Represent a tarfile archive + ''' + 
def _remove(self, current_file): + ''' Remove the meta added by tarfile itself to the file + ''' + current_file.mtime = 0 + current_file.uid = 0 + current_file.gid = 0 + current_file.uname = '' + current_file.gname = '' + return current_file + + def remove_all(self, whitelist=[]): + ''' Remove all harmful metadata from the tarfile. + The method will also add every files matching + whitelist in the produced archive. + ''' + tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8') + tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8') + for item in tarin.getmembers(): + tarin.extract(item, self.tempdir) + if item.isfile(): + path = os.path.join(self.tempdir, item.name) + cfile = mat.create_class_file(path, False, add2archive=self.add2archive) + if cfile is not None: + # Handle read-only files inside archive + old_stat = os.stat(path).st_mode + os.chmod(path, old_stat|stat.S_IWUSR) + cfile.remove_all() + os.chmod(path, old_stat) + elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA: + logging.debug('%s\' format is either not supported or harmless' % item.name) + elif item.name in whitelist: + logging.debug('%s is not supported, but MAT was told to add it anyway.' 
def is_file_clean(self, current_file):
    ''' Tell whether a tar member is free of the metadata added by tarfile.

        The member is clean when its mtime, uid and gid all equal 0
        and its uname and gname are both empty strings.
    '''
    # (attribute, neutral value), checked in this order
    neutral_fields = (
        ('mtime', 0),
        ('uid', 0),
        ('gid', 0),
        ('uname', ''),
        ('gname', ''),
    )
    for attribute, neutral in neutral_fields:
        if getattr(current_file, attribute) != neutral:
            return False
    return True
metadata = {} + for item in tarin.getmembers(): + current_meta = {} + if item.isfile(): + tarin.extract(item, self.tempdir) + path = os.path.join(self.tempdir, item.name) + class_file = mat.create_class_file(path, False, add2archive=self.add2archive) + if class_file is not None: + meta = class_file.get_meta() + if meta: + current_meta['file'] = str(meta) + else: + logging.error('%s\'s format is not supported or harmless' % item.name) + + if not self.is_file_clean(item): # if there is meta + current_meta['mtime'] = item.mtime + current_meta['uid'] = item.uid + current_meta['gid'] = item.gid + current_meta['uname'] = item.uname + current_meta['gname'] = item.gname + metadata[item.name] = str(current_meta) + tarin.close() + return metadata + + +class TerminalZipStripper(ZipStripper): + ''' Represent a terminal level archive. + This type of archive can not contain nested archives. + It is used for formats like docx, which are basically + ziped xml. + ''' + + +class GzipStripper(TarStripper): + ''' Represent a tar.gz archive + ''' + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): + super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) + self.compression = ':gz' + + +class Bzip2Stripper(TarStripper): + ''' Represent a tar.bz2 archive + ''' + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): + super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) + self.compression = ':bz2' diff --git a/libmat/audio.py b/libmat/audio.py new file mode 100644 index 0000000..dae9d75 --- /dev/null +++ b/libmat/audio.py @@ -0,0 +1,53 @@ +''' Care about audio fileformat +''' + +try: + from mutagen.flac import FLAC + from mutagen.oggvorbis import OggVorbis +except ImportError: + pass + +import parser +import mutagenstripper + + +class MpegAudioStripper(parser.GenericParser): + ''' Represent mpeg audio file (mp3, ...) 
class FlacStripper(mutagenstripper.MutagenStripper):
    ''' Stripper for Flac audio files.

        On top of what the parent class handles, it also takes care of
        the pictures exposed by the mutagen FLAC object.
    '''
    def _create_mfile(self):
        ''' Open self.filename as a mutagen FLAC object
        '''
        self.mfile = FLAC(self.filename)

    def remove_all(self):
        ''' Run the parent removal, then drop the pictures and save.
            Always return True.
        '''
        super(FlacStripper, self).remove_all()
        flac_file = self.mfile
        flac_file.clear_pictures()
        flac_file.save()
        return True

    def is_clean(self):
        ''' The file is clean when the parent check passes
            and the file holds no picture
        '''
        parent_verdict = super(FlacStripper, self).is_clean()
        if not parent_verdict:
            return parent_verdict
        return not self.mfile.pictures

    def get_meta(self):
        ''' Return the parent's metadata, with an extra 'picture:' entry
            when the file holds at least one picture
        '''
        found = super(FlacStripper, self).get_meta()
        if self.mfile.pictures:
            found['picture:'] = 'yes'
        return found
class BTFailure(Exception):
    '''Raised by bdecode() when its input is not valid bencoded data'''
    pass


class Bencached(object):
    '''Wrapper around an already bencoded string.

    bencode() appends the wrapped string as is instead of encoding it again.
    '''
    __slots__ = ['bencoded']

    def __init__(self, string):
        self.bencoded = string


def decode_int(x, f):
    '''Decode the integer ("i<number>e") that starts at index f of x.

    Return a (value, next_index) tuple, next_index being the position
    right after the closing 'e'.
    Raise ValueError on negative zero ("i-0e", "i-03e") and on leading
    zeros ("i03e").
    '''
    f += 1
    newf = x.index('e', f)
    # The slice must be two characters wide to be able to match '-0'.
    if x[f:f + 2] == '-0':
        raise ValueError
    elif x[f] == '0' and newf != f + 1:
        raise ValueError
    return int(x[f:newf]), newf + 1


def decode_string(x, f):
    '''Decode the string ("<length>:<data>") that starts at index f of x.

    Return a (value, next_index) tuple.
    Raise ValueError when the length has a leading zero.
    '''
    colon = x.index(':', f)
    if x[f] == '0' and colon != f + 1:
        raise ValueError
    n = int(x[f:colon])
    colon += 1
    return x[colon:colon + n], colon + n


def decode_list(x, f):
    '''Decode the list ("l<items>e") that starts at index f of x.

    Return a (list, next_index) tuple.
    '''
    result = []
    f += 1
    while x[f] != 'e':
        v, f = DECODE_FUNC[x[f]](x, f)
        result.append(v)
    return result, f + 1


def decode_dict(x, f):
    '''Decode the dict ("d<key><value>...e") that starts at index f of x.

    Keys are always decoded as strings.
    Return a (dict, next_index) tuple.
    '''
    result = {}
    f += 1
    while x[f] != 'e':
        k, f = decode_string(x, f)
        result[k], f = DECODE_FUNC[x[f]](x, f)
    return result, f + 1


def encode_bool(x, r):
    '''bencode the boolean x as the integer 1 or 0, appending to r'''
    # The truth value to look at is the one of x; r is the output list.
    encode_int(1 if x else 0, r)


def encode_int(x, r):
    '''bencode an integer/float, appending the pieces to r'''
    r.extend(('i', str(x), 'e'))


def encode_list(x, r):
    '''bencode a list/tuple, appending the pieces to r'''
    r.append('l')
    for item in x:
        ENCODE_FUNC[type(item)](item, r)
    r.append('e')


def encode_dict(x, result):
    '''bencode a dict, appending the pieces to result.

    The entries are written sorted by key.
    '''
    result.append('d')
    for k, v in sorted(x.items()):
        result.extend((str(len(k)), ':', k))
        ENCODE_FUNC[type(v)](v, result)
    result.append('e')


# Decoders, indexed by the first character of the encoded value.
# A string starts with its length, so every digit from 0 to 9 is needed.
DECODE_FUNC = {str(x): decode_string for x in range(10)}
DECODE_FUNC['l'] = decode_list
DECODE_FUNC['d'] = decode_dict
DECODE_FUNC['i'] = decode_int


# Encoders, indexed by the exact type of the value to encode.
ENCODE_FUNC = {}
ENCODE_FUNC[Bencached] = lambda x, r: r.append(x.bencoded)
ENCODE_FUNC[int] = encode_int
try:
    ENCODE_FUNC[long] = encode_int  # only exists on Python 2
except NameError:
    pass
ENCODE_FUNC[bytes] = lambda x, r: r.extend((str(len(x)), ':', x))
ENCODE_FUNC[list] = encode_list
ENCODE_FUNC[tuple] = encode_list
ENCODE_FUNC[dict] = encode_dict
ENCODE_FUNC[bool] = encode_bool


def bencode(string):
    '''bencode $string and return the result as a single string.

    A KeyError is raised when the type of the value (or of a nested
    value) has no entry in ENCODE_FUNC.
    '''
    table = []
    ENCODE_FUNC[type(string)](string, table)
    return ''.join(table)


def bdecode(string):
    '''decode $string and return the decoded value.

    Raise BTFailure when the input is not valid bencoded data, or when
    data is left after the first complete value.
    '''
    try:
        result, length = DECODE_FUNC[string[0]](string, 0)
    except (IndexError, KeyError, ValueError):
        raise BTFailure('Not a valid bencoded string')
    if length != len(string):
        raise BTFailure('Invalid bencoded value (data after valid prefix)')
    return result
class ExiftoolStripper(parser.GenericParser):
    ''' A generic stripper class using exiftool as backend.

        self.allowed holds the exiftool field names that are not
        reported as metadata; subclasses extend it in _set_allowed().
    '''

    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
        self.allowed = set(['ExifTool Version Number', 'File Name', 'Directory',
                'File Size', 'File Modification Date/Time', 'File Access Date/Time', 'File Permissions',
                'File Type', 'MIME Type', 'Image Width', 'Image Height',
                'Image Size', 'File Inode Change Date/Time'])
        self._set_allowed()

    def _set_allowed(self):
        ''' Virtual method. Set the allowed/harmless list of metadata
        '''
        raise NotImplementedError

    def remove_all(self):
        ''' Remove all metadata with help of exiftool.

            Return True once exiftool has been run (its exit status is
            not inspected), False if the backup or the call itself failed.
        '''
        import os  # local import: only needed here for os.devnull
        try:
            if self.backup:
                self.create_backup_copy()
            # exiftool's output is discarded. The sink has to be opened
            # for writing, and is closed as soon as exiftool is done.
            with open(os.devnull, 'w') as devnull:
                # Note: '-All=' must be followed by a known exiftool option.
                subprocess.call(['exiftool', '-m', '-all=',
                    '-adobe=', '-overwrite_original', self.filename],
                    stdout=devnull)
            return True
        except Exception:
            # Best effort: any failure is reported to the caller as False
            return False

    def is_clean(self):
        ''' Check if the file is clean with the help of exiftool
        '''
        return not self.get_meta()

    def get_meta(self):
        ''' Return every harmful meta with help of exiftool.
            Exiftool output looks like this:
            field name    : value
            field name    : value

            The result maps each field name that is not in self.allowed
            to its value.
        '''
        output = subprocess.Popen(['exiftool', self.filename],
                stdout=subprocess.PIPE).communicate()[0]
        meta = {}
        for line in output.splitlines():
            # Split on the first colon only: values may contain colons
            # too (timestamps for example) and must be kept whole.
            key, separator, value = line.partition(':')
            if not separator:
                continue  # not a "field name : value" line
            key = key.strip()
            if key not in self.allowed:
                meta[key] = value.strip()
        return meta
+ + Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). + """ + is_field_set = False + + def __init__(self, parent, name): + self._parent = parent + self._name = name + + def _getPath(self): + return joinPath(self._parent.path, self._name) + path = property(_getPath) + + def _getName(self): + return self._name + name = property(_getName) + + def _getAddress(self): + return self._parent._getFieldAddress(self._name) + address = property(_getAddress) + + def _getSize(self): + return self._parent.input[self._name].size + size = property(_getSize) + + def _getValue(self): + return self._parent.input[self._name].value + value = property(_getValue) + + def createDisplay(self): + # TODO: Returns new value if field is altered + return self._parent.input[self._name].display + display = property(createDisplay) + + def _getParent(self): + return self._parent + parent = property(_getParent) + + def hasValue(self): + return self._parent.input[self._name].hasValue() + + def __getitem__(self, key): + # TODO: Implement this function! 
+ raise MissingField(self, key) + + def _isAltered(self): + return False + is_altered = property(_isAltered) + + def writeInto(self, output): + size = self.size + addr = self._parent._getFieldInputAddress(self._name) + input = self._parent.input + stream = input.stream + if size % 8: + output.copyBitsFrom(stream, addr, size, input.endian) + else: + output.copyBytesFrom(stream, addr, size//8) + diff --git a/libmat/hachoir_editor/fieldset.py b/libmat/hachoir_editor/fieldset.py new file mode 100644 index 0000000..b7c9b07 --- /dev/null +++ b/libmat/hachoir_editor/fieldset.py @@ -0,0 +1,352 @@ +from hachoir_core.dict import UniqKeyError +from hachoir_core.field import MissingField, Float32, Float64, FakeArray +from hachoir_core.compatibility import any +from hachoir_core.i18n import _ +from typed_field import createEditableField +from field import EditorError +from collections import deque # Python 2.4 +import weakref # Python 2.1 +import struct + +class EditableFieldSet(object): + MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors + is_field_set = True + + def __init__(self, parent, fieldset): + self._parent = parent + self.input = fieldset # original FieldSet + self._fields = {} # cache of editable fields + self._deleted = set() # Names of deleted fields + self._inserted = {} # Inserted field (name => list of field, + # where name is the name after) + + def array(self, key): + # FIXME: Use cache? + return FakeArray(self, key) + + def _getParent(self): + return self._parent + parent = property(_getParent) + + def _isAltered(self): + if self._inserted: + return True + if self._deleted: + return True + return any(field.is_altered for field in self._fields.itervalues()) + is_altered = property(_isAltered) + + def reset(self): + """ + Reset the field set and the input field set. 
+ """ + for key, field in self._fields.iteritems(): + if not field.is_altered: + del self._fields[key] + self.input.reset() + + def __len__(self): + return len(self.input) \ + - len(self._deleted) \ + + sum( len(new) for new in self._inserted.itervalues() ) + + def __iter__(self): + for field in self.input: + name = field.name + if name in self._inserted: + for newfield in self._inserted[name]: + yield weakref.proxy(newfield) + if name not in self._deleted: + yield self[name] + if None in self._inserted: + for newfield in self._inserted[None]: + yield weakref.proxy(newfield) + + def insertBefore(self, name, *new_fields): + self._insert(name, new_fields, False) + + def insertAfter(self, name, *new_fields): + self._insert(name, new_fields, True) + + def insert(self, *new_fields): + self._insert(None, new_fields, True) + + def _insert(self, key, new_fields, next): + """ + key is the name of the field before which new_fields + will be inserted. If next is True, the fields will be inserted + _after_ this field. + """ + # Set unique field name + for field in new_fields: + if field._name.endswith("[]"): + self.input.setUniqueFieldName(field) + + # Check that there is no duplicate in inserted fields + new_names = list(field.name for field in new_fields) + names_set = set(new_names) + if len(names_set) != len(new_fields): + duplicates = (name for name in names_set if 1 < new_names.count(name)) + raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates)) + + # Check that field names are not in input + if self.input: # Write special version for NewFieldSet? 
+ for name in new_names: + if name in self.input and name not in self._deleted: + raise UniqKeyError(_("Field name '%s' already exists") % name) + + # Check that field names are not in inserted fields + for fields in self._inserted.itervalues(): + for field in fields: + if field.name in new_names: + raise UniqKeyError(_("Field name '%s' already exists") % field.name) + + # Input have already inserted field? + if key in self._inserted: + if next: + self._inserted[key].extend( reversed(new_fields) ) + else: + self._inserted[key].extendleft( reversed(new_fields) ) + return + + # Whould like to insert in inserted fields? + if key: + for fields in self._inserted.itervalues(): + names = [item.name for item in fields] + try: + pos = names.index(key) + except ValueError: + continue + if 0 <= pos: + if next: + pos += 1 + fields.rotate(-pos) + fields.extendleft( reversed(new_fields) ) + fields.rotate(pos) + return + + # Get next field. Use None if we are at the end. + if next: + index = self.input[key].index + 1 + try: + key = self.input[index].name + except IndexError: + key = None + + # Check that field names are not in input + if key not in self.input: + raise MissingField(self, key) + + # Insert in original input + self._inserted[key]= deque(new_fields) + + def _getDescription(self): + return self.input.description + description = property(_getDescription) + + def _getStream(self): + # FIXME: This property is maybe a bad idea since address may be differents + return self.input.stream + stream = property(_getStream) + + def _getName(self): + return self.input.name + name = property(_getName) + + def _getEndian(self): + return self.input.endian + endian = property(_getEndian) + + def _getAddress(self): + if self._parent: + return self._parent._getFieldAddress(self.name) + else: + return 0 + address = property(_getAddress) + + def _getAbsoluteAddress(self): + address = self.address + current = self._parent + while current: + address += current.address + current = 
current._parent + return address + absolute_address = property(_getAbsoluteAddress) + + def hasValue(self): + return False +# return self._parent.input[self.name].hasValue() + + def _getSize(self): + if self.is_altered: + return sum(field.size for field in self) + else: + return self.input.size + size = property(_getSize) + + def _getPath(self): + return self.input.path + path = property(_getPath) + + def _getOriginalField(self, name): + assert name in self.input + return self.input[name] + + def _getFieldInputAddress(self, name): + """ + Absolute address of a field from the input field set. + """ + assert name in self.input + return self.input[name].absolute_address + + def _getFieldAddress(self, name): + """ + Compute relative address of a field. The operation takes care of + deleted and resized fields. + """ + #assert name not in self._deleted + addr = 0 + for field in self: + if field.name == name: + return addr + addr += field.size + raise MissingField(self, name) + + def _getItemByPath(self, path): + if not path[0]: + path = path[1:] + field = self + for name in path: + field = field[name] + return field + + def __contains__(self, name): + try: + field = self[name] + return (field is not None) + except MissingField: + return False + + def __getitem__(self, key): + """ + Create a weak reference to an editable field (EditableField) for the + field with specified name. If the field is removed later, using the + editable field will raise a weakref.ReferenceError exception. + + May raise a MissingField error if the field doesn't exist in original + field set or it has been deleted. 
+ """ + if "/" in key: + return self._getItemByPath(key.split("/")) + if isinstance(key, (int, long)): + raise EditorError("Integer index are not supported") + + if (key in self._deleted) or (key not in self.input): + raise MissingField(self, key) + if key not in self._fields: + field = self.input[key] + if field.is_field_set: + self._fields[key] = createEditableFieldSet(self, field) + else: + self._fields[key] = createEditableField(self, field) + return weakref.proxy(self._fields[key]) + + def __delitem__(self, name): + """ + Remove a field from the field set. May raise an MissingField exception + if the field has already been deleted. + """ + parts = name.partition('/') + if parts[2]: + fieldset = self[parts[0]] + del fieldset[parts[2]] + return + if name in self._deleted: + raise MissingField(self, name) + self._deleted.add(name) + if name in self._fields: + del self._fields[name] + + def writeInto(self, output): + """ + Write the content if this field set into the output stream + (OutputStream). 
class NewFieldSet(EditableFieldSet):
    """
    Editable field set created from scratch: it has no input field set
    (the parent class is initialised with None as input), so its name and
    endian are stored on the object itself, and it is always reported as
    altered.
    """
    def __init__(self, parent, name):
        EditableFieldSet.__init__(self, parent, None)
        self._name = name
        self._endian = parent.endian

    def __iter__(self):
        """
        Iterate over the fields stored under the None key of
        self._inserted. Without any such field, the iteration is empty.
        """
        if None in self._inserted:
            return iter(self._inserted[None])
        else:
            # __iter__ has to return an iterator. Raising StopIteration
            # here would escape from iter() and from "for" loops instead
            # of ending the iteration.
            return iter(())

    def _getName(self):
        return self._name
    name = property(_getName)

    def _getEndian(self):
        return self._endian
    endian = property(_getEndian)

    is_altered = property(lambda self: True)
EditableFieldSet(None, fieldset) + diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py new file mode 100644 index 0000000..0f0427b --- /dev/null +++ b/libmat/hachoir_editor/typed_field.py @@ -0,0 +1,253 @@ +from hachoir_core.field import ( + RawBits, Bit, Bits, PaddingBits, + RawBytes, Bytes, PaddingBytes, + GenericString, Character, + isInteger, isString) +from field import FakeField + +class EditableField(FakeField): + """ + Pure virtual class used to write editable field class. + """ + + _is_altered = False + def __init__(self, parent, name, value=None): + FakeField.__init__(self, parent, name) + self._value = value + + def _isAltered(self): + return self._is_altered + is_altered = property(_isAltered) + + def hasValue(self): + return True + + def _computeSize(self): + raise NotImplementedError() + def _getValue(self): + return self._value + def _setValue(self, value): + self._value = value + + def _propGetValue(self): + if self._value is not None: + return self._getValue() + else: + return FakeField._getValue(self) + def _propSetValue(self, value): + self._setValue(value) + self._is_altered = True + value = property(_propGetValue, _propSetValue) + + def _getSize(self): + if self._value is not None: + return self._computeSize() + else: + return FakeField._getSize(self) + size = property(_getSize) + + def _write(self, output): + raise NotImplementedError() + + def writeInto(self, output): + if self._is_altered: + self._write(output) + else: + return FakeField.writeInto(self, output) + +class EditableFixedField(EditableField): + """ + Editable field with fixed size. 
+ """ + + def __init__(self, parent, name, value=None, size=None): + EditableField.__init__(self, parent, name, value) + if size is not None: + self._size = size + else: + self._size = self._parent._getOriginalField(self._name).size + + def _getSize(self): + return self._size + size = property(_getSize) + +class EditableBits(EditableFixedField): + def __init__(self, parent, name, *args): + if args: + if len(args) != 2: + raise TypeError( + "Wrong argument count, EditableBits constructor prototype is: " + "(parent, name, [size, value])") + size = args[0] + value = args[1] + assert isinstance(value, (int, long)) + else: + size = None + value = None + EditableFixedField.__init__(self, parent, name, value, size) + if args: + self._setValue(args[1]) + self._is_altered = True + + def _setValue(self, value): + if not(0 <= value < (1 << self._size)): + raise ValueError("Invalid value, must be in range %s..%s" + % (0, (1 << self._size) - 1)) + self._value = value + + def _write(self, output): + output.writeBits(self._size, self._value, self._parent.endian) + +class EditableBytes(EditableField): + def _setValue(self, value): + if not value: raise ValueError( + "Unable to set empty string to a EditableBytes field") + self._value = value + + def _computeSize(self): + return len(self._value) * 8 + + def _write(self, output): + output.writeBytes(self._value) + +class EditableString(EditableField): + MAX_SIZE = { + "Pascal8": (1 << 8)-1, + "Pascal16": (1 << 16)-1, + "Pascal32": (1 << 32)-1, + } + + def __init__(self, parent, name, *args, **kw): + if len(args) == 2: + value = args[1] + assert isinstance(value, str) # TODO: support Unicode + elif not args: + value = None + else: + raise TypeError( + "Wrong argument count, EditableString constructor prototype is:" + "(parent, name, [format, value])") + EditableField.__init__(self, parent, name, value) + if len(args) == 2: + self._charset = kw.get('charset', None) + self._format = args[0] + if self._format in 
class EditableCharacter(EditableFixedField):
    """
    Editable field holding exactly one character, stored on 8 bits.

    Constructor prototype is (parent, name, [value]). When a value is
    given, it must be a one-character str and the field is immediately
    flagged as altered.
    """

    def __init__(self, parent, name, *args):
        if args:
            # Only the optional value may follow (parent, name). The check
            # used to require three extra arguments, which contradicted the
            # prototype below and made every call with a value fail.
            if len(args) != 1:
                raise TypeError(
                    "Wrong argument count, EditableCharacter "
                    "constructor prototype is: (parent, name, [value])")
            value = args[0]
            if not isinstance(value, str) or len(value) != 1:
                raise TypeError("EditableCharacter needs a character")
        else:
            value = None
        # A character always has a fixed size of 8 bits
        EditableFixedField.__init__(self, parent, name, value, 8)
        if args:
            self._is_altered = True

    def _setValue(self, value):
        """
        Store a new value; raise TypeError unless it is a one-character str.
        """
        if not isinstance(value, str) or len(value) != 1:
            raise TypeError("EditableCharacter needs a character")
        self._value = value

    def _write(self, output):
        """
        Write the stored character into output.
        """
        output.writeBytes(self._value)
(-(1 << 15), (1 << 15)-1), + 32: (-(1 << 31), (1 << 31)-1), + } + VALID_VALUE_UNSIGNED = { + 8: (0, (1 << 8)-1), + 16: (0, (1 << 16)-1), + 32: (0, (1 << 32)-1) + } + + def __init__(self, parent, name, *args): + if args: + if len(args) != 3: + raise TypeError( + "Wrong argument count, EditableInteger constructor prototype is: " + "(parent, name, [signed, size, value])") + size = args[1] + value = args[2] + assert isinstance(value, (int, long)) + else: + size = None + value = None + EditableFixedField.__init__(self, parent, name, value, size) + if args: + self._signed = args[0] + self._is_altered = True + else: + self._signed = self._parent._getOriginalField(self._name).signed + + def _setValue(self, value): + if self._signed: + valid = self.VALID_VALUE_SIGNED + else: + valid = self.VALID_VALUE_UNSIGNED + minval, maxval = valid[self._size] + if not(minval <= value <= maxval): + raise ValueError("Invalid value, must be in range %s..%s" + % (minval, maxval)) + self._value = value + + def _write(self, output): + output.writeInteger( + self.value, self._signed, self._size//8, self._parent.endian) + +def createEditableField(fieldset, field): + if isInteger(field): + cls = EditableInteger + elif isString(field): + cls = EditableString + elif field.__class__ in (RawBytes, Bytes, PaddingBytes): + cls = EditableBytes + elif field.__class__ in (RawBits, Bits, Bit, PaddingBits): + cls = EditableBits + elif field.__class__ == Character: + cls = EditableCharacter + else: + cls = FakeField + return cls(fieldset, field.name) + diff --git a/libmat/images.py b/libmat/images.py new file mode 100644 index 0000000..67c710f --- /dev/null +++ b/libmat/images.py @@ -0,0 +1,52 @@ +''' Takes care about pictures formats + +References: + - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm + - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html + - PNG: http://www.w3.org/TR/PNG-Chunks.html +''' + +import parser + + +class 
JpegStripper(parser.GenericParser): + ''' Represents a jpeg file. + Custom Huffman and Quantization tables + are stripped: they may leak + some info, and the quality loss is minor. + ''' + def _should_remove(self, field): + ''' Return True if the field is compromising + ''' + field_list = frozenset([ + 'start_image', # start of the image + 'app0', # JFIF data + 'start_frame', # specify width, height, number of components + 'start_scan', # specify which slice of data the top-to-bottom scan contains + 'data', # actual data + 'end_image']) # end of the image + if field.name in field_list: + return False + elif field.name.startswith('quantization['): # custom Quant. tables + return False + elif field.name.startswith('huffman['): # custom Huffman tables + return False + return True + + +class PngStripper(parser.GenericParser): + ''' Represents a png file + ''' + def _should_remove(self, field): + ''' Return True if the field is compromising + ''' + field_list = frozenset([ + 'id', + 'header', # PNG header + 'physical', # the intended pixel size or aspect ratio + 'end']) # end of the image + if field.name in field_list: + return False + if field.name.startswith('data['): # data + return False + return True diff --git a/libmat/mat.py b/libmat/mat.py new file mode 100644 index 0000000..8dfc2dc --- /dev/null +++ b/libmat/mat.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +''' Metadata anonymisation toolkit library +''' + +import logging +import mimetypes +import os +import subprocess +import xml.sax + +import hachoir_core.cmd_line +import hachoir_parser + +import libmat.exceptions + +__version__ = '0.5.2' +__author__ = 'jvoisin' + +#Silence +LOGGING_LEVEL = logging.CRITICAL +hachoir_core.config.quiet = True +fname = '' + +#Verbose +#LOGGING_LEVEL = logging.DEBUG +#hachoir_core.config.quiet = False +#logname = 'report.log' + +logging.basicConfig(filename=fname, level=LOGGING_LEVEL) + +import strippers # this is loaded here because we need LOGGING_LEVEL + + +def get_logo(): + 
def secure_remove(filename):
    ''' Securely remove the file

        The file is first made writable, then overwritten and unlinked
        with `shred`. If `shred` is missing or fails, fall back to a
        plain os.remove.

        Return True once the file is gone.
        Raise libmat.exceptions.UnableToWriteFile if the permissions
        can not be changed, and libmat.exceptions.UnableToRemoveFile
        if neither removal method worked.
    '''
    # I want the file removed, even if it's ro
    # The mode must be an octal literal: the decimal 220 used previously
    # is 0o334, which sets execute bits and makes the file readable by
    # "others" right before it is supposed to be destroyed.
    try:
        os.chmod(filename, 0o220)
    except OSError:
        logging.error('Unable to add write rights to %s' % filename)
        raise libmat.exceptions.UnableToWriteFile

    try:
        # '--' ends option parsing, so a name starting with '-' is
        # treated as a file and not as a shred option
        shredded = subprocess.call(['shred', '--remove', '--', filename]) == 0
    except OSError:  # shred could not be executed
        shredded = False

    if shredded:
        return True
    logging.error('Unable to securely remove %s' % filename)

    try:
        os.remove(filename)
    except OSError:
        logging.error('Unable to remove %s' % filename)
        raise libmat.exceptions.UnableToRemoveFile

    return True
class TorrentStripper(parser.GenericParser):
    ''' Represent a torrent file with the help
        of the bencode lib from Petru Paler
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
        # Whitelist: only these keys are kept in a cleaned torrent
        self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces',
            'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash'])

    def __decode(self):
        ''' Return the bdecoded content of the file.
            The file is opened in binary mode, since a torrent
            embeds raw hashes.
        '''
        with open(self.filename, 'rb') as f:
            return bencode.bdecode(f.read())

    def __get_key_recursively(self, dictionary):
        ''' Get recursively all keys from a dict and
            its subdicts
        '''
        # Every entry has to be visited: returning from inside the loop
        # would only ever report the first key of the dict.
        keys = set()
        for key, value in dictionary.items():
            keys.add(key)
            if isinstance(value, dict):
                keys.update(self.__get_key_recursively(value))
        return keys

    def is_clean(self):
        ''' Check if the file is clean from harmful metadata
        '''
        decoded = self.__decode()
        return self.fields.issuperset(self.__get_key_recursively(decoded))

    def __get_meta_recursively(self, dictionary):
        ''' Get recursively all harmful metadata
        '''
        harmful = dict()
        for key, value in dictionary.items():
            if key not in self.fields:
                harmful[key] = value
            elif isinstance(value, dict):
                harmful.update(self.__get_meta_recursively(value))
        return harmful

    def get_meta(self):
        ''' Return a dict with all the meta of the file
        '''
        return self.__get_meta_recursively(self.__decode())

    def __remove_all_recursively(self, dictionary):
        ''' Remove recursively all compromizing fields.
            Return a new dict holding only whitelisted keys;
            sub-dicts keep their place and are cleaned the same way.
            Values that are not dicts (lists included) are kept as-is.
        '''
        cleaned = dict()
        for key, value in dictionary.items():
            if key not in self.fields:
                continue
            if isinstance(value, dict):
                value = self.__remove_all_recursively(value)
            cleaned[key] = value
        return cleaned

    def remove_all(self):
        ''' Remove all comprimizing fields
        '''
        # Clean nested dicts too, so that the result agrees with
        # is_clean(), which checks the keys of every sub-dict.
        cleaned = self.__remove_all_recursively(self.__decode())

        with open(self.output, 'wb') as f:  # encode the decoded torrent
            f.write(bencode.bencode(cleaned))  # and write it in self.output

        self.do_backup()
        return True
class OpenDocumentStripper(archive.TerminalZipStripper):
    ''' An open document file is a zip, with xml file into.
        The one that interest us is meta.xml
    '''

    def get_meta(self):
        ''' Return a dict with all the meta of the file by
            trying to read the meta.xml file.

            The entries found in meta.xml are added to the dict
            returned by the parent class.
        '''
        metadata = super(OpenDocumentStripper, self).get_meta()
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            content = zipin.read('meta.xml')
        except KeyError:  # no meta.xml file found
            logging.debug('%s has no opendocument metadata' % self.filename)
            return metadata
        finally:
            zipin.close()  # released on every path, errors included

        dom1 = minidom.parseString(content)
        elements = dom1.getElementsByTagName('office:meta')
        if not elements:  # meta.xml without any office:meta section
            return metadata

        for i in elements[0].childNodes:
            # text and comment nodes have no tagName: skip them
            if i.nodeType != i.ELEMENT_NODE:
                continue
            if i.tagName != 'meta:document-statistic':
                nodename = ''.join(i.nodeName.split(':')[1:])
                # only text children carry a .data attribute
                metadata[nodename] = ''.join(
                    [j.data for j in i.childNodes
                        if j.nodeType in (j.TEXT_NODE, j.CDATA_SECTION_NODE)])
            else:
                # thank you w3c for not providing a nice
                # method to get all attributes of a node
                pass
        return metadata

    def remove_all(self):
        ''' Removes metadata
        '''
        return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml'])

    def is_clean(self):
        ''' Check if the file is clean from harmful metadatas

            Return False when the parent class reports False or when
            the archive still holds a meta.xml member, True otherwise.
        '''
        clean_super = super(OpenDocumentStripper, self).is_clean()
        if clean_super is False:
            return False

        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipin.getinfo('meta.xml')
        except KeyError:  # no meta.xml in the file
            return True
        finally:
            zipin.close()  # also closed on the early return above
        return False
+ This implementation is faster than something like + "return this.get_meta() == {}". + ''' + clean_super = super(OpenXmlStripper, self).is_clean() + if clean_super is False: + return False + + zipin = zipfile.ZipFile(self.filename, 'r') + for item in zipin.namelist(): + if item.startswith('docProps/'): + return False + zipin.close() + return True + + def get_meta(self): + ''' Return a dict with all the meta of the file + ''' + metadata = super(OpenXmlStripper, self).get_meta() + + zipin = zipfile.ZipFile(self.filename, 'r') + for item in zipin.namelist(): + if item.startswith('docProps/'): + metadata[item] = 'harmful content' + zipin.close() + return metadata + + +class PdfStripper(parser.GenericParser): + ''' Represent a PDF file + ''' + def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): + super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) + self.uri = 'file://' + os.path.abspath(self.filename) + self.password = None + try: + self.pdf_quality = kwargs['low_pdf_quality'] + except KeyError: + self.pdf_quality = False + + self.meta_list = frozenset(['title', 'author', 'subject', + 'keywords', 'creator', 'producer', 'metadata']) + + def is_clean(self): + ''' Check if the file is clean from harmful metadatas + ''' + document = Poppler.Document.new_from_file(self.uri, self.password) + for key in self.meta_list: + if document.get_property(key): + return False + return True + + def remove_all(self): + ''' Opening the PDF with poppler, then doing a render + on a cairo pdfsurface for each pages. + + http://cairographics.org/documentation/pycairo/2/ + + The use of an intermediate tempfile is necessary because + python-cairo segfaults on unicode. 
+ See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 + ''' + document = Poppler.Document.new_from_file(self.uri, self.password) + try: + output = tempfile.mkstemp()[1] + page = document.get_page(0) + # assume that every pages are the same size + page_width, page_height = page.get_size() + surface = cairo.PDFSurface(output, page_width, page_height) + context = cairo.Context(surface) # context draws on the surface + logging.debug('PDF rendering of %s' % self.filename) + for pagenum in range(document.get_n_pages()): + page = document.get_page(pagenum) + context.translate(0, 0) + if self.pdf_quality: + page.render(context) # render the page on context + else: + page.render_for_printing(context) # render the page on context + context.show_page() # draw context on surface + surface.finish() + shutil.move(output, self.output) + except: + logging.error('Something went wrong when cleaning %s.' % self.filename) + return False + + try: + import pdfrw # For now, poppler cannot write meta, so we must use pdfrw + logging.debug('Removing %s\'s superficial metadata' % self.filename) + trailer = pdfrw.PdfReader(self.output) + trailer.Info.Producer = None + trailer.Info.Creator = None + writer = pdfrw.PdfWriter() + writer.trailer = trailer + writer.write(self.output) + self.do_backup() + except: + logging.error('Unable to remove all metadata from %s, please install pdfrw' % self.output) + return False + return True + + def get_meta(self): + ''' Return a dict with all the meta of the file + ''' + document = Poppler.Document.new_from_file(self.uri, self.password) + metadata = {} + for key in self.meta_list: + if document.get_property(key): + metadata[key] = document.get_property(key) + return metadata diff --git a/libmat/parser.py b/libmat/parser.py new file mode 100644 index 0000000..1765da8 --- /dev/null +++ b/libmat/parser.py @@ -0,0 +1,135 @@ +''' Parent class of all parser +''' + +import os +import shutil +import tempfile + +import hachoir_core +import hachoir_editor + 
NOMETA = frozenset((
    '.bmp',   # "raw" image
    '.rdf',   # text
    '.txt',   # plain text
    '.xml',   # formated text (XML)
    '.rels',  # openXML formated text
))

# Sentinel that _should_remove() may return instead of a boolean:
# the field is then walked recursively instead of being judged as a whole.
FIELD = object()


class GenericParser(object):
    ''' Parent class of all parsers
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        self.filename = ''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.is_writable = is_writable
        self.editor = hachoir_editor.createEditor(parser)
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        self.basename = os.path.basename(filename)
        fd, output = tempfile.mkstemp()
        os.close(fd)  # only the path is used: do not leak the descriptor
        self.output = hachoir_core.cmd_line.unicodeFilename(output)

    def __del__(self):
        ''' Remove tempfile if it was not used
        '''
        # self.output is missing when __init__ failed before creating it
        output = getattr(self, 'output', None)
        if output and os.path.exists(output):
            # mat imports the stripper modules, which import this module:
            # importing it at call time keeps this module importable alone
            import mat
            mat.secure_remove(output)

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas
        '''
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        ''' Recursive way to handle tree metadatas.
            Return False as soon as a field must be removed.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD:
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        ''' Remove all compromising fields
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        ''' Recursive way to handle tree metadatas.
            Return True if everything went fine, False if a removal
            failed here or in a nested field set.
        '''
        state = True
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                if remove is FIELD:
                    # keep going, but remember a nested failure
                    state = self._remove_all(field) and state
        except Exception:
            return False
        return state

    def _remove(self, fieldset, field):
        ''' Delete the given field
        '''
        del fieldset[field]

    def get_meta(self):
        ''' Return a dict with all the meta of the file
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        ''' Recursive way to handle tree metadatas.
            Found metadata are stored into the given dict.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove:
                try:
                    metadata[field.name] = field.value
                except Exception:  # the field has no readable value
                    metadata[field.name] = 'harmful content'
            if remove is FIELD:
                # nested fields go into the same dict
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        ''' Return True if the field is compromising
            abstract method
        '''
        raise NotImplementedError

    def create_backup_copy(self):
        ''' Create a backup copy
        '''
        shutil.copy2(self.filename, self.filename + '.bak')

    def do_backup(self):
        ''' Keep a backup of the file if asked.

            The process of double-renaming is not very elegant,
            but it greatly simplify new strippers implementation.
        '''
        if self.backup:
            shutil.move(self.filename, self.filename + '.bak')
        else:
            import mat  # see __del__ for why this is not a top-level import
            mat.secure_remove(self.filename)
        shutil.move(self.output, self.filename)
no PDF support') + pdfSupport = False + +try: + import cairo +except ImportError: + logging.info('Unable to import python-cairo: no PDF support') + pdfSupport = False + +try: + import pdfrw +except ImportError: + logging.info('Unable to import python-pdfrw: no PDf support') + pdfSupport = False + +if pdfSupport: + STRIPPERS['application/x-pdf'] = office.PdfStripper + STRIPPERS['application/pdf'] = office.PdfStripper + + +# audio format support with mutagen-python +try: + import mutagen + STRIPPERS['audio/x-flac'] = audio.FlacStripper + STRIPPERS['audio/vorbis'] = audio.OggStripper + STRIPPERS['audio/mpeg'] = audio.MpegAudioStripper +except ImportError: + logging.info('Unable to import python-mutagen: limited audio format support') + +# exiftool +try: + subprocess.check_output(['exiftool', '-ver']) + import exiftool + STRIPPERS['image/jpeg'] = exiftool.JpegStripper + STRIPPERS['image/png'] = exiftool.PngStripper +except OSError: # if exiftool is not installed, use hachoir instead + logging.info('Unable to find exiftool: limited images support') + STRIPPERS['image/jpeg'] = images.JpegStripper + STRIPPERS['image/png'] = images.PngStripper diff --git a/mat b/mat index d1a2f06..03a7367 100755 --- a/mat +++ b/mat @@ -10,9 +10,9 @@ import os import hachoir_core -from MAT import mat -from MAT import strippers -from MAT import archive +from libmat import mat +from libmat import strippers +from libmat import archive def parse(): diff --git a/mat-gui b/mat-gui index 96c9e53..5de5865 100755 --- a/mat-gui +++ b/mat-gui @@ -13,10 +13,10 @@ import sys import xml.sax import urllib2 -from MAT import mat -from MAT import strippers -from MAT import parser -from MAT import archive +from libmat import mat +from libmat import strippers +from libmat import parser +from libmat import archive logging.basicConfig(level=mat.LOGGING_LEVEL) diff --git a/nautilus/nautilus-mat.py b/nautilus/nautilus-mat.py index 19b601e..f270ed8 100644 --- a/nautilus/nautilus-mat.py +++ 
b/nautilus/nautilus-mat.py @@ -16,8 +16,8 @@ except: from gi.repository import Nautilus, GObject, Gtk -import MAT.mat -import MAT.strippers +import libmat.mat +import libmat.strippers class MatExtension(GObject.GObject, Nautilus.MenuProvider): @@ -33,7 +33,7 @@ class MatExtension(GObject.GObject, Nautilus.MenuProvider): # We're only going to put ourselves on supported mimetypes' context menus if not (file.get_mime_type() - in [i["mimetype"] for i in MAT.mat.list_supported_formats()]): + in [i["mimetype"] for i in libmat.mat.list_supported_formats()]): logging.debug("%s is not supported by MAT" % file.get_mime_type()) return @@ -70,7 +70,7 @@ class MatExtension(GObject.GObject, Nautilus.MenuProvider): file_path = urllib.unquote(file.get_uri()[7:]) - class_file = MAT.mat.create_class_file(file_path, + class_file = libmat.mat.create_class_file(file_path, backup=True, add2archive=False) if class_file: diff --git a/setup.py b/setup.py index 61a602e..6694d44 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( platforms = 'linux', license = 'GPLv2', url = 'https://mat.boum.org', - packages = ['MAT', 'MAT.hachoir_editor', 'MAT.bencode'], + packages = ['libmat', 'libmat.hachoir_editor', 'libmat.bencode'], scripts = ['mat', 'mat-gui'], data_files = [ ( 'share/applications', ['mat.desktop'] ), diff --git a/test/clitest.py b/test/clitest.py index ad895f5..13d545a 100644 --- a/test/clitest.py +++ b/test/clitest.py @@ -12,7 +12,7 @@ import sys import tarfile sys.path.append('..') -from MAT import mat +from libmat import mat import test diff --git a/test/libtest.py b/test/libtest.py index fdef091..1b25f86 100644 --- a/test/libtest.py +++ b/test/libtest.py @@ -14,7 +14,7 @@ import test import unittest sys.path.append('..') -import MAT +import libmat class TestRemovelib(test.MATTest): @@ -23,17 +23,17 @@ class TestRemovelib(test.MATTest): def test_remove(self): '''make sure that the lib remove all compromizing meta''' for _, dirty in self.file_list: - current_file = 
MAT.mat.create_class_file(dirty, False, add2archive=True) + current_file = libmat.mat.create_class_file(dirty, False, add2archive=True) current_file.remove_all() - current_file = MAT.mat.create_class_file(dirty, False, add2archive=True) + current_file = libmat.mat.create_class_file(dirty, False, add2archive=True) self.assertTrue(current_file.is_clean()) def test_remove_empty(self): '''Test removal with clean files''' for clean, _ in self.file_list: - current_file = MAT.mat.create_class_file(clean, False, add2archive=True) + current_file = libmat.mat.create_class_file(clean, False, add2archive=True) current_file.remove_all() - current_file = MAT.mat.create_class_file(clean, False, add2archive=True) + current_file = libmat.mat.create_class_file(clean, False, add2archive=True) self.assertTrue(current_file.is_clean()) @@ -43,13 +43,13 @@ class TestListlib(test.MATTest): def test_list(self): '''check if get_meta returns metadata''' for _, dirty in self.file_list: - current_file = MAT.mat.create_class_file(dirty, False, add2archive=True) + current_file = libmat.mat.create_class_file(dirty, False, add2archive=True) self.assertIsNotNone(current_file.get_meta()) def testlist_list_empty(self): '''check that a listing of a clean file returns an empty dict''' for clean, _ in self.file_list: - current_file = MAT.mat.create_class_file(clean, False, add2archive=True) + current_file = libmat.mat.create_class_file(clean, False, add2archive=True) self.assertEqual(current_file.get_meta(), dict()) @@ -59,13 +59,13 @@ class TestisCleanlib(test.MATTest): def test_dirty(self): '''test is_clean on dirty files''' for _, dirty in self.file_list: - current_file = MAT.mat.create_class_file(dirty, False, add2archive=True) + current_file = libmat.mat.create_class_file(dirty, False, add2archive=True) self.assertFalse(current_file.is_clean()) def test_clean(self): '''test is_clean on clean files''' for clean, _ in self.file_list: - current_file = MAT.mat.create_class_file(clean, False, 
add2archive=True) + current_file = libmat.mat.create_class_file(clean, False, add2archive=True) self.assertTrue(current_file.is_clean()) @@ -75,12 +75,12 @@ class TestFileAttributes(unittest.TestCase): ''' def test_not_exist(self): ''' test MAT's behaviour on non-existent file''' - self.assertFalse(MAT.mat.create_class_file('non_existent_file', False, add2archive=True)) + self.assertFalse(libmat.mat.create_class_file('non_existent_file', False, add2archive=True)) def test_empty(self): ''' test MAT's behaviour on empty file''' open('empty_file', 'a').close() - self.assertFalse(MAT.mat.create_class_file('empty_file', False, add2archive=True)) + self.assertFalse(libmat.mat.create_class_file('empty_file', False, add2archive=True)) os.remove('empty_file') @@ -91,12 +91,12 @@ class TestSecureRemove(unittest.TestCase): ''' test the secure removal of an existing file ''' _, file_to_remove = tempfile.mkstemp() - self.assertTrue(MAT.mat.secure_remove(file_to_remove)) + self.assertTrue(libmat.mat.secure_remove(file_to_remove)) def test_remove_fail(self): ''' test the secure removal of an non-removable file ''' - self.assertRaises(MAT.exceptions.UnableToWriteFile, MAT.mat.secure_remove, '/NOTREMOVABLE') + self.assertRaises(libmat.exceptions.UnableToWriteFile, libmat.mat.secure_remove, '/NOTREMOVABLE') class TestArchiveProcessing(test.MATTest): @@ -111,9 +111,9 @@ class TestArchiveProcessing(test.MATTest): tar.add(dirty) tar.add(clean) tar.close() - current_file = MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) current_file.remove_all() - current_file = MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) self.assertTrue(current_file.is_clean()) def test_remove_tar(self): @@ -125,9 +125,9 @@ class TestArchiveProcessing(test.MATTest): tar.add(dirty) tar.add(clean) tar.close() - current_file = 
MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) current_file.remove_all() - current_file = MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) self.assertTrue(current_file.is_clean()) def test_remove_gz(self): @@ -139,9 +139,9 @@ class TestArchiveProcessing(test.MATTest): tar.add(dirty) tar.add(clean) tar.close() - current_file = MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) current_file.remove_all() - current_file = MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) self.assertTrue(current_file.is_clean()) def test_get_unsupported(self): @@ -152,16 +152,16 @@ class TestArchiveProcessing(test.MATTest): for f in ('../mat.desktop', '../README.security', '../setup.py'): tar.add(f, f[3:]) # trim '../' tar.close() - current_file = MAT.mat.create_class_file(tarpath, False, add2archive=False) + current_file = libmat.mat.create_class_file(tarpath, False, add2archive=False) unsupported_files = set(current_file.is_clean(list_unsupported=True)) self.assertEqual(unsupported_files, set(('mat.desktop', 'README.security', 'setup.py'))) def test_archive_unwritable_content(self): path = os.path.join(self.tmpdir, './unwritable_content.zip') shutil.copy2('./unwritable_content.zip', self.tmpdir) - current_file = MAT.mat.create_class_file(path, False, add2archive=False) + current_file = libmat.mat.create_class_file(path, False, add2archive=False) current_file.remove_all() - current_file = MAT.mat.create_class_file(path, False, add2archive=False) + current_file = libmat.mat.create_class_file(path, False, add2archive=False) self.assertTrue(current_file.is_clean()) def get_tests(): -- cgit v1.3