From 173449009769ce86493a179acb9c66c87125dce3 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 31 Jan 2014 03:54:28 +0000 Subject: Fix office-format handling by the GUI --- MAT/archive.py | 10 +++++++++- MAT/office.py | 5 +++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'MAT') diff --git a/MAT/archive.py b/MAT/archive.py index 3c6a139..62f4ca7 100644 --- a/MAT/archive.py +++ b/MAT/archive.py @@ -39,7 +39,7 @@ class GenericArchiveStripper(parser.GenericParser): mat.secure_remove(path_file) shutil.rmtree(self.tempdir) - def is_clean(self, list_unsupported): + def is_clean(self, list_unsupported=False): ''' Virtual method to check for harmul metadata ''' raise NotImplementedError @@ -312,6 +312,14 @@ class TarStripper(GenericArchiveStripper): return metadata +class TerminalZipStripper(ZipStripper): + ''' Represent a terminal level archive. + This type of archive can not contain nested archives. + It is used for formats like docx, which are basically + zipped xml. + ''' + + class GzipStripper(TarStripper): ''' Represent a tar.gz archive ''' diff --git a/MAT/office.py b/MAT/office.py index 97405b3..e4b9567 100644 --- a/MAT/office.py +++ b/MAT/office.py @@ -1,4 +1,5 @@ ''' Care about office's formats + ''' import logging @@ -19,7 +20,7 @@ import parser import archive -class OpenDocumentStripper(archive.ZipStripper): +class OpenDocumentStripper(archive.TerminalZipStripper): ''' An open document file is a zip, with xml file into. The one that interest us is meta.xml ''' @@ -68,7 +69,7 @@ class OpenDocumentStripper(archive.ZipStripper): return False -class OpenXmlStripper(archive.ZipStripper): +class OpenXmlStripper(archive.TerminalZipStripper): ''' Represent an office openxml document, which is like an opendocument format, with some tricky stuff added. It contains mostly xml, but can have media blobs, crap, ... -- cgit v1.3