From ea21231d6d332b5e27e85c0ce0c103cfb56433f8 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 25 Jul 2011 03:21:40 +0200 Subject: Correct handling of unknown file formats in zip files (and fix many bugs from previous commits) --- lib/archive.py | 6 ++++-- lib/office.py | 4 ++-- lib/parser.py | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/archive.py b/lib/archive.py index 21bc5c5..6e3efc0 100644 --- a/lib/archive.py +++ b/lib/archive.py @@ -59,9 +59,11 @@ class ZipStripper(GenericArchiveStripper): cfile = mat.create_class_file(name, False, self.add2archive) except: + #best solution I have found logging.error('%s is not supported' % item.filename) - #Returning false is the best solution imho - return False + _, ext = os.path.splitext(name) + if ext not in parser.NOMETA: + return False mat.secure_remove(name) zipin.close() return False diff --git a/lib/office.py b/lib/office.py index 2302dbc..9a5be3e 100644 --- a/lib/office.py +++ b/lib/office.py @@ -27,7 +27,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): method here : http://bugs.python.org/issue6818 ''' zipin = zipfile.ZipFile(self.filename, 'r') - zipout = zipfile.ZipFile(self.basename + parser.POSTFIX + self.ext, 'w', + zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) for item in zipin.namelist(): name = os.path.join(self.tempdir, item) @@ -134,7 +134,7 @@ class PdfStripper(parser.Generic_parser): ''' def __init__(self, filename, realname, backup): name, path = os.path.splitext(filename) - self.output = name + '.cleaned.'
+ ext + self.output = name + '.cleaned' + ext self.filename = filename self.backup = backup self.realname = realname diff --git a/lib/parser.py b/lib/parser.py index 11e776e..04d7625 100644 --- a/lib/parser.py +++ b/lib/parser.py @@ -13,12 +13,12 @@ import mimetypes import mat -NOMETA = ('*.txt', '*.bmp', '*.py') +NOMETA = ('.txt', '.bmp', '.py', '.xml') class Generic_parser(object): def __init__(self, realname, filename, parser, editor, backup, add2archive): basename, ext = os.path.splitext(filename) - self.output = basename + '.cleaned.' + ext + self.output = basename + '.cleaned' + ext self.filename = filename #path + filename self.realname = realname #path + filename self.shortname = os.path.basename(filename) #only filename -- cgit v1.3