From f6e3d57173604dab7228c830e84415ead02e169b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 26 Jul 2011 04:55:59 +0200 Subject: Tests are green for opendocument format --- lib/archive.py | 9 ++++++--- lib/mat.py | 15 +++++++-------- lib/office.py | 3 +-- lib/parser.py | 4 ++-- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/archive.py b/lib/archive.py index 8a8f7f4..f22af39 100644 --- a/lib/archive.py +++ b/lib/archive.py @@ -69,10 +69,13 @@ class ZipStripper(GenericArchiveStripper): return False except: #best solution I have found - logging.error('%s is not supported' % item.filename) - _, ext = os.path.splitext(name) + logging.info('%s\'s fileformat is not supported, or is a \ +harmless format' % item.filename) + base, ext = os.path.splitext(name) + bname = os.path.basename(item.filename) if ext not in parser.NOMETA: - return False + if bname != 'mimetype': + return False mat.secure_remove(name) zipin.close() return True diff --git a/lib/mat.py b/lib/mat.py index 5dcdbc2..ccf653f 100644 --- a/lib/mat.py +++ b/lib/mat.py @@ -18,8 +18,8 @@ import audio import office import archive -__version__ = "0.1" -__author__ = "jvoisin" +__version__ = '0.1' +__author__ = 'jvoisin' LOGGING_LEVEL = logging.DEBUG @@ -53,7 +53,7 @@ def is_secure(filename): ''' if not(os.path.isfile(filename)): #check if the file exist - logging.error("Error: %s is not a valid file" % filename) + logging.error('Error: %s is not a valid file' % filename) return False def create_class_file(name, backup, add2archive): @@ -64,7 +64,7 @@ def create_class_file(name, backup, add2archive): if is_secure(name): return - filename = "" + filename = '' realname = name try: filename = hachoir_core.cmd_line.unicodeFilename(name) @@ -72,7 +72,7 @@ def create_class_file(name, backup, add2archive): filename = name parser = hachoir_parser.createParser(filename) if not parser: - logging.error("Unable to parse %s" % filename) + logging.info('Unable to parse %s' % filename) return editor = hachoir_editor.createEditor(parser) @@ -85,7 +85,7 @@ def create_class_file(name, backup, add2archive): stripper_class = strippers[editor.input.__class__] except KeyError: #Place for another lib than hachoir - logging.error("Don't have stripper for format %s" % editor.description) + logging.info('Don\'t have stripper for format %s' % editor.description) return if editor.input.__class__ == hachoir_parser.misc.PDFDocument:#pdf @@ -95,8 +95,7 @@ def create_class_file(name, backup, add2archive): #zip based format mime = mimetypes.guess_type(filename)[0] try:#Ugly workaround, cleaning open document delete mime (wtf?) - if mime.startswith(#Open document format - 'application/vnd.oasis.opendocument'): + if mime.startswith('application/vnd.oasis.opendocument'): return office.OpenDocumentStripper(realname, filename, parser, editor, backup, add2archive) else:#normal zip diff --git a/lib/office.py b/lib/office.py index b7fa555..27677d2 100644 --- a/lib/office.py +++ b/lib/office.py @@ -34,7 +34,6 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): metadata[key] = node.text except KeyError:#no meta.xml file found logging.debug('%s has no opendocument metadata' % self.filename) - metadata[self.filename] = '' return metadata @@ -76,7 +75,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): except: logging.info('%s\' fileformat is not supported' % item) if self.add2archive: - zipout.write(item, name) + zipout.write(name, item) mat.secure_remove(name) zipout.comment = '' logging.info('%s treated' % self.filename) diff --git a/lib/parser.py b/lib/parser.py index 04d7625..aa7e7f1 100644 --- a/lib/parser.py +++ b/lib/parser.py @@ -13,7 +13,7 @@ import mimetypes import mat -NOMETA = ('.txt', '.bmp', '.py', '.xml') +NOMETA = ('.txt', '.bmp', '.py', '.xml', '.rdf') class Generic_parser(object): def __init__(self, realname, filename, parser, editor, backup, add2archive): @@ -21,7 +21,7 @@ class Generic_parser(object): self.output = basename + '.cleaned' + ext self.filename = filename #path + filename self.realname = realname #path + filename - self.shortname = os.path.basename(filename) #only filename + self.basename = os.path.basename(filename) #only filename self.mime = mimetypes.guess_type(filename)[0] #mimetype self.parser = parser self.editor = editor -- cgit v1.3