From c3ce1dd99ec1671d50a7cf89dc1b287fbbdf96aa Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 26 Jul 2011 21:12:58 +0200 Subject: Simplification of the __init__() method --- lib/archive.py | 21 +++++++++------------ lib/audio.py | 3 +++ lib/mat.py | 9 +++------ lib/office.py | 8 ++++---- lib/parser.py | 22 ++++++++++++++-------- 5 files changed, 33 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/archive.py b/lib/archive.py index 1aaf74b..f9e4dba 100644 --- a/lib/archive.py +++ b/lib/archive.py @@ -14,10 +14,9 @@ class GenericArchiveStripper(parser.GenericParser): ''' Represent a generic archive ''' - def __init__(self, realname, filename, parser, editor, backup, - add2archive): - super(GenericArchiveStripper, self).__init__(realname, - filename, parser, editor, backup, add2archive) + def __init__(self, filename, parser, mime, backup, add2archive): + super(GenericArchiveStripper, self).__init__(filename, parser, mime, + backup, add2archive) self.compression = '' self.add2archive = add2archive self.tempdir = tempfile.mkdtemp() @@ -254,10 +253,9 @@ class GzipStripper(TarStripper): ''' Represent a tar.gz archive ''' - def __init__(self, realname, filename, parser, editor, backup, - add2archive): - super(GzipStripper, self).__init__(realname, - filename, parser, editor, backup, add2archive) + def __init__(self, filename, parser, mime, backup, add2archive): + super(GzipStripper, self).__init__(filename, parser, mime, backup, + add2archive) self.compression = ':gz' @@ -265,8 +263,7 @@ class Bzip2Stripper(TarStripper): ''' Represents a tar.bz2 archive ''' - def __init__(self, realname, filename, parser, editor, backup, - add2archive): - super(Bzip2Stripper, self).__init__(realname, - filename, parser, editor, backup, add2archive) + def __init__(self, filename, parser, mime, backup, add2archive): + super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, + add2archive) self.compression = ':bz2' diff --git a/lib/audio.py b/lib/audio.py index d77efd9..d146fad 100644 --- a/lib/audio.py +++ b/lib/audio.py @@ -10,3 +10,6 @@ class MpegAudioStripper(parser.GenericParser): return True else: return False + +class FlacStripper(parser.GenericParser): + pass diff --git a/lib/mat.py b/lib/mat.py index ea4fefd..3d1687f 100644 --- a/lib/mat.py +++ b/lib/mat.py @@ -11,7 +11,6 @@ import mimetypes import hachoir_core.cmd_line import hachoir_parser -import hachoir_editor import images import audio @@ -39,7 +38,7 @@ STRIPPERS = { try: import mutagen - STRIPPERS[hachoir_parser.audio.FlacParser] = audio.FlacStripper + STRIPPERS['audio/x-flac'] = audio.FlacStripper except ImportError: print('unable to import python-mutagen : limited audio format support') @@ -85,7 +84,6 @@ def create_class_file(name, backup, add2archive): logging.info('Unable to parse %s' % filename) return - editor = hachoir_editor.createEditor(parser) mime = parser.mime_type if mime.startswith('application/vnd.oasis.opendocument'): @@ -94,8 +92,7 @@ def create_class_file(name, backup, add2archive): try: stripper_class = STRIPPERS[mime] except KeyError: - logging.info('Don\'t have stripper for format %s' % editor.description) + logging.info('Don\'t have stripper for %s\' format' % filename) return - return stripper_class(realname, filename, parser, editor, backup, - add2archive) + return stripper_class(filename, parser, mime, backup, add2archive) diff --git a/lib/office.py b/lib/office.py index 5fa475d..00fce3c 100644 --- a/lib/office.py +++ b/lib/office.py @@ -89,8 +89,8 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): return False except KeyError: # no meta.xml in the file zipin.close() - czf = archive.ZipStripper(self.realname, self.filename, - self.parser, self.editor, self.backup, self.add2archive) + czf = archive.ZipStripper(self.filename, self.parser, + 'application/zip', self.backup, self.add2archive) if czf.is_clean(): return True else: @@ -102,14 +102,14 @@ class PdfStripper(parser.GenericParser): ''' Represent a pdf file, with the help of pdfrw ''' - def __init__(self, filename, realname, backup): + def __init__(self, filename, parser, mime, backup, add2archive): name, ext = os.path.splitext(filename) self.output = name + '.cleaned' + ext self.filename = filename self.backup = backup self.realname = realname self.shortname = os.path.basename(filename) - self.mime = mimetypes.guess_type(filename)[0] + self.mime = mime self.tempdir = tempfile.mkdtemp() self.trailer = pdfrw.PdfReader(self.filename) self.writer = pdfrw.PdfWriter() diff --git a/lib/parser.py b/lib/parser.py index ae647fe..385dd78 100644 --- a/lib/parser.py +++ b/lib/parser.py @@ -3,6 +3,7 @@ ''' import hachoir_core +import hachoir_editor import os import mimetypes @@ -13,17 +14,22 @@ NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml') class GenericParser(object): - def __init__(self, realname, filename, parser, editor, backup, - add2archive): + def __init__(self, filename, parser, mime, backup, add2archive): + self.filename = '' + self.parser = parser + self.mime = mime + self.backup = backup + self.editor = hachoir_editor.createEditor(parser) + self.realname = filename + try: + self.filename = hachoir_core.cmd_line.unicodeFilename(filename) + except TypeError: # get rid of "decoding Unicode is not supported" + self.filename = filename basename, ext = os.path.splitext(filename) self.output = basename + '.cleaned' + ext - self.filename = filename # path + filename - self.realname = realname # path + filename self.basename = os.path.basename(filename) # only filename - self.mime = mimetypes.guess_type(filename)[0] # mimetype - self.parser = parser - self.editor = editor - self.backup = backup + + def is_clean(self): ''' -- cgit v1.3