diff options
| author | jvoisin | 2011-07-26 14:06:38 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-26 14:06:38 +0200 |
| commit | e62ae6a87f630cbd389cf1b75672b06cd56973c8 (patch) | |
| tree | 5433e5bde0d0448795626190f8014c61b38ac1c5 /lib | |
| parent | f6e3d57173604dab7228c830e84415ead02e169b (diff) | |
Pyflakes and pep8 validation
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/archive.py | 20 | ||||
| -rw-r--r-- | lib/audio.py | 4 | ||||
| -rw-r--r-- | lib/images.py | 4 | ||||
| -rw-r--r-- | lib/mat.py | 19 | ||||
| -rw-r--r-- | lib/misc.py | 48 | ||||
| -rw-r--r-- | lib/office.py | 24 | ||||
| -rw-r--r-- | lib/parser.py | 21 |
7 files changed, 95 insertions, 45 deletions
diff --git a/lib/archive.py b/lib/archive.py index f22af39..f11506a 100644 --- a/lib/archive.py +++ b/lib/archive.py | |||
| @@ -9,11 +9,13 @@ import tempfile | |||
| 9 | import parser | 9 | import parser |
| 10 | import mat | 10 | import mat |
| 11 | 11 | ||
| 12 | |||
| 12 | class GenericArchiveStripper(parser.Generic_parser): | 13 | class GenericArchiveStripper(parser.Generic_parser): |
| 13 | ''' | 14 | ''' |
| 14 | Represent a generic archive | 15 | Represent a generic archive |
| 15 | ''' | 16 | ''' |
| 16 | def __init__(self, realname, filename, parser, editor, backup, add2archive): | 17 | def __init__(self, realname, filename, parser, editor, backup, |
| 18 | add2archive): | ||
| 17 | super(GenericArchiveStripper, self).__init__(realname, | 19 | super(GenericArchiveStripper, self).__init__(realname, |
| 18 | filename, parser, editor, backup, add2archive) | 20 | filename, parser, editor, backup, add2archive) |
| 19 | self.compression = '' | 21 | self.compression = '' |
| @@ -32,6 +34,7 @@ class GenericArchiveStripper(parser.Generic_parser): | |||
| 32 | def remove_all_ugly(self): | 34 | def remove_all_ugly(self): |
| 33 | self._remove_all('ugly') | 35 | self._remove_all('ugly') |
| 34 | 36 | ||
| 37 | |||
| 35 | class ZipStripper(GenericArchiveStripper): | 38 | class ZipStripper(GenericArchiveStripper): |
| 36 | ''' | 39 | ''' |
| 37 | Represent a zip file | 40 | Represent a zip file |
| @@ -94,7 +97,6 @@ harmless format' % item.filename) | |||
| 94 | zipin.close() | 97 | zipin.close() |
| 95 | return metadata | 98 | return metadata |
| 96 | 99 | ||
| 97 | |||
| 98 | def _remove_all(self, method): | 100 | def _remove_all(self, method): |
| 99 | ''' | 101 | ''' |
| 100 | So far, the zipfile module does not allow to write a ZipInfo | 102 | So far, the zipfile module does not allow to write a ZipInfo |
| @@ -150,7 +152,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 150 | for item in tarin.getmembers(): | 152 | for item in tarin.getmembers(): |
| 151 | tarin.extract(item, self.tempdir) | 153 | tarin.extract(item, self.tempdir) |
| 152 | name = os.path.join(self.tempdir, item.name) | 154 | name = os.path.join(self.tempdir, item.name) |
| 153 | if item.type is '0': #is item a regular file ? | 155 | if item.type is '0': # is item a regular file ? |
| 154 | #no backup file | 156 | #no backup file |
| 155 | try: | 157 | try: |
| 156 | cfile = mat.create_class_file(name, False, | 158 | cfile = mat.create_class_file(name, False, |
| @@ -164,7 +166,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 164 | logging.info('%s\' format is not supported' % | 166 | logging.info('%s\' format is not supported' % |
| 165 | item.name) | 167 | item.name) |
| 166 | if self.add2archive: | 168 | if self.add2archive: |
| 167 | tarout.add(name, item.name,filter=self._remove) | 169 | tarout.add(name, item.name, filter=self._remove) |
| 168 | mat.secure_remove(name) | 170 | mat.secure_remove(name) |
| 169 | tarin.close() | 171 | tarin.close() |
| 170 | tarout.close() | 172 | tarout.close() |
| @@ -194,7 +196,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 194 | return False | 196 | return False |
| 195 | tarin.extract(item, self.tempdir) | 197 | tarin.extract(item, self.tempdir) |
| 196 | name = os.path.join(self.tempdir, item.name) | 198 | name = os.path.join(self.tempdir, item.name) |
| 197 | if item.type is '0': #is item a regular file ? | 199 | if item.type is '0': # is item a regular file ? |
| 198 | #no backup file | 200 | #no backup file |
| 199 | try: | 201 | try: |
| 200 | class_file = mat.create_class_file(name, | 202 | class_file = mat.create_class_file(name, |
| @@ -216,7 +218,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 216 | metadata = {} | 218 | metadata = {} |
| 217 | for current_file in tarin.getmembers(): | 219 | for current_file in tarin.getmembers(): |
| 218 | if current_file.type is '0': | 220 | if current_file.type is '0': |
| 219 | if not self.is_file_clean(current_file):#if there is meta | 221 | if not self.is_file_clean(current_file): # if there is meta |
| 220 | current_meta = {} | 222 | current_meta = {} |
| 221 | current_meta['mtime'] = current_file.mtime | 223 | current_meta['mtime'] = current_file.mtime |
| 222 | current_meta['uid'] = current_file.uid | 224 | current_meta['uid'] = current_file.uid |
| @@ -229,14 +231,16 @@ class TarStripper(GenericArchiveStripper): | |||
| 229 | 231 | ||
| 230 | 232 | ||
| 231 | class GzipStripper(TarStripper): | 233 | class GzipStripper(TarStripper): |
| 232 | def __init__(self, realname, filename, parser, editor, backup, add2archive): | 234 | def __init__(self, realname, filename, parser, editor, backup, |
| 235 | add2archive): | ||
| 233 | super(GzipStripper, self).__init__(realname, | 236 | super(GzipStripper, self).__init__(realname, |
| 234 | filename, parser, editor, backup, add2archive) | 237 | filename, parser, editor, backup, add2archive) |
| 235 | self.compression = ':gz' | 238 | self.compression = ':gz' |
| 236 | 239 | ||
| 237 | 240 | ||
| 238 | class Bzip2Stripper(TarStripper): | 241 | class Bzip2Stripper(TarStripper): |
| 239 | def __init__(self, realname, filename, parser, editor, backup, add2archive): | 242 | def __init__(self, realname, filename, parser, editor, backup, |
| 243 | add2archive): | ||
| 240 | super(Bzip2Stripper, self).__init__(realname, | 244 | super(Bzip2Stripper, self).__init__(realname, |
| 241 | filename, parser, editor, backup, add2archive) | 245 | filename, parser, editor, backup, add2archive) |
| 242 | self.compression = ':bz2' | 246 | self.compression = ':bz2' |
diff --git a/lib/audio.py b/lib/audio.py index 6d653bc..35d4fde 100644 --- a/lib/audio.py +++ b/lib/audio.py | |||
| @@ -1,6 +1,10 @@ | |||
| 1 | import parser | 1 | import parser |
| 2 | 2 | ||
| 3 | |||
| 3 | class MpegAudioStripper(parser.Generic_parser): | 4 | class MpegAudioStripper(parser.Generic_parser): |
| 5 | ''' | ||
| 6 | mpeg audio file (mp3, ...) | ||
| 7 | ''' | ||
| 4 | def _should_remove(self, field): | 8 | def _should_remove(self, field): |
| 5 | if field.name in ("id3v1", "id3v2"): | 9 | if field.name in ("id3v1", "id3v2"): |
| 6 | return True | 10 | return True |
diff --git a/lib/images.py b/lib/images.py index 4441b70..bab0bfb 100644 --- a/lib/images.py +++ b/lib/images.py | |||
| @@ -1,8 +1,5 @@ | |||
| 1 | import parser | 1 | import parser |
| 2 | 2 | ||
| 3 | class BmpStripper(parser.Generic_parser): | ||
| 4 | def _should_remove(self, field): | ||
| 5 | return False | ||
| 6 | 3 | ||
| 7 | class JpegStripper(parser.Generic_parser): | 4 | class JpegStripper(parser.Generic_parser): |
| 8 | def _should_remove(self, field): | 5 | def _should_remove(self, field): |
| @@ -13,6 +10,7 @@ class JpegStripper(parser.Generic_parser): | |||
| 13 | else: | 10 | else: |
| 14 | return False | 11 | return False |
| 15 | 12 | ||
| 13 | |||
| 16 | class PngStripper(parser.Generic_parser): | 14 | class PngStripper(parser.Generic_parser): |
| 17 | def _should_remove(self, field): | 15 | def _should_remove(self, field): |
| 18 | if field.name.startswith("text["): | 16 | if field.name.startswith("text["): |
| @@ -23,12 +23,11 @@ __author__ = 'jvoisin' | |||
| 23 | 23 | ||
| 24 | LOGGING_LEVEL = logging.DEBUG | 24 | LOGGING_LEVEL = logging.DEBUG |
| 25 | 25 | ||
| 26 | logging.basicConfig(level = LOGGING_LEVEL) | 26 | logging.basicConfig(level=LOGGING_LEVEL) |
| 27 | 27 | ||
| 28 | strippers = { | 28 | strippers = { |
| 29 | hachoir_parser.image.JpegFile: images.JpegStripper, | 29 | hachoir_parser.image.JpegFile: images.JpegStripper, |
| 30 | hachoir_parser.image.PngFile: images.PngStripper, | 30 | hachoir_parser.image.PngFile: images.PngStripper, |
| 31 | hachoir_parser.image.bmp.BmpFile: images.BmpStripper, | ||
| 32 | hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, | 31 | hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, |
| 33 | hachoir_parser.misc.PDFDocument: office.PdfStripper, | 32 | hachoir_parser.misc.PDFDocument: office.PdfStripper, |
| 34 | hachoir_parser.archive.TarFile: archive.TarStripper, | 33 | hachoir_parser.archive.TarFile: archive.TarStripper, |
| @@ -37,6 +36,7 @@ strippers = { | |||
| 37 | hachoir_parser.archive.zip.ZipFile: archive.ZipStripper, | 36 | hachoir_parser.archive.zip.ZipFile: archive.ZipStripper, |
| 38 | } | 37 | } |
| 39 | 38 | ||
| 39 | |||
| 40 | def secure_remove(filename): | 40 | def secure_remove(filename): |
| 41 | ''' | 41 | ''' |
| 42 | securely remove the file | 42 | securely remove the file |
| @@ -52,10 +52,11 @@ def is_secure(filename): | |||
| 52 | Prevent shell injection | 52 | Prevent shell injection |
| 53 | ''' | 53 | ''' |
| 54 | 54 | ||
| 55 | if not(os.path.isfile(filename)): #check if the file exist | 55 | if not(os.path.isfile(filename)): # check if the file exist |
| 56 | logging.error('Error: %s is not a valid file' % filename) | 56 | logging.error('Error: %s is not a valid file' % filename) |
| 57 | return False | 57 | return False |
| 58 | 58 | ||
| 59 | |||
| 59 | def create_class_file(name, backup, add2archive): | 60 | def create_class_file(name, backup, add2archive): |
| 60 | ''' | 61 | ''' |
| 61 | return a $FILETYPEStripper() class, | 62 | return a $FILETYPEStripper() class, |
| @@ -68,7 +69,7 @@ def create_class_file(name, backup, add2archive): | |||
| 68 | realname = name | 69 | realname = name |
| 69 | try: | 70 | try: |
| 70 | filename = hachoir_core.cmd_line.unicodeFilename(name) | 71 | filename = hachoir_core.cmd_line.unicodeFilename(name) |
| 71 | except TypeError:# get rid of "TypeError: decoding Unicode is not supported" | 72 | except TypeError: # get rid of "decoding Unicode is not supported" |
| 72 | filename = name | 73 | filename = name |
| 73 | parser = hachoir_parser.createParser(filename) | 74 | parser = hachoir_parser.createParser(filename) |
| 74 | if not parser: | 75 | if not parser: |
| @@ -88,22 +89,22 @@ def create_class_file(name, backup, add2archive): | |||
| 88 | logging.info('Don\'t have stripper for format %s' % editor.description) | 89 | logging.info('Don\'t have stripper for format %s' % editor.description) |
| 89 | return | 90 | return |
| 90 | 91 | ||
| 91 | if editor.input.__class__ == hachoir_parser.misc.PDFDocument:#pdf | 92 | if editor.input.__class__ == hachoir_parser.misc.PDFDocument: # pdf |
| 92 | return stripper_class(filename, realname, backup) | 93 | return stripper_class(filename, realname, backup) |
| 93 | 94 | ||
| 94 | elif editor.input.__class__ == hachoir_parser.archive.zip.ZipFile: | 95 | elif editor.input.__class__ == hachoir_parser.archive.zip.ZipFile: |
| 95 | #zip based format | 96 | #zip based format |
| 96 | mime = mimetypes.guess_type(filename)[0] | 97 | mime = mimetypes.guess_type(filename)[0] |
| 97 | try:#Ugly workaround, cleaning open document delete mime (wtf?) | 98 | try: # ugly workaround, cleaning open document delete mime (wtf?) |
| 98 | if mime.startswith('application/vnd.oasis.opendocument'): | 99 | if mime.startswith('application/vnd.oasis.opendocument'): |
| 99 | return office.OpenDocumentStripper(realname, filename, parser, | 100 | return office.OpenDocumentStripper(realname, filename, parser, |
| 100 | editor, backup, add2archive) | 101 | editor, backup, add2archive) |
| 101 | else:#normal zip | 102 | else: # normal zip |
| 102 | return stripper_class(realname, filename, parser, editor, | 103 | return stripper_class(realname, filename, parser, editor, |
| 103 | backup, add2archive) | 104 | backup, add2archive) |
| 104 | except:#normal zip file | 105 | except: # normal zip |
| 105 | return stripper_class(realname, filename, parser, editor, backup, | 106 | return stripper_class(realname, filename, parser, editor, backup, |
| 106 | add2archive) | 107 | add2archive) |
| 107 | else:#normal handling | 108 | else: # normal handling |
| 108 | return stripper_class(realname, filename, parser, editor, backup, | 109 | return stripper_class(realname, filename, parser, editor, backup, |
| 109 | add2archive) | 110 | add2archive) |
diff --git a/lib/misc.py b/lib/misc.py new file mode 100644 index 0000000..ce14313 --- /dev/null +++ b/lib/misc.py | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | import hachoir_core | ||
| 2 | import parser | ||
| 3 | |||
| 4 | |||
| 5 | class TorrentStripper(parser.Generic_parser): | ||
| 6 | ''' | ||
| 7 | A torrent file looks like: | ||
| 8 | -root | ||
| 9 | -start | ||
| 10 | -announce | ||
| 11 | -announce-list | ||
| 12 | -comment | ||
| 13 | -created_by | ||
| 14 | -creation_date | ||
| 15 | -encoding | ||
| 16 | -info | ||
| 17 | -end | ||
| 18 | ''' | ||
| 19 | def remove_all(self): | ||
| 20 | for field in self.editor['root']: | ||
| 21 | if self._should_remove(field): | ||
| 22 | #FIXME : hachoir does not support torrent metadata editing :< | ||
| 23 | del self.editor['/root/' + field.name] | ||
| 24 | hachoir_core.field.writeIntoFile(self.editor, | ||
| 25 | self.filename + parser.POSTFIX) | ||
| 26 | self.do_backup() | ||
| 27 | |||
| 28 | def is_clean(self): | ||
| 29 | for field in self.editor['root']: | ||
| 30 | if self._should_remove(field): | ||
| 31 | return False | ||
| 32 | return True | ||
| 33 | |||
| 34 | def get_meta(self): | ||
| 35 | metadata = {} | ||
| 36 | for field in self.editor['root']: | ||
| 37 | if self._should_remove(field): | ||
| 38 | try: # FIXME | ||
| 39 | metadata[field.name] = field.value | ||
| 40 | except: | ||
| 41 | metadata[field.name] = 'harmful content' | ||
| 42 | return metadata | ||
| 43 | |||
| 44 | def _should_remove(self, field): | ||
| 45 | if field.name in ('comment', 'created_by', 'creation_date', 'info'): | ||
| 46 | return True | ||
| 47 | else: | ||
| 48 | return False | ||
diff --git a/lib/office.py b/lib/office.py index 27677d2..432bc0b 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -5,17 +5,16 @@ import tempfile | |||
| 5 | import glob | 5 | import glob |
| 6 | import logging | 6 | import logging |
| 7 | import zipfile | 7 | import zipfile |
| 8 | import shutil | ||
| 9 | import re | 8 | import re |
| 10 | from xml.etree import ElementTree | 9 | from xml.etree import ElementTree |
| 11 | 10 | ||
| 12 | import hachoir_core | ||
| 13 | 11 | ||
| 14 | import pdfrw | 12 | import pdfrw |
| 15 | import mat | 13 | import mat |
| 16 | import parser | 14 | import parser |
| 17 | import archive | 15 | import archive |
| 18 | 16 | ||
| 17 | |||
| 19 | class OpenDocumentStripper(archive.GenericArchiveStripper): | 18 | class OpenDocumentStripper(archive.GenericArchiveStripper): |
| 20 | ''' | 19 | ''' |
| 21 | An open document file is a zip, with xml file into. | 20 | An open document file is a zip, with xml file into. |
| @@ -32,11 +31,10 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 32 | for node in tree.iter(): | 31 | for node in tree.iter(): |
| 33 | key = re.sub('{.*}', '', node.tag) | 32 | key = re.sub('{.*}', '', node.tag) |
| 34 | metadata[key] = node.text | 33 | metadata[key] = node.text |
| 35 | except KeyError:#no meta.xml file found | 34 | except KeyError: # no meta.xml file found |
| 36 | logging.debug('%s has no opendocument metadata' % self.filename) | 35 | logging.debug('%s has no opendocument metadata' % self.filename) |
| 37 | return metadata | 36 | return metadata |
| 38 | 37 | ||
| 39 | |||
| 40 | def _remove_all(self, method): | 38 | def _remove_all(self, method): |
| 41 | ''' | 39 | ''' |
| 42 | FIXME ? | 40 | FIXME ? |
| @@ -50,7 +48,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 50 | name = os.path.join(self.tempdir, item) | 48 | name = os.path.join(self.tempdir, item) |
| 51 | if item.endswith('.xml') or item == 'mimetype': | 49 | if item.endswith('.xml') or item == 'mimetype': |
| 52 | #keep .xml files, and the "manifest" file | 50 | #keep .xml files, and the "manifest" file |
| 53 | if item != 'meta.xml':#contains the metadata | 51 | if item != 'meta.xml': # contains the metadata |
| 54 | zipin.extract(item, self.tempdir) | 52 | zipin.extract(item, self.tempdir) |
| 55 | zipout.write(name, item) | 53 | zipout.write(name, item) |
| 56 | mat.secure_remove(name) | 54 | mat.secure_remove(name) |
| @@ -73,7 +71,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 73 | self.filename)) | 71 | self.filename)) |
| 74 | zipout.write(name, item) | 72 | zipout.write(name, item) |
| 75 | except: | 73 | except: |
| 76 | logging.info('%s\' fileformat is not supported' % item) | 74 | logging.info('%s\' fileformat is not supported' % item) |
| 77 | if self.add2archive: | 75 | if self.add2archive: |
| 78 | zipout.write(name, item) | 76 | zipout.write(name, item) |
| 79 | mat.secure_remove(name) | 77 | mat.secure_remove(name) |
| @@ -88,7 +86,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 88 | try: | 86 | try: |
| 89 | zipin.getinfo('meta.xml') | 87 | zipin.getinfo('meta.xml') |
| 90 | return False | 88 | return False |
| 91 | except KeyError:#no meta.xml in the file | 89 | except KeyError: # no meta.xml in the file |
| 92 | zipin.close() | 90 | zipin.close() |
| 93 | czf = archive.ZipStripper(self.realname, self.filename, | 91 | czf = archive.ZipStripper(self.realname, self.filename, |
| 94 | self.parser, self.editor, self.backup, self.add2archive) | 92 | self.parser, self.editor, self.backup, self.add2archive) |
| @@ -104,7 +102,7 @@ class PdfStripper(parser.Generic_parser): | |||
| 104 | Represent a pdf file, with the help of pdfrw | 102 | Represent a pdf file, with the help of pdfrw |
| 105 | ''' | 103 | ''' |
| 106 | def __init__(self, filename, realname, backup): | 104 | def __init__(self, filename, realname, backup): |
| 107 | name, path = os.path.splitext(filename) | 105 | name, ext = os.path.splitext(filename) |
| 108 | self.output = name + '.cleaned' + ext | 106 | self.output = name + '.cleaned' + ext |
| 109 | self.filename = filename | 107 | self.filename = filename |
| 110 | self.backup = backup | 108 | self.backup = backup |
| @@ -137,7 +135,7 @@ class PdfStripper(parser.Generic_parser): | |||
| 137 | ''' | 135 | ''' |
| 138 | _, self.tmpdir = tempfile.mkstemp() | 136 | _, self.tmpdir = tempfile.mkstemp() |
| 139 | subprocess.call(self.convert % (self.filename, self.tmpdir + | 137 | subprocess.call(self.convert % (self.filename, self.tmpdir + |
| 140 | 'temp.jpg'), shell=True)#Convert pages to jpg | 138 | 'temp.jpg'), shell=True) # Convert pages to jpg |
| 141 | 139 | ||
| 142 | for current_file in glob.glob(self.tmpdir + 'temp*'): | 140 | for current_file in glob.glob(self.tmpdir + 'temp*'): |
| 143 | #Clean every jpg image | 141 | #Clean every jpg image |
| @@ -145,18 +143,18 @@ class PdfStripper(parser.Generic_parser): | |||
| 145 | class_file.remove_all() | 143 | class_file.remove_all() |
| 146 | 144 | ||
| 147 | subprocess.call(self.convert % (self.tmpdir + | 145 | subprocess.call(self.convert % (self.tmpdir + |
| 148 | 'temp.jpg*', self.output), shell=True)#Assemble jpg into pdf | 146 | 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf |
| 149 | 147 | ||
| 150 | for current_file in glob.glob(self.tmpdir + 'temp*'): | 148 | for current_file in glob.glob(self.tmpdir + 'temp*'): |
| 151 | #remove jpg files | 149 | #remove jpg files |
| 152 | mat.secure_remove(current_file) | 150 | mat.secure_remove(current_file) |
| 153 | 151 | ||
| 154 | if self.backup is False: | 152 | if self.backup is False: |
| 155 | mat.secure_remove(self.filename) #remove the old file | 153 | mat.secure_remove(self.filename) # remove the old file |
| 156 | os.rename(self.output, self.filename)#rename the new | 154 | os.rename(self.output, self.filename) # rename the new |
| 157 | name = self.realname | 155 | name = self.realname |
| 158 | else: | 156 | else: |
| 159 | name = output_file | 157 | name = self.output |
| 160 | class_file = mat.create_class_file(name, False) | 158 | class_file = mat.create_class_file(name, False) |
| 161 | class_file.remove_all() | 159 | class_file.remove_all() |
| 162 | 160 | ||
diff --git a/lib/parser.py b/lib/parser.py index aa7e7f1..28e0849 100644 --- a/lib/parser.py +++ b/lib/parser.py | |||
| @@ -2,27 +2,25 @@ | |||
| 2 | Parent class of all parser | 2 | Parent class of all parser |
| 3 | ''' | 3 | ''' |
| 4 | 4 | ||
| 5 | import hachoir_core.error | 5 | import hachoir_core |
| 6 | import hachoir_parser | ||
| 7 | import hachoir_editor | ||
| 8 | 6 | ||
| 9 | import sys | ||
| 10 | import os | 7 | import os |
| 11 | import subprocess | ||
| 12 | import mimetypes | 8 | import mimetypes |
| 13 | 9 | ||
| 14 | import mat | 10 | import mat |
| 15 | 11 | ||
| 16 | NOMETA = ('.txt', '.bmp', '.py', '.xml', '.rdf') | 12 | NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml') |
| 13 | |||
| 17 | 14 | ||
| 18 | class Generic_parser(object): | 15 | class Generic_parser(object): |
| 19 | def __init__(self, realname, filename, parser, editor, backup, add2archive): | 16 | def __init__(self, realname, filename, parser, editor, backup, |
| 17 | add2archive): | ||
| 20 | basename, ext = os.path.splitext(filename) | 18 | basename, ext = os.path.splitext(filename) |
| 21 | self.output = basename + '.cleaned' + ext | 19 | self.output = basename + '.cleaned' + ext |
| 22 | self.filename = filename #path + filename | 20 | self.filename = filename # path + filename |
| 23 | self.realname = realname #path + filename | 21 | self.realname = realname # path + filename |
| 24 | self.basename = os.path.basename(filename) #only filename | 22 | self.basename = os.path.basename(filename) # only filename |
| 25 | self.mime = mimetypes.guess_type(filename)[0] #mimetype | 23 | self.mime = mimetypes.guess_type(filename)[0] # mimetype |
| 26 | self.parser = parser | 24 | self.parser = parser |
| 27 | self.editor = editor | 25 | self.editor = editor |
| 28 | self.backup = backup | 26 | self.backup = backup |
| @@ -56,7 +54,6 @@ class Generic_parser(object): | |||
| 56 | ''' | 54 | ''' |
| 57 | self.remove_all() | 55 | self.remove_all() |
| 58 | 56 | ||
| 59 | |||
| 60 | def _remove(self, field): | 57 | def _remove(self, field): |
| 61 | ''' | 58 | ''' |
| 62 | Delete the given field | 59 | Delete the given field |
