diff options
| author | jvoisin | 2011-07-24 02:30:50 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-24 02:30:50 +0200 |
| commit | ace3d8213921a9308d30afc057fc21221420e12e (patch) | |
| tree | be506ef8b0534127ad080f7d1ad7d4ca9a020a67 /lib | |
| parent | bcc0ad2e7491c212ef35ca250fb8c5f2c53572da (diff) | |
First implementation of open document format
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/mat.py | 36 | ||||
| -rw-r--r-- | lib/office.py | 86 |
2 files changed, 114 insertions, 8 deletions
| @@ -7,6 +7,7 @@ | |||
| 7 | import os | 7 | import os |
| 8 | import subprocess | 8 | import subprocess |
| 9 | import logging | 9 | import logging |
| 10 | import mimetypes | ||
| 10 | 11 | ||
| 11 | import hachoir_core.cmd_line | 12 | import hachoir_core.cmd_line |
| 12 | import hachoir_parser | 13 | import hachoir_parser |
| @@ -14,7 +15,7 @@ import hachoir_editor | |||
| 14 | 15 | ||
| 15 | import images | 16 | import images |
| 16 | import audio | 17 | import audio |
| 17 | import misc | 18 | import office |
| 18 | import archive | 19 | import archive |
| 19 | 20 | ||
| 20 | __version__ = "0.1" | 21 | __version__ = "0.1" |
| @@ -29,7 +30,7 @@ strippers = { | |||
| 29 | hachoir_parser.image.PngFile: images.PngStripper, | 30 | hachoir_parser.image.PngFile: images.PngStripper, |
| 30 | hachoir_parser.image.bmp.BmpFile: images.BmpStripper, | 31 | hachoir_parser.image.bmp.BmpFile: images.BmpStripper, |
| 31 | hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, | 32 | hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, |
| 32 | hachoir_parser.misc.PDFDocument: misc.PdfStripper, | 33 | hachoir_parser.misc.PDFDocument: office.PdfStripper, |
| 33 | hachoir_parser.archive.TarFile: archive.TarStripper, | 34 | hachoir_parser.archive.TarFile: archive.TarStripper, |
| 34 | hachoir_parser.archive.gzip_parser.GzipParser: archive.GzipStripper, | 35 | hachoir_parser.archive.gzip_parser.GzipParser: archive.GzipStripper, |
| 35 | hachoir_parser.archive.bzip2_parser.Bzip2Parser: archive.Bzip2Stripper, | 36 | hachoir_parser.archive.bzip2_parser.Bzip2Parser: archive.Bzip2Stripper, |
| @@ -61,12 +62,14 @@ def create_class_file(name, backup, add2archive): | |||
| 61 | corresponding to the filetype of the given file | 62 | corresponding to the filetype of the given file |
| 62 | ''' | 63 | ''' |
| 63 | if is_secure(name): | 64 | if is_secure(name): |
| 64 | print 'a' | ||
| 65 | return | 65 | return |
| 66 | 66 | ||
| 67 | filename = "" | 67 | filename = "" |
| 68 | realname = name | 68 | realname = name |
| 69 | filename = hachoir_core.cmd_line.unicodeFilename(name) | 69 | try: |
| 70 | filename = hachoir_core.cmd_line.unicodeFilename(name) | ||
| 71 | except TypeError:# get rid of "TypeError: decoding Unicode is not supported" | ||
| 72 | filename = name | ||
| 70 | parser = hachoir_parser.createParser(filename) | 73 | parser = hachoir_parser.createParser(filename) |
| 71 | if not parser: | 74 | if not parser: |
| 72 | logging.error("Unable to parse %s" % filename) | 75 | logging.error("Unable to parse %s" % filename) |
| @@ -82,9 +85,26 @@ def create_class_file(name, backup, add2archive): | |||
| 82 | stripper_class = strippers[editor.input.__class__] | 85 | stripper_class = strippers[editor.input.__class__] |
| 83 | except KeyError: | 86 | except KeyError: |
| 84 | #Place for another lib than hachoir | 87 | #Place for another lib than hachoir |
| 85 | logging.error("Don't have stripper for file type %s" % editor.description) | 88 | logging.error("Don't have stripper for format %s" % editor.description) |
| 86 | return | 89 | return |
| 87 | if editor.input.__class__ == hachoir_parser.misc.PDFDocument: | 90 | |
| 91 | if editor.input.__class__ == hachoir_parser.misc.PDFDocument:#pdf | ||
| 88 | return stripper_class(filename, realname, backup) | 92 | return stripper_class(filename, realname, backup) |
| 89 | return stripper_class(realname, filename, parser, editor, backup, | 93 | |
| 90 | add2archive) | 94 | elif editor.input.__class__ == hachoir_parser.archive.zip.ZipFile: |
| 95 | #zip based format | ||
| 96 | mime = mimetypes.guess_type(filename)[0] | ||
| 97 | try:#Ugly workaround, cleaning open document delete mime (wtf?) | ||
| 98 | if mime.startswith(#Open document format | ||
| 99 | 'application/vnd.oasis.opendocument'): | ||
| 100 | return office.OpenDocumentStripper(realname, filename, parser, | ||
| 101 | editor, backup, add2archive) | ||
| 102 | else:#normal zip | ||
| 103 | return stripper_class(realname, filename, parser, editor, | ||
| 104 | backup, add2archive) | ||
| 105 | except:#normal zip file | ||
| 106 | return stripper_class(realname, filename, parser, editor, backup, | ||
| 107 | add2archive) | ||
| 108 | else:#normal handling | ||
| 109 | return stripper_class(realname, filename, parser, editor, backup, | ||
| 110 | add2archive) | ||
diff --git a/lib/office.py b/lib/office.py
index de38129..5d62732 100644
--- a/lib/office.py
+++ b/lib/office.py
| @@ -3,12 +3,98 @@ import mimetypes | |||
| 3 | import subprocess | 3 | import subprocess |
| 4 | import tempfile | 4 | import tempfile |
| 5 | import glob | 5 | import glob |
| 6 | import logging | ||
| 7 | import zipfile | ||
| 8 | import shutil | ||
| 6 | 9 | ||
| 7 | import hachoir_core | 10 | import hachoir_core |
| 8 | 11 | ||
| 9 | import pdfrw | 12 | import pdfrw |
| 10 | import mat | 13 | import mat |
| 11 | import parser | 14 | import parser |
| 15 | import archive | ||
| 16 | |||
| 17 | class OpenDocumentStripper(archive.GenericArchiveStripper): | ||
| 18 | ''' | ||
| 19 | An open document file is a zip, with xml file into. | ||
| 20 | The one that interest us is meta.xml | ||
| 21 | ''' | ||
| 22 | |||
| 23 | def remove_folder(self, folder_list): | ||
| 24 | for folder in folder_list: | ||
| 25 | dirname = folder.split('/')[0] | ||
| 26 | try: | ||
| 27 | shutil.rmtree(dirname) | ||
| 28 | except:#Some folder or open document format are buggies | ||
| 29 | pass | ||
| 30 | self.folder_list = [] | ||
| 31 | |||
| 32 | def _remove_all(self, method): | ||
| 33 | ''' | ||
| 34 | FIXME ? | ||
| 35 | There is a patch implementing the Zipfile.remove() | ||
| 36 | method here : http://bugs.python.org/issue6818 | ||
| 37 | ''' | ||
| 38 | zipin = zipfile.ZipFile(self.filename, 'r') | ||
| 39 | zipout = zipfile.ZipFile(self.filename + parser.POSTFIX, 'w', | ||
| 40 | allowZip64=True) | ||
| 41 | folder_list = [] | ||
| 42 | for item in zipin.namelist(): | ||
| 43 | if os.path.basename(item) is not item:#add folders to folder_list | ||
| 44 | folder_list.insert(0, os.path.dirname(item)) | ||
| 45 | if item.endswith('.xml') or item.startswith('manifest'): | ||
| 46 | if item != 'meta.xml':#contains the metadata | ||
| 47 | zipin.extract(item) | ||
| 48 | zipout.write(item) | ||
| 49 | mat.secure_remove(item) | ||
| 50 | elif item == 'mimetype': | ||
| 51 | zipin.extract(item) | ||
| 52 | #remove line meta.xml | ||
| 53 | zipout.write(item) | ||
| 54 | mat.secure_remove(item) | ||
| 55 | else: | ||
| 56 | zipin.extract(item) | ||
| 57 | if os.path.isfile(item): | ||
| 58 | try: | ||
| 59 | cfile = mat.create_class_file(item, False, | ||
| 60 | self.add2archive) | ||
| 61 | if method == 'normal': | ||
| 62 | cfile.remove_all() | ||
| 63 | else: | ||
| 64 | cfile.remove_all_ugly() | ||
| 65 | logging.debug('Processing %s from %s' % (item, | ||
| 66 | self.filename)) | ||
| 67 | zipout.write(item) | ||
| 68 | except: | ||
| 69 | logging.info('%s\' fileformat is not supported' % | ||
| 70 | item) | ||
| 71 | if self.add2archive: | ||
| 72 | zipout.write(item) | ||
| 73 | mat.secure_remove(item) | ||
| 74 | zipout.comment = '' | ||
| 75 | logging.info('%s treated' % self.filename) | ||
| 76 | zipin.close() | ||
| 77 | zipout.close() | ||
| 78 | self.remove_folder(folder_list) | ||
| 79 | |||
| 80 | if self.backup is False: | ||
| 81 | mat.secure_remove(self.filename) #remove the old file | ||
| 82 | os.rename(self.filename + parser.POSTFIX, self.filename) | ||
| 83 | |||
| 84 | def is_clean(self): | ||
| 85 | zipin = zipfile.ZipFile(self.filename, 'r') | ||
| 86 | try: | ||
| 87 | zipin.getinfo('meta.xml') | ||
| 88 | except KeyError:#no meta.xml in the file | ||
| 89 | zipin.close() | ||
| 90 | czf = archive.ZipStripper(self.realname, self.filename, | ||
| 91 | self.parser, self.editor, self.backup, self.add2archive) | ||
| 92 | if czf.is_clean(): | ||
| 93 | return True | ||
| 94 | else: | ||
| 95 | return False | ||
| 96 | return False | ||
| 97 | |||
| 12 | 98 | ||
| 13 | class TorrentStripper(parser.Generic_parser): | 99 | class TorrentStripper(parser.Generic_parser): |
| 14 | ''' | 100 | ''' |
