summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorjvoisin2011-07-24 02:30:50 +0200
committerjvoisin2011-07-24 02:30:50 +0200
commitace3d8213921a9308d30afc057fc21221420e12e (patch)
treebe506ef8b0534127ad080f7d1ad7d4ca9a020a67 /lib
parentbcc0ad2e7491c212ef35ca250fb8c5f2c53572da (diff)
First implementation of open document format
Diffstat (limited to 'lib')
-rw-r--r--lib/mat.py36
-rw-r--r--lib/office.py86
2 files changed, 114 insertions, 8 deletions
diff --git a/lib/mat.py b/lib/mat.py
index 2903ed9..5dcdbc2 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -7,6 +7,7 @@
7import os 7import os
8import subprocess 8import subprocess
9import logging 9import logging
10import mimetypes
10 11
11import hachoir_core.cmd_line 12import hachoir_core.cmd_line
12import hachoir_parser 13import hachoir_parser
@@ -14,7 +15,7 @@ import hachoir_editor
14 15
15import images 16import images
16import audio 17import audio
17import misc 18import office
18import archive 19import archive
19 20
20__version__ = "0.1" 21__version__ = "0.1"
@@ -29,7 +30,7 @@ strippers = {
29 hachoir_parser.image.PngFile: images.PngStripper, 30 hachoir_parser.image.PngFile: images.PngStripper,
30 hachoir_parser.image.bmp.BmpFile: images.BmpStripper, 31 hachoir_parser.image.bmp.BmpFile: images.BmpStripper,
31 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, 32 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper,
32 hachoir_parser.misc.PDFDocument: misc.PdfStripper, 33 hachoir_parser.misc.PDFDocument: office.PdfStripper,
33 hachoir_parser.archive.TarFile: archive.TarStripper, 34 hachoir_parser.archive.TarFile: archive.TarStripper,
34 hachoir_parser.archive.gzip_parser.GzipParser: archive.GzipStripper, 35 hachoir_parser.archive.gzip_parser.GzipParser: archive.GzipStripper,
35 hachoir_parser.archive.bzip2_parser.Bzip2Parser: archive.Bzip2Stripper, 36 hachoir_parser.archive.bzip2_parser.Bzip2Parser: archive.Bzip2Stripper,
@@ -61,12 +62,14 @@ def create_class_file(name, backup, add2archive):
61 corresponding to the filetype of the given file 62 corresponding to the filetype of the given file
62 ''' 63 '''
63 if is_secure(name): 64 if is_secure(name):
64 print 'a'
65 return 65 return
66 66
67 filename = "" 67 filename = ""
68 realname = name 68 realname = name
69 filename = hachoir_core.cmd_line.unicodeFilename(name) 69 try:
70 filename = hachoir_core.cmd_line.unicodeFilename(name)
71 except TypeError:# get rid of "TypeError: decoding Unicode is not supported"
72 filename = name
70 parser = hachoir_parser.createParser(filename) 73 parser = hachoir_parser.createParser(filename)
71 if not parser: 74 if not parser:
72 logging.error("Unable to parse %s" % filename) 75 logging.error("Unable to parse %s" % filename)
@@ -82,9 +85,26 @@ def create_class_file(name, backup, add2archive):
82 stripper_class = strippers[editor.input.__class__] 85 stripper_class = strippers[editor.input.__class__]
83 except KeyError: 86 except KeyError:
84 #Place for another lib than hachoir 87 #Place for another lib than hachoir
85 logging.error("Don't have stripper for file type %s" % editor.description) 88 logging.error("Don't have stripper for format %s" % editor.description)
86 return 89 return
87 if editor.input.__class__ == hachoir_parser.misc.PDFDocument: 90
91 if editor.input.__class__ == hachoir_parser.misc.PDFDocument:#pdf
88 return stripper_class(filename, realname, backup) 92 return stripper_class(filename, realname, backup)
89 return stripper_class(realname, filename, parser, editor, backup, 93
90 add2archive) 94 elif editor.input.__class__ == hachoir_parser.archive.zip.ZipFile:
95 #zip based format
96 mime = mimetypes.guess_type(filename)[0]
97 try:#Ugly workaround, cleaning open document delete mime (wtf?)
98 if mime.startswith(#Open document format
99 'application/vnd.oasis.opendocument'):
100 return office.OpenDocumentStripper(realname, filename, parser,
101 editor, backup, add2archive)
102 else:#normal zip
103 return stripper_class(realname, filename, parser, editor,
104 backup, add2archive)
105 except:#normal zip file
106 return stripper_class(realname, filename, parser, editor, backup,
107 add2archive)
108 else:#normal handling
109 return stripper_class(realname, filename, parser, editor, backup,
110 add2archive)
diff --git a/lib/office.py b/lib/office.py
index de38129..5d62732 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -3,12 +3,98 @@ import mimetypes
3import subprocess 3import subprocess
4import tempfile 4import tempfile
5import glob 5import glob
6import logging
7import zipfile
8import shutil
6 9
7import hachoir_core 10import hachoir_core
8 11
9import pdfrw 12import pdfrw
10import mat 13import mat
11import parser 14import parser
15import archive
16
17class OpenDocumentStripper(archive.GenericArchiveStripper):
18 '''
19 An open document file is a zip, with xml file into.
20 The one that interest us is meta.xml
21 '''
22
23 def remove_folder(self, folder_list):
24 for folder in folder_list:
25 dirname = folder.split('/')[0]
26 try:
27 shutil.rmtree(dirname)
28 except:#Some folder or open document format are buggies
29 pass
30 self.folder_list = []
31
32 def _remove_all(self, method):
33 '''
34 FIXME ?
35 There is a patch implementing the Zipfile.remove()
36 method here : http://bugs.python.org/issue6818
37 '''
38 zipin = zipfile.ZipFile(self.filename, 'r')
39 zipout = zipfile.ZipFile(self.filename + parser.POSTFIX, 'w',
40 allowZip64=True)
41 folder_list = []
42 for item in zipin.namelist():
43 if os.path.basename(item) is not item:#add folders to folder_list
44 folder_list.insert(0, os.path.dirname(item))
45 if item.endswith('.xml') or item.startswith('manifest'):
46 if item != 'meta.xml':#contains the metadata
47 zipin.extract(item)
48 zipout.write(item)
49 mat.secure_remove(item)
50 elif item == 'mimetype':
51 zipin.extract(item)
52 #remove line meta.xml
53 zipout.write(item)
54 mat.secure_remove(item)
55 else:
56 zipin.extract(item)
57 if os.path.isfile(item):
58 try:
59 cfile = mat.create_class_file(item, False,
60 self.add2archive)
61 if method == 'normal':
62 cfile.remove_all()
63 else:
64 cfile.remove_all_ugly()
65 logging.debug('Processing %s from %s' % (item,
66 self.filename))
67 zipout.write(item)
68 except:
69 logging.info('%s\' fileformat is not supported' %
70 item)
71 if self.add2archive:
72 zipout.write(item)
73 mat.secure_remove(item)
74 zipout.comment = ''
75 logging.info('%s treated' % self.filename)
76 zipin.close()
77 zipout.close()
78 self.remove_folder(folder_list)
79
80 if self.backup is False:
81 mat.secure_remove(self.filename) #remove the old file
82 os.rename(self.filename + parser.POSTFIX, self.filename)
83
84 def is_clean(self):
85 zipin = zipfile.ZipFile(self.filename, 'r')
86 try:
87 zipin.getinfo('meta.xml')
88 except KeyError:#no meta.xml in the file
89 zipin.close()
90 czf = archive.ZipStripper(self.realname, self.filename,
91 self.parser, self.editor, self.backup, self.add2archive)
92 if czf.is_clean():
93 return True
94 else:
95 return False
96 return False
97
12 98
13class TorrentStripper(parser.Generic_parser): 99class TorrentStripper(parser.Generic_parser):
14 ''' 100 '''