summaryrefslogtreecommitdiff
path: root/lib/office.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/office.py')
-rw-r--r--lib/office.py86
1 files changed, 86 insertions, 0 deletions
diff --git a/lib/office.py b/lib/office.py
index de38129..5d62732 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -3,12 +3,98 @@ import mimetypes
3import subprocess 3import subprocess
4import tempfile 4import tempfile
5import glob 5import glob
6import logging
7import zipfile
8import shutil
6 9
7import hachoir_core 10import hachoir_core
8 11
9import pdfrw 12import pdfrw
10import mat 13import mat
11import parser 14import parser
15import archive
16
class OpenDocumentStripper(archive.GenericArchiveStripper):
    '''
        An OpenDocument file is a zip archive containing xml files.
        The one that interests us is meta.xml: it is the member this
        class drops when rebuilding the archive.
    '''

    def remove_folder(self, folder_list):
        '''
            Best-effort removal of the top-level folders left in the
            working directory after the archive members were extracted.

            folder_list -- member directory paths, as collected by
                           _remove_all() from the archive name list
        '''
        # Many members share one top-level folder: remove each only once.
        top_levels = set(folder.split('/')[0] for folder in folder_list)
        for dirname in top_levels:
            # Member names come from the (untrusted) archive: never let a
            # name such as '../x' or './x' make us delete the parent or the
            # current directory.
            if dirname in ('', os.curdir, os.pardir):
                continue
            try:
                shutil.rmtree(dirname)
            except OSError:
                # Some folders or open document files are buggy; cleanup
                # stays best-effort.
                logging.debug('Unable to remove folder %s', dirname)
        self.folder_list = []

    def _copy_unchanged(self, zipin, zipout, item):
        '''
            Copy the member item from zipin to zipout without touching it,
            going through a temporary extraction in the working directory.
        '''
        zipin.extract(item)
        zipout.write(item)
        mat.secure_remove(item)

    def _remove_all(self, method):
        '''
            Rebuild the archive into self.filename + parser.POSTFIX without
            meta.xml, cleaning every embedded file that mat can handle.

            method -- 'normal' calls remove_all() on embedded files, any
                      other value calls remove_all_ugly()

            FIXME ?
            There is a patch implementing the Zipfile.remove()
            method here : http://bugs.python.org/issue6818
        '''
        folder_list = []
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipout = zipfile.ZipFile(self.filename + parser.POSTFIX, 'w',
                allowZip64=True)
            try:
                for item in zipin.namelist():
                    # Members living in a sub-folder: remember the folder so
                    # that it can be cleaned up once we are done.
                    if os.path.basename(item) != item:
                        folder_list.insert(0, os.path.dirname(item))

                    is_xml = (item.endswith('.xml') or
                        item.startswith('manifest'))
                    if is_xml or item == 'mimetype':
                        # meta.xml contains the metadata: it is the only
                        # member that is not copied to the new archive.
                        # NOTE(review): the manifest is copied verbatim and
                        # presumably still lists meta.xml -- TODO confirm.
                        if item != 'meta.xml':
                            self._copy_unchanged(zipin, zipout, item)
                        continue

                    zipin.extract(item)
                    if not os.path.isfile(item):
                        # Directory entry: removed later by remove_folder()
                        continue
                    try:
                        cfile = mat.create_class_file(item, False,
                            self.add2archive)
                        if method == 'normal':
                            cfile.remove_all()
                        else:
                            cfile.remove_all_ugly()
                        logging.debug('Processing %s from %s', item,
                            self.filename)
                        zipout.write(item)
                    except Exception:
                        # Deliberately broad: an unsupported embedded file
                        # must not abort the processing of the document.
                        logging.info('%s\' fileformat is not supported',
                            item)
                        if self.add2archive:
                            zipout.write(item)
                    mat.secure_remove(item)
                # The archive comment is a byte string (required on Python 3)
                zipout.comment = b''
                logging.info('%s treated', self.filename)
            finally:
                zipout.close()
        finally:
            # Release the handle and the extracted folders even on failure.
            zipin.close()
            self.remove_folder(folder_list)

        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(self.filename + parser.POSTFIX, self.filename)

    def is_clean(self):
        '''
            Return False when the document still contains a meta.xml
            member; otherwise return the verdict of archive.ZipStripper
            on the same file.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            has_meta = 'meta.xml' in zipin.namelist()
        finally:
            zipin.close()  # closed on every path, not only when clean
        if has_meta:
            return False

        czf = archive.ZipStripper(self.realname, self.filename,
            self.parser, self.editor, self.backup, self.add2archive)
        return bool(czf.is_clean())
97
12 98
13class TorrentStripper(parser.Generic_parser): 99class TorrentStripper(parser.Generic_parser):
14 ''' 100 '''