summaryrefslogtreecommitdiff
path: root/lib/archive.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/archive.py')
-rw-r--r--lib/archive.py291
1 files changed, 291 insertions, 0 deletions
diff --git a/lib/archive.py b/lib/archive.py
new file mode 100644
index 0000000..9993102
--- /dev/null
+++ b/lib/archive.py
@@ -0,0 +1,291 @@
'''
    Take care of archive formats (zip, tar, tar.gz, tar.bz2)
'''
4
5import zipfile
6import shutil
7import os
8import logging
9import tempfile
10
11import parser
12import mat
13from tarfile import tarfile
14
15
class GenericArchiveStripper(parser.GenericParser):
    '''
        Common base for the archive strippers.

        It owns a private temporary directory into which the members of
        the archive are extracted, and wipes it when the object is
        destroyed. Subclasses must implement _remove_all().
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            filename, parser, mime and backup are forwarded untouched
            to parser.GenericParser.
            add2archive : if true, members whose format is not supported
            are still copied into the cleaned archive.
        '''
        super(GenericArchiveStripper, self).__init__(filename, parser, mime,
            backup, add2archive)
        # suffix appended to the tarfile open mode ('', ':gz', ':bz2')
        self.compression = ''
        self.add2archive = add2archive
        self.tempdir = tempfile.mkdtemp()

    def __del__(self):
        '''
            Remove the files inside the temp dir,
            then remove the temp dir
        '''
        # __del__ also runs when __init__ raised before the temp dir
        # was created: there is nothing to clean up in that case.
        tempdir = getattr(self, 'tempdir', None)
        if tempdir is None:
            return
        for root, _, files in os.walk(tempdir):
            for item in files:
                mat.secure_remove(os.path.join(root, item))
        shutil.rmtree(tempdir)

    def remove_all(self):
        '''
            Call _remove_all() with "normal" as argument
        '''
        return self._remove_all('normal')

    def remove_all_strict(self):
        '''
            Call _remove_all() with "strict" as argument
        '''
        return self._remove_all('strict')

    def _remove_all(self, method):
        '''
            Remove all meta, normal way if method is "normal",
            else, use the strict way (with possible data loss).

            Must be overridden by every concrete archive stripper.
        '''
        raise NotImplementedError
56
57
class ZipStripper(GenericArchiveStripper):
    '''
        Represent a zip file
    '''
    def is_file_clean(self, fileinfo):
        '''
            Check if a ZipInfo object is clean of metadatas added
            by zip itself, independently of the corresponding file metadatas

            Return True when no such metadata is found, else False.
        '''
        # Values are compared with equality/truthiness: identity tests
        # against literals only work by accident of object caching.
        if fileinfo.comment:
            return False
        # NOTE(review): zipfile documents date_time as a 6-tuple, so it
        # never equals 0 and this check always fails; the "clean" value
        # to compare against still has to be decided.
        elif fileinfo.date_time != 0:
            return False
        elif fileinfo.create_system != 0:
            return False
        elif fileinfo.create_version != 0:
            return False
        else:
            return True

    def is_clean(self):
        '''
            Check if the given file is clean from harmful metadata

            Every member is extracted into the temp dir and checked with
            the matching mat class. Return False as soon as the archive
            comment is set, a member is not clean, or a member of an
            unsupported format is not known to be harmless.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            if zipin.comment:
                logging.debug('%s has a comment' % self.filename)
                return False
            for item in zipin.infolist():
                # is_file_clean(item) is deliberately not applied here :
                # there is no way (yet) to remove what zipfile adds.
                zipin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.filename)
                if not os.path.isfile(name):
                    continue
                try:
                    cfile = mat.create_class_file(name, False,
                        self.add2archive)
                    if not cfile.is_clean():
                        return False
                except Exception:
                    # best solution found so far to detect a file that
                    # mat can not handle
                    logging.info('%s\'s fileformat is not supported, or is a '
                        'harmless format' % item.filename)
                    _, ext = os.path.splitext(name)
                    bname = os.path.basename(item.filename)
                    if ext not in parser.NOMETA:
                        # 'mimetype' and '.rels' are tolerated whatever
                        # their extension is
                        if bname != 'mimetype' and bname != '.rels':
                            return False
            return True
        finally:
            # close the archive on every exit path, early returns included
            zipin.close()

    def get_meta(self):
        '''
            Return all the metadata of a ZipFile (don't return metadatas
            of contained files : should it ?)

            The result maps each member name to a dict with the keys
            'comment', 'modified', 'system' and 'zip_version', plus one
            "<filename> comment" entry for the archive comment.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            metadata = {}
            for field in zipin.infolist():
                metadata[field.filename] = {
                    'comment': field.comment,
                    'modified': field.date_time,
                    'system': field.create_system,
                    'zip_version': field.create_version,
                }
            metadata["%s comment" % self.filename] = zipin.comment
            return metadata
        finally:
            zipin.close()

    def _remove_all(self, method):
        '''
            Write a cleaned copy of the archive to self.output, then
            call do_backup() and return True.

            Each member is extracted, cleaned with the matching mat class
            ("normal" way if method is "normal", else the strict way) and
            added to the new archive. Members that can not be handled are
            only kept if add2archive is set or their extension is in
            parser.NOMETA.

            The members are re-added with ZipFile.write(), so the data
            added by zipfile itself is not removed. It's a big concern.
            NOTE(review): ZipFile.writestr() accepts a ZipInfo object,
            which may allow to control those fields - to be evaluated.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            try:
                for item in zipin.infolist():
                    zipin.extract(item, self.tempdir)
                    name = os.path.join(self.tempdir, item.filename)
                    if not os.path.isfile(name):
                        continue
                    try:
                        cfile = mat.create_class_file(name, False,
                            self.add2archive)
                        if method == 'normal':
                            cfile.remove_all()
                        else:
                            cfile.remove_all_strict()
                        logging.debug('Processing %s from %s' % (
                            item.filename, self.filename))
                        zipout.write(name, item.filename)
                    except Exception:
                        logging.info(
                            '%s\'s format is not supported or harmless' %
                            item.filename)
                        _, ext = os.path.splitext(name)
                        if self.add2archive or ext in parser.NOMETA:
                            zipout.write(name, item.filename)
                zipout.comment = ''
            finally:
                zipout.close()
        finally:
            zipin.close()
        logging.info('%s treated' % self.filename)
        self.do_backup()
        return True
165
166
class TarStripper(GenericArchiveStripper):
    '''
        Represent a tarfile archive
    '''
    def _remove(self, current_file):
        '''
            Remove the meta added by tar itself to the file

            Used as the filter callback of TarFile.add() : the given
            TarInfo is modified in place and returned.
        '''
        current_file.mtime = 0
        current_file.uid = 0
        current_file.gid = 0
        current_file.uname = ''
        current_file.gname = ''
        return current_file

    def _remove_all(self, method):
        '''
            Write a cleaned copy of the archive to self.output, then
            call do_backup() and return True.

            Each regular file is extracted, cleaned with the matching mat
            class ("normal" way if method is "normal", else the strict
            way) and added to the new archive through _remove().
            Files that can not be handled are only kept if add2archive is
            set or their extension is in parser.NOMETA. Members that are
            not regular files are not copied.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            tarout = tarfile.open(self.output, 'w' + self.compression)
            try:
                for item in tarin.getmembers():
                    # NOTE(review): extract() trusts the member name; the
                    # tarfile documentation warns about archives coming
                    # from untrusted sources.
                    tarin.extract(item, self.tempdir)
                    name = os.path.join(self.tempdir, item.name)
                    if not item.isfile():  # is item a regular file ?
                        continue
                    # no backup file
                    try:
                        cfile = mat.create_class_file(name, False,
                            self.add2archive)
                        if method == 'normal':
                            cfile.remove_all()
                        else:
                            cfile.remove_all_strict()
                        tarout.add(name, item.name, filter=self._remove)
                    except Exception:
                        logging.info(
                            '%s\' format is not supported or harmless' %
                            item.name)
                        _, ext = os.path.splitext(name)
                        if self.add2archive or ext in parser.NOMETA:
                            tarout.add(name, item.name, filter=self._remove)
            finally:
                tarout.close()
        finally:
            tarin.close()
        self.do_backup()
        return True

    def is_file_clean(self, current_file):
        '''
            Check metadatas added by tar

            Return True only if mtime, uid and gid are 0 and uname and
            gname are empty, i.e. the values set by _remove().
        '''
        # Compared by value : these fields may be long or float objects,
        # for which an identity test against a literal is wrong.
        if current_file.mtime != 0:
            return False
        elif current_file.uid != 0:
            return False
        elif current_file.gid != 0:
            return False
        elif current_file.uname != '':
            return False
        elif current_file.gname != '':
            return False
        else:
            return True

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas

            Return False as soon as a member carries tar metadata, a
            regular file is not clean, or a file of an unsupported format
            has an extension that is not in parser.NOMETA.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            for item in tarin.getmembers():
                if not self.is_file_clean(item):
                    return False
                tarin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.name)
                if not item.isfile():  # is item a regular file ?
                    continue
                try:
                    class_file = mat.create_class_file(name,
                        False, self.add2archive)  # no backup file
                    if not class_file.is_clean():
                        return False
                except Exception:
                    # a TarInfo has a 'name' attribute, not 'filename'
                    logging.error(
                        '%s\'s foramt is not supported or harmless' %
                        item.name)
                    _, ext = os.path.splitext(name)
                    if ext not in parser.NOMETA:
                        return False
            return True
        finally:
            # close the archive on every exit path, early returns included
            tarin.close()

    def get_meta(self):
        '''
            Return a dict with all the meta of the file

            Only the regular files that carry tar metadata are listed :
            their name is mapped to a dict with the keys 'mtime', 'uid',
            'gid', 'uname' and 'gname'.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            metadata = {}
            for current_file in tarin.getmembers():
                if not current_file.isfile():
                    continue
                if not self.is_file_clean(current_file):  # if there is meta
                    metadata[current_file.name] = {
                        'mtime': current_file.mtime,
                        'uid': current_file.uid,
                        'gid': current_file.gid,
                        'uname': current_file.uname,
                        'gname': current_file.gname,
                    }
            return metadata
        finally:
            tarin.close()
272
273
class GzipStripper(TarStripper):
    '''
        Tar archive compressed with gzip (tar.gz)
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        parent = super(GzipStripper, self)
        parent.__init__(filename, parser, mime, backup, add2archive)
        # suffix appended to the 'r'/'w' mode when the archive is opened
        self.compression = ':gz'
282
283
class Bzip2Stripper(TarStripper):
    '''
        Tar archive compressed with bzip2 (tar.bz2)
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        parent = super(Bzip2Stripper, self)
        parent.__init__(filename, parser, mime, backup, add2archive)
        # suffix appended to the 'r'/'w' mode when the archive is opened
        self.compression = ':bz2'
291 self.compression = ':bz2'