summary refs log tree commit diff
path: root/MAT/archive.py
diff options
context:
space:
mode:
Diffstat (limited to 'MAT/archive.py')
-rw-r--r-- MAT/archive.py 272
1 files changed, 272 insertions, 0 deletions
diff --git a/MAT/archive.py b/MAT/archive.py
new file mode 100644
index 0000000..a749b29
--- /dev/null
+++ b/MAT/archive.py
@@ -0,0 +1,272 @@
1'''
2 Take care of archive formats
3'''
4
5import zipfile
6import shutil
7import os
8import logging
9import tempfile
10
11import parser
12import mat
13from tarfile import tarfile
14
15
class GenericArchiveStripper(parser.GenericParser):
    '''
        Represent a generic archive

        Every instance owns a private temporary directory (self.tempdir)
        into which the subclasses extract archive members; that directory
        is emptied and removed when the instance is destroyed.
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            Forward every argument to parser.GenericParser, then set up
            the archive-specific state.
        '''
        super(GenericArchiveStripper, self).__init__(filename, parser, mime,
            backup, add2archive)
        # Suffix appended to the 'r'/'w' mode by the tar-based subclasses
        self.compression = ''
        self.add2archive = add2archive
        self.tempdir = tempfile.mkdtemp()

    def __del__(self):
        '''
            Remove the files inside the temp dir,
            then remove the temp dir
        '''
        # __del__ is also called on instances whose __init__ raised before
        # self.tempdir was assigned: there is nothing to clean up then.
        tempdir = getattr(self, 'tempdir', None)
        if tempdir is None:
            return
        # Every extracted file goes through mat.secure_remove first;
        # rmtree then gets rid of the remaining directory tree.
        for root, _, files in os.walk(tempdir):
            for item in files:
                mat.secure_remove(os.path.join(root, item))
        shutil.rmtree(tempdir)

    def remove_all(self):
        '''
            Public entry point: delegate to the subclass' _remove_all()
            and return its result.
        '''
        return self._remove_all()

    def _remove_all(self):
        '''
            Format-specific cleaning, to be provided by subclasses.
        '''
        raise NotImplementedError
43
44
class ZipStripper(GenericArchiveStripper):
    '''
        Represent a zip file
    '''
    def is_file_clean(self, fileinfo):
        '''
            Check if a ZipInfo object is clean of metadatas added
            by zip itself, independently of the corresponding file metadatas

            Return True only when the comment is empty and date_time,
            create_system and create_version all compare equal to 0.
        '''
        # Compare by value: "is not" against a literal tests object
        # identity, which is an interpreter implementation detail.
        # NOTE(review): zipfile documents ZipInfo.date_time as a tuple, so
        # the date_time test presumably never passes - confirm the
        # intended "clean" value.
        if fileinfo.comment != '':
            return False
        elif fileinfo.date_time != 0:
            return False
        elif fileinfo.create_system != 0:
            return False
        elif fileinfo.create_version != 0:
            return False
        return True

    def is_clean(self):
        '''
            Check if the given file is clean from harmful metadata

            Return False if the archive has a comment, if a member
            handled by mat.create_class_file() is not clean, or if an
            unhandled member has an extension outside parser.NOMETA
            (members named 'mimetype' or '.rels' are tolerated).
            Return True otherwise.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        # try/finally: the archive must be closed on every early return
        try:
            if zipin.comment != '':
                logging.debug('%s has a comment' % self.filename)
                return False
            for item in zipin.infolist():
                # The per-member ZipInfo check (self.is_file_clean) is
                # deliberately not applied here: no way has been found yet
                # to remove the fields that zipfile itself writes.
                zipin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.filename)
                if not os.path.isfile(name):
                    continue  # directories carry nothing to inspect
                try:
                    cfile = mat.create_class_file(name, False,
                        self.add2archive)
                    if not cfile.is_clean():
                        return False
                except Exception:
                    # best solution found so far: any failure is taken as
                    # "no parser for this member"
                    logging.info('%s\'s fileformat is not supported, or is a '
                        'harmless format' % item.filename)
                    _, ext = os.path.splitext(name)
                    bname = os.path.basename(item.filename)
                    if ext not in parser.NOMETA:
                        if bname != 'mimetype' and bname != '.rels':
                            return False
            return True
        finally:
            zipin.close()

    def get_meta(self):
        '''
            Return all the metadata of a ZipFile (don't return metadatas
            of contained files : should it ?)

            The result maps each member name to a dict with the keys
            'comment', 'modified', 'system' and 'zip_version', plus one
            '<filename> comment' entry holding the archive comment.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            metadata = {}
            for field in zipin.infolist():
                metadata[field.filename] = {
                    'comment': field.comment,
                    'modified': field.date_time,
                    'system': field.create_system,
                    'zip_version': field.create_version,
                }
            metadata["%s comment" % self.filename] = zipin.comment
        finally:
            zipin.close()
        return metadata

    def _remove_all(self):
        '''
            So far, the zipfile module does not allow to write a ZipInfo
            object into a zipfile (and it's a shame !) : so data added
            by zipfile itself could not be removed. It's a big concern.
            Is shiping a patched version of zipfile.py a good idea ?

            Every regular member is extracted to self.tempdir, cleaned
            through mat.create_class_file() when possible, and written to
            self.output. Members that cannot be cleaned are only kept if
            self.add2archive is set or their extension is in parser.NOMETA.
            Return True once the new archive is written and do_backup()
            has been called.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            # nested try/finally: both archives get closed even if the
            # processing of a member raises
            try:
                for item in zipin.infolist():
                    zipin.extract(item, self.tempdir)
                    name = os.path.join(self.tempdir, item.filename)
                    if not os.path.isfile(name):
                        continue
                    try:
                        cfile = mat.create_class_file(name, False,
                            self.add2archive)
                        cfile.remove_all()
                        logging.debug('Processing %s from %s' % (
                            item.filename, self.filename))
                        zipout.write(name, item.filename)
                    except Exception:
                        logging.info(
                            '%s\'s format is not supported or harmless' %
                            item.filename)
                        _, ext = os.path.splitext(name)
                        if self.add2archive or ext in parser.NOMETA:
                            zipout.write(name, item.filename)
                zipout.comment = ''
            finally:
                zipout.close()
        finally:
            zipin.close()
        logging.info('%s treated' % self.filename)
        self.do_backup()
        return True
149
150
class TarStripper(GenericArchiveStripper):
    '''
        Represent a tarfile archive

        NOTE(review): members are extracted into self.tempdir using the
        names stored in the archive; confirm that the tarfile module in
        use refuses names that would escape that directory.
    '''
    def _remove(self, current_file):
        '''
            remove the meta added by tar itself to the file

            Used as the "filter" callback of tarout.add(): reset mtime,
            uid, gid, uname and gname, then return the same object.
        '''
        current_file.mtime = 0
        current_file.uid = 0
        current_file.gid = 0
        current_file.uname = ''
        current_file.gname = ''
        return current_file

    def _remove_all(self):
        '''
            Write a cleaned copy of the archive to self.output

            Every member is extracted to self.tempdir; regular files are
            cleaned through mat.create_class_file() when possible and
            added to the output with their tar metadata reset by
            self._remove. Members that cannot be cleaned are only kept if
            self.add2archive is set or their extension is in parser.NOMETA.
            Members that are not regular files are not copied.
            Return True once do_backup() has been called.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            tarout = tarfile.open(self.output, 'w' + self.compression)
            # nested try/finally: both archives get closed even if the
            # processing of a member raises
            try:
                for item in tarin.getmembers():
                    tarin.extract(item, self.tempdir)
                    name = os.path.join(self.tempdir, item.name)
                    # '0' is the type flag of a regular file; compare by
                    # value, identity with a literal is not guaranteed
                    if item.type == '0':
                        #no backup file
                        try:
                            cfile = mat.create_class_file(name, False,
                                self.add2archive)
                            cfile.remove_all()
                            tarout.add(name, item.name, filter=self._remove)
                        except Exception:
                            logging.info(
                                '%s\' format is not supported or harmless' %
                                item.name)
                            _, ext = os.path.splitext(name)
                            if self.add2archive or ext in parser.NOMETA:
                                tarout.add(name, item.name,
                                    filter=self._remove)
            finally:
                tarout.close()
        finally:
            tarin.close()
        self.do_backup()
        return True

    def is_file_clean(self, current_file):
        '''
            Check metadatas added by tar

            Return True only when mtime, uid and gid equal 0 and
            uname and gname are empty.
        '''
        # Value comparisons on purpose: "is not 0" / "is not ''" test
        # object identity and may fail for equal values (e.g. a long 0).
        if current_file.mtime != 0:
            return False
        elif current_file.uid != 0:
            return False
        elif current_file.gid != 0:
            return False
        elif current_file.uname != '':
            return False
        elif current_file.gname != '':
            return False
        return True

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas

            Return False as soon as a member carries tar metadata, a
            regular file handled by mat.create_class_file() is not clean,
            or an unhandled regular file has an extension outside
            parser.NOMETA. Return True otherwise.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        # try/finally replaces the close() call before each return
        try:
            for item in tarin.getmembers():
                if not self.is_file_clean(item):
                    return False
                tarin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.name)
                if item.type == '0':  # is item a regular file ?
                    try:
                        class_file = mat.create_class_file(name,
                            False, self.add2archive)  # no backup file
                        if not class_file.is_clean():
                            return False
                    except Exception:
                        # the member name is read from "name", as everywhere
                        # else in this class ("filename" is never used on
                        # tar members)
                        logging.error(
                            '%s\'s foramt is not supported or harmless' %
                            item.name)
                        _, ext = os.path.splitext(name)
                        if ext not in parser.NOMETA:
                            return False
            return True
        finally:
            tarin.close()

    def get_meta(self):
        '''
            Return a dict with all the meta of the file

            Only regular files that carry tar metadata are listed; each
            one maps its name to a dict with the keys 'mtime', 'uid',
            'gid', 'uname' and 'gname'.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            metadata = {}
            for current_file in tarin.getmembers():
                if current_file.type != '0':
                    continue  # not a regular file
                if not self.is_file_clean(current_file):  # if there is meta
                    metadata[current_file.name] = {
                        'mtime': current_file.mtime,
                        'uid': current_file.uid,
                        'gid': current_file.gid,
                        'uname': current_file.uname,
                        'gname': current_file.gname,
                    }
        finally:
            tarin.close()
        return metadata
253
254
class GzipStripper(TarStripper):
    '''
        Stripper for gzip-compressed tar archives (tar.gz)
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            Initialise like a TarStripper, then select gzip compression.
        '''
        parent = super(GzipStripper, self)
        parent.__init__(filename, parser, mime, backup, add2archive)
        # Appended to the 'r'/'w' mode when the archive gets opened
        self.compression = ':gz'
263
264
class Bzip2Stripper(TarStripper):
    '''
        Stripper for bzip2-compressed tar archives (tar.bz2)
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            Initialise like a TarStripper, then select bzip2 compression.
        '''
        parent = super(Bzip2Stripper, self)
        parent.__init__(filename, parser, mime, backup, add2archive)
        # Appended to the 'r'/'w' mode when the archive gets opened
        self.compression = ':bz2'
272 self.compression = ':bz2'