summary | refs | log | tree | commit | diff
path: root/libmat/archive.py
diff options
context:
space:
mode:
author: jvoisin, 2015-07-25 17:14:23 +0200
committer: jvoisin, 2015-07-25 17:14:23 +0200
commit: 6ba3e3f20d7d52895bc44f9fc35b068cfce47133 (patch)
tree: 15df2aca17d56d941c6376ef729e0c1fea4c396f /libmat/archive.py
parent: 85e6279d16af063e5150c7cf4bd491185b8ae788 (diff)
_MASSIVE_ pep8 revamp
Thank you so much PyCharm
Diffstat (limited to 'libmat/archive.py')
-rw-r--r-- libmat/archive.py 128
1 files changed, 72 insertions, 56 deletions
diff --git a/libmat/archive.py b/libmat/archive.py
index d483dcc..4c62dc8 100644
--- a/libmat/archive.py
+++ b/libmat/archive.py
@@ -1,5 +1,5 @@
1''' Take care of archives formats 1""" Take care of archives formats
2''' 2"""
3 3
4import datetime 4import datetime
5import logging 5import logging
@@ -16,23 +16,24 @@ import parser
16# Zip files do not support dates older than 01/01/1980 16# Zip files do not support dates older than 01/01/1980
17ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) 17ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
18ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0) 18ZIP_EPOCH_SECONDS = (datetime.datetime(1980, 1, 1, 0, 0, 0)
19 - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds() 19 - datetime.datetime(1970, 1, 1, 1, 0, 0)).total_seconds()
20 20
21 21
22class GenericArchiveStripper(parser.GenericParser): 22class GenericArchiveStripper(parser.GenericParser):
23 ''' Represent a generic archive 23 """ Represent a generic archive
24 ''' 24 """
25
25 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 26 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
26 super(GenericArchiveStripper, self).__init__(filename, 27 super(GenericArchiveStripper, self).__init__(filename,
27 parser, mime, backup, is_writable, **kwargs) 28 parser, mime, backup, is_writable, **kwargs)
28 self.compression = '' 29 self.compression = ''
29 self.add2archive = kwargs['add2archive'] 30 self.add2archive = kwargs['add2archive']
30 self.tempdir = tempfile.mkdtemp() 31 self.tempdir = tempfile.mkdtemp()
31 32
32 def __del__(self): 33 def __del__(self):
33 ''' Remove the files inside the temp dir, 34 """ Remove the files inside the temp dir,
34 then remove the temp dir 35 then remove the temp dir
35 ''' 36 """
36 for root, dirs, files in os.walk(self.tempdir): 37 for root, dirs, files in os.walk(self.tempdir):
37 for item in files: 38 for item in files:
38 path_file = os.path.join(root, item) 39 path_file = os.path.join(root, item)
@@ -40,28 +41,30 @@ class GenericArchiveStripper(parser.GenericParser):
40 shutil.rmtree(self.tempdir) 41 shutil.rmtree(self.tempdir)
41 42
42 def is_clean(self, list_unsupported=False): 43 def is_clean(self, list_unsupported=False):
43 ''' Virtual method to check for harmul metadata 44 """ Virtual method to check for harmul metadata
44 ''' 45 """
45 raise NotImplementedError 46 raise NotImplementedError
46 47
47 def list_unsupported(self): 48 def list_unsupported(self):
48 ''' Get a list of every non-supported files present in the archive 49 """ Get a list of every non-supported files present in the archive
49 ''' 50 """
50 return self.is_clean(list_unsupported=True) 51 return self.is_clean(list_unsupported=True)
51 52
52 def remove_all(self): 53 def remove_all(self):
53 ''' Virtual method to remove all metadata 54 """ Virtual method to remove all metadata
54 ''' 55 """
55 raise NotImplementedError 56 raise NotImplementedError
56 57
57 58
58class ZipStripper(GenericArchiveStripper): 59class ZipStripper(GenericArchiveStripper):
59 ''' Represent a zip file 60 """ Represent a zip file
60 ''' 61 """
61 def __is_zipfile_clean(self, fileinfo): 62
62 ''' Check if a ZipInfo object is clean of metadata added 63 @staticmethod
64 def __is_zipfile_clean(fileinfo):
65 """ Check if a ZipInfo object is clean of metadata added
63 by zip itself, independently of the corresponding file metadata 66 by zip itself, independently of the corresponding file metadata
64 ''' 67 """
65 if fileinfo.comment != '': 68 if fileinfo.comment != '':
66 return False 69 return False
67 elif fileinfo.date_time != ZIP_EPOCH: 70 elif fileinfo.date_time != ZIP_EPOCH:
@@ -71,11 +74,11 @@ class ZipStripper(GenericArchiveStripper):
71 return True 74 return True
72 75
73 def is_clean(self, list_unsupported=False): 76 def is_clean(self, list_unsupported=False):
74 ''' Check if the given file is clean from harmful metadata 77 """ Check if the given file is clean from harmful metadata
75 When list_unsupported is True, the method returns a list 78 When list_unsupported is True, the method returns a list
76 of all non-supported/archives files contained in the 79 of all non-supported/archives files contained in the
77 archive. 80 archive.
78 ''' 81 """
79 ret_list = [] 82 ret_list = []
80 zipin = zipfile.ZipFile(self.filename, 'r') 83 zipin = zipfile.ZipFile(self.filename, 'r')
81 if zipin.comment != '' and not list_unsupported: 84 if zipin.comment != '' and not list_unsupported:
@@ -86,7 +89,7 @@ class ZipStripper(GenericArchiveStripper):
86 path = os.path.join(self.tempdir, item.filename) 89 path = os.path.join(self.tempdir, item.filename)
87 if not self.__is_zipfile_clean(item) and not list_unsupported: 90 if not self.__is_zipfile_clean(item) and not list_unsupported:
88 logging.debug('%s from %s has compromising zipinfo' % 91 logging.debug('%s from %s has compromising zipinfo' %
89 (item.filename, self.filename)) 92 (item.filename, self.filename))
90 return False 93 return False
91 if os.path.isfile(path): 94 if os.path.isfile(path):
92 cfile = mat.create_class_file(path, False, add2archive=self.add2archive) 95 cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
@@ -97,7 +100,7 @@ class ZipStripper(GenericArchiveStripper):
97 return False 100 return False
98 else: 101 else:
99 logging.info('%s\'s fileformat is not supported or harmless.' 102 logging.info('%s\'s fileformat is not supported or harmless.'
100 % item.filename) 103 % item.filename)
101 basename, ext = os.path.splitext(path) 104 basename, ext = os.path.splitext(path)
102 if os.path.basename(item.filename) not in ('mimetype', '.rels'): 105 if os.path.basename(item.filename) not in ('mimetype', '.rels'):
103 if ext not in parser.NOMETA: 106 if ext not in parser.NOMETA:
@@ -110,7 +113,7 @@ class ZipStripper(GenericArchiveStripper):
110 return True 113 return True
111 114
112 def get_meta(self): 115 def get_meta(self):
113 ''' Return all the metadata of a zip archive''' 116 """ Return all the metadata of a zip archive"""
114 zipin = zipfile.ZipFile(self.filename, 'r') 117 zipin = zipfile.ZipFile(self.filename, 'r')
115 metadata = {} 118 metadata = {}
116 if zipin.comment != '': 119 if zipin.comment != '':
@@ -129,13 +132,14 @@ class ZipStripper(GenericArchiveStripper):
129 metadata[item.filename] = str(cfile_meta) 132 metadata[item.filename] = str(cfile_meta)
130 else: 133 else:
131 logging.info('%s\'s fileformat is not supported or harmless' 134 logging.info('%s\'s fileformat is not supported or harmless'
132 % item.filename) 135 % item.filename)
133 zipin.close() 136 zipin.close()
134 return metadata 137 return metadata
135 138
136 def __get_zipinfo_meta(self, zipinfo): 139 @staticmethod
137 ''' Return all the metadata of a ZipInfo 140 def __get_zipinfo_meta(zipinfo):
138 ''' 141 """ Return all the metadata of a ZipInfo
142 """
139 metadata = {} 143 metadata = {}
140 if zipinfo.comment != '': 144 if zipinfo.comment != '':
141 metadata['comment'] = zipinfo.comment 145 metadata['comment'] = zipinfo.comment
@@ -145,13 +149,19 @@ class ZipStripper(GenericArchiveStripper):
145 metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown" 149 metadata['system'] = "windows" if zipinfo.create_system == 2 else "unknown"
146 return metadata 150 return metadata
147 151
148 def remove_all(self, whitelist=[], beginning_blacklist=[], ending_blacklist=[]): 152 def remove_all(self, whitelist=None, beginning_blacklist=None, ending_blacklist=None):
149 ''' Remove all metadata from a zip archive, even thoses 153 """ Remove all metadata from a zip archive, even thoses
150 added by Python's zipfile itself. It will not add 154 added by Python's zipfile itself. It will not add
151 files starting with "begining_blacklist", or ending with 155 files starting with "begining_blacklist", or ending with
152 "ending_blacklist". This method also add files present in 156 "ending_blacklist". This method also add files present in
153 whitelist to the archive. 157 whitelist to the archive.
154 ''' 158 """
159 if not ending_blacklist:
160 ending_blacklist = []
161 if not beginning_blacklist:
162 beginning_blacklist = []
163 if not whitelist:
164 whitelist = []
155 zipin = zipfile.ZipFile(self.filename, 'r') 165 zipin = zipfile.ZipFile(self.filename, 'r')
156 zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True) 166 zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
157 for item in zipin.infolist(): 167 for item in zipin.infolist():
@@ -166,7 +176,7 @@ class ZipStripper(GenericArchiveStripper):
166 if cfile is not None: 176 if cfile is not None:
167 # Handle read-only files inside archive 177 # Handle read-only files inside archive
168 old_stat = os.stat(path).st_mode 178 old_stat = os.stat(path).st_mode
169 os.chmod(path, old_stat|stat.S_IWUSR) 179 os.chmod(path, old_stat | stat.S_IWUSR)
170 cfile.remove_all() 180 cfile.remove_all()
171 os.chmod(path, old_stat) 181 os.chmod(path, old_stat)
172 logging.debug('Processing %s from %s' % (item.filename, self.filename)) 182 logging.debug('Processing %s from %s' % (item.filename, self.filename))
@@ -186,11 +196,12 @@ class ZipStripper(GenericArchiveStripper):
186 196
187 197
188class TarStripper(GenericArchiveStripper): 198class TarStripper(GenericArchiveStripper):
189 ''' Represent a tarfile archive 199 """ Represent a tarfile archive
190 ''' 200 """
201
191 def _remove(self, current_file): 202 def _remove(self, current_file):
192 ''' Remove the meta added by tarfile itself to the file 203 """ Remove the meta added by tarfile itself to the file
193 ''' 204 """
194 current_file.mtime = 0 205 current_file.mtime = 0
195 current_file.uid = 0 206 current_file.uid = 0
196 current_file.gid = 0 207 current_file.gid = 0
@@ -198,11 +209,13 @@ class TarStripper(GenericArchiveStripper):
198 current_file.gname = '' 209 current_file.gname = ''
199 return current_file 210 return current_file
200 211
201 def remove_all(self, whitelist=[]): 212 def remove_all(self, whitelist=None):
202 ''' Remove all harmful metadata from the tarfile. 213 """ Remove all harmful metadata from the tarfile.
203 The method will also add every files matching 214 The method will also add every files matching
204 whitelist in the produced archive. 215 whitelist in the produced archive.
205 ''' 216 """
217 if not whitelist:
218 whitelist = []
206 tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8') 219 tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8')
207 tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8') 220 tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8')
208 for item in tarin.getmembers(): 221 for item in tarin.getmembers():
@@ -213,14 +226,14 @@ class TarStripper(GenericArchiveStripper):
213 if cfile is not None: 226 if cfile is not None:
214 # Handle read-only files inside archive 227 # Handle read-only files inside archive
215 old_stat = os.stat(path).st_mode 228 old_stat = os.stat(path).st_mode
216 os.chmod(path, old_stat|stat.S_IWUSR) 229 os.chmod(path, old_stat | stat.S_IWUSR)
217 cfile.remove_all() 230 cfile.remove_all()
218 os.chmod(path, old_stat) 231 os.chmod(path, old_stat)
219 elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA: 232 elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA:
220 logging.debug('%s\' format is either not supported or harmless' % item.name) 233 logging.debug('%s\' format is either not supported or harmless' % item.name)
221 elif item.name in whitelist: 234 elif item.name in whitelist:
222 logging.debug('%s is not supported, but MAT was told to add it anyway.' 235 logging.debug('%s is not supported, but MAT was told to add it anyway.'
223 % item.name) 236 % item.name)
224 else: # Don't add the file to the archive 237 else: # Don't add the file to the archive
225 logging.debug('%s will not be added' % item.name) 238 logging.debug('%s will not be added' % item.name)
226 continue 239 continue
@@ -230,9 +243,10 @@ class TarStripper(GenericArchiveStripper):
230 self.do_backup() 243 self.do_backup()
231 return True 244 return True
232 245
233 def is_file_clean(self, current_file): 246 @staticmethod
234 ''' Check metadatas added by tarfile 247 def is_file_clean(current_file):
235 ''' 248 """ Check metadatas added by tarfile
249 """
236 if current_file.mtime != 0: 250 if current_file.mtime != 0:
237 return False 251 return False
238 elif current_file.uid != 0: 252 elif current_file.uid != 0:
@@ -246,17 +260,17 @@ class TarStripper(GenericArchiveStripper):
246 return True 260 return True
247 261
248 def is_clean(self, list_unsupported=False): 262 def is_clean(self, list_unsupported=False):
249 ''' Check if the file is clean from harmful metadatas 263 """ Check if the file is clean from harmful metadatas
250 When list_unsupported is True, the method returns a list 264 When list_unsupported is True, the method returns a list
251 of all non-supported/archives files contained in the 265 of all non-supported/archives files contained in the
252 archive. 266 archive.
253 ''' 267 """
254 ret_list = [] 268 ret_list = []
255 tarin = tarfile.open(self.filename, 'r' + self.compression) 269 tarin = tarfile.open(self.filename, 'r' + self.compression)
256 for item in tarin.getmembers(): 270 for item in tarin.getmembers():
257 if not self.is_file_clean(item) and not list_unsupported: 271 if not self.is_file_clean(item) and not list_unsupported:
258 logging.debug('%s from %s has compromising tarinfo' % 272 logging.debug('%s from %s has compromising tarinfo' %
259 (item.name, self.filename)) 273 (item.name, self.filename))
260 return False 274 return False
261 tarin.extract(item, self.tempdir) 275 tarin.extract(item, self.tempdir)
262 path = os.path.join(self.tempdir, item.name) 276 path = os.path.join(self.tempdir, item.name)
@@ -265,7 +279,7 @@ class TarStripper(GenericArchiveStripper):
265 if cfile is not None: 279 if cfile is not None:
266 if not cfile.is_clean(): 280 if not cfile.is_clean():
267 logging.debug('%s from %s has metadata' % 281 logging.debug('%s from %s has metadata' %
268 (item.name.decode("utf8"), self.filename)) 282 (item.name.decode("utf8"), self.filename))
269 if not list_unsupported: 283 if not list_unsupported:
270 return False 284 return False
271 # Nested archives are treated like unsupported files 285 # Nested archives are treated like unsupported files
@@ -283,8 +297,8 @@ class TarStripper(GenericArchiveStripper):
283 return True 297 return True
284 298
285 def get_meta(self): 299 def get_meta(self):
286 ''' Return a dict with all the meta of the tarfile 300 """ Return a dict with all the meta of the tarfile
287 ''' 301 """
288 tarin = tarfile.open(self.filename, 'r' + self.compression) 302 tarin = tarfile.open(self.filename, 'r' + self.compression)
289 metadata = {} 303 metadata = {}
290 for item in tarin.getmembers(): 304 for item in tarin.getmembers():
@@ -312,24 +326,26 @@ class TarStripper(GenericArchiveStripper):
312 326
313 327
314class TerminalZipStripper(ZipStripper): 328class TerminalZipStripper(ZipStripper):
315 ''' Represent a terminal level archive. 329 """ Represent a terminal level archive.
316 This type of archive can not contain nested archives. 330 This type of archive can not contain nested archives.
317 It is used for formats like docx, which are basically 331 It is used for formats like docx, which are basically
318 ziped xml. 332 ziped xml.
319 ''' 333 """
320 334
321 335
322class GzipStripper(TarStripper): 336class GzipStripper(TarStripper):
323 ''' Represent a tar.gz archive 337 """ Represent a tar.gz archive
324 ''' 338 """
339
325 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 340 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
326 super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 341 super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
327 self.compression = ':gz' 342 self.compression = ':gz'
328 343
329 344
330class Bzip2Stripper(TarStripper): 345class Bzip2Stripper(TarStripper):
331 ''' Represent a tar.bz2 archive 346 """ Represent a tar.bz2 archive
332 ''' 347 """
348
333 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): 349 def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
334 super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) 350 super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
335 self.compression = ':bz2' 351 self.compression = ':bz2'