summary | refs | log | tree | commit | diff
path: root/libmat2
diff options
context:
space:
mode:
Diffstat (limited to 'libmat2')
-rw-r--r-- libmat2/abstract.py 5
-rw-r--r-- libmat2/archive.py 28
-rw-r--r-- libmat2/parser_factory.py 4
3 files changed, 35 insertions, 2 deletions
diff --git a/libmat2/abstract.py b/libmat2/abstract.py
index aaf00d7..a7c5fa5 100644
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
@@ -25,6 +25,11 @@ class AbstractParser(abc.ABC):
25 25
26 self.filename = filename 26 self.filename = filename
27 fname, extension = os.path.splitext(filename) 27 fname, extension = os.path.splitext(filename)
28
29 # Special case for tar.gz, tar.bz2, … files
30 if fname.endswith('.tar') and len(fname) > 4:
31 fname, extension = fname[:-4], '.tar' + extension
32
28 self.output_filename = fname + '.cleaned' + extension 33 self.output_filename = fname + '.cleaned' + extension
29 self.lightweight_cleaning = False 34 self.lightweight_cleaning = False
30 35
diff --git a/libmat2/archive.py b/libmat2/archive.py
index 2936f39..d295afe 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -40,6 +40,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
40 the Union[A, B] constrain, hence the weird `# type: ignore` 40 the Union[A, B] constrain, hence the weird `# type: ignore`
41 annotations. 41 annotations.
42 """ 42 """
43 # Tarfiles can optionally support compression
44 # https://docs.python.org/3/library/tarfile.html#tarfile.open
45 compression = ''
46
43 def __init__(self, filename): 47 def __init__(self, filename):
44 super().__init__(filename) 48 super().__init__(filename)
45 self.archive_class = None # type: Optional[ArchiveClass] 49 self.archive_class = None # type: Optional[ArchiveClass]
@@ -134,7 +138,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
134 # pylint: disable=too-many-branches 138 # pylint: disable=too-many-branches
135 139
136 with self.archive_class(self.filename) as zin,\ 140 with self.archive_class(self.filename) as zin,\
137 self.archive_class(self.output_filename, 'w') as zout: 141 self.archive_class(self.output_filename, 'w' + self.compression) as zout:
138 142
139 temp_folder = tempfile.mkdtemp() 143 temp_folder = tempfile.mkdtemp()
140 abort = False 144 abort = False
@@ -212,7 +216,11 @@ class TarParser(ArchiveBasedAbstractParser):
212 mimetypes = {'application/x-tar'} 216 mimetypes = {'application/x-tar'}
213 def __init__(self, filename): 217 def __init__(self, filename):
214 super().__init__(filename) 218 super().__init__(filename)
215 self.archive_class = tarfile.TarFile 219 # yes, it's tarfile.TarFile.open and not tarfile.TarFile,
220 # as stated in the documentation:
221 # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile
222 # This is required to support compressed archives.
223 self.archive_class = tarfile.TarFile.open
216 self.member_class = tarfile.TarInfo 224 self.member_class = tarfile.TarInfo
217 225
218 def is_archive_valid(self): 226 def is_archive_valid(self):
@@ -259,6 +267,22 @@ class TarParser(ArchiveBasedAbstractParser):
259 assert isinstance(member, tarfile.TarInfo) # please mypy 267 assert isinstance(member, tarfile.TarInfo) # please mypy
260 return member.name 268 return member.name
261 269
270
271class TarGzParser(TarParser):
272 compression = ':gz'
273 mimetypes = {'application/x-tar+gz'}
274
275
276class TarBz2Parser(TarParser):
277 compression = ':bz2'
278 mimetypes = {'application/x-tar+bz2'}
279
280
281class TarXzParser(TarParser):
282 compression = ':xz'
283 mimetypes = {'application/x-tar+xz'}
284
285
262class ZipParser(ArchiveBasedAbstractParser): 286class ZipParser(ArchiveBasedAbstractParser):
263 mimetypes = {'application/zip'} 287 mimetypes = {'application/zip'}
264 def __init__(self, filename): 288 def __init__(self, filename):
diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py
index e93ee4f..3931903 100644
--- a/libmat2/parser_factory.py
+++ b/libmat2/parser_factory.py
@@ -50,6 +50,10 @@ def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
50 if extension.lower() in UNSUPPORTED_EXTENSIONS: 50 if extension.lower() in UNSUPPORTED_EXTENSIONS:
51 return None, mtype 51 return None, mtype
52 52
53 if mtype == 'application/x-tar':
54 if extension[1:] in ('bz2', 'gz', 'xz'):
55 mtype = mtype + '+' + extension[1:]
56
53 for parser_class in _get_parsers(): # type: ignore 57 for parser_class in _get_parsers(): # type: ignore
54 if mtype in parser_class.mimetypes: 58 if mtype in parser_class.mimetypes:
55 try: 59 try: