From d8331c694a9023d82d702ed39e556d70bfb14869 Mon Sep 17 00:00:00 2001 From: intrigeri Date: Sat, 1 Aug 2015 10:14:22 +0000 Subject: archive: explicitly treat text we got from tarfile as encoded UTF-8 (Closes: MAT#9831). Without this, I see test failures caused by: test_remove_bz2 (libtest.TestArchiveProcessing) Test MAT's ability to process .tar.bz2 ... /usr/lib/python2.7/tarfile.py:1993: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal if self.name is not None and os.path.abspath(name) == self.name: This change seems to make sense because we're creating our tarin and tarout objects with encoding='utf-8' in the first place, so it's no big surprise that what we get from it is UTF-8-encoded text. --- libmat/archive.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libmat/archive.py b/libmat/archive.py index 4c62dc8..c96c74b 100644 --- a/libmat/archive.py +++ b/libmat/archive.py @@ -237,7 +237,9 @@ class TarStripper(GenericArchiveStripper): else: # Don't add the file to the archive logging.debug('%s will not be added' % item.name) continue - tarout.add(path, item.name, filter=self._remove) + tarout.add(unicode(path.decode('utf-8')), + unicode(item.name.decode('utf-8')), + filter=self._remove) tarin.close() tarout.close() self.do_backup() -- cgit v1.3