From d8331c694a9023d82d702ed39e556d70bfb14869 Mon Sep 17 00:00:00 2001
From: intrigeri
Date: Sat, 1 Aug 2015 10:14:22 +0000
Subject: archive: explicitly treat text we got from tarfile as encoded UTF-8
 (Closes: MAT#9831).

Without this, I see test failures caused by:

  test_remove_bz2 (libtest.TestArchiveProcessing)
  Test MAT's ability to process .tar.bz2 ... /usr/lib/python2.7/tarfile.py:1993: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
    if self.name is not None and os.path.abspath(name) == self.name:

This change seems to make sense because we're creating our tarin and tarout
objects with encoding='utf-8' in the first place, so it's no big surprise that
what we get from them is UTF-8-encoded text.
---
 libmat/archive.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libmat/archive.py b/libmat/archive.py
index 4c62dc8..c96c74b 100644
--- a/libmat/archive.py
+++ b/libmat/archive.py
@@ -237,7 +237,9 @@ class TarStripper(GenericArchiveStripper):
                 else:  # Don't add the file to the archive
                     logging.debug('%s will not be added' % item.name)
                     continue
-                tarout.add(path, item.name, filter=self._remove)
+                tarout.add(unicode(path.decode('utf-8')),
+                           unicode(item.name.decode('utf-8')),
+                           filter=self._remove)
         tarin.close()
         tarout.close()
         self.do_backup()
-- 
cgit v1.3