From 55214206b5f0f1c12fec378967f89ef4cb5674f9 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 27 Feb 2019 23:53:07 +0100 Subject: Improve the previous commit - More tests - More documentation - Minor code cleanup --- tests/test_corrupted_files.py | 4 +--- tests/test_libmat2.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py index b2cec00..4a16d51 100644 --- a/tests/test_corrupted_files.py +++ b/tests/test_corrupted_files.py @@ -269,9 +269,6 @@ class TestCorruptedFiles(unittest.TestCase): os.remove('./tests/data/clean.html') with open('./tests/data/clean.html', 'w') as f: - f.write('') - f.write('<title>pouet') - f.write('<mysupertag/>') f.write('

') p = web.HTMLParser('./tests/data/clean.html') with self.assertRaises(ValueError): @@ -281,6 +278,7 @@ class TestCorruptedFiles(unittest.TestCase): p.remove_all() os.remove('./tests/data/clean.html') + def test_epub(self): with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout: zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf') diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index f4b1890..46e234e 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -633,6 +633,33 @@ class TestCleaning(unittest.TestCase): os.remove('./tests/data/clean.cleaned.html') os.remove('./tests/data/clean.cleaned.cleaned.html') + with open('./tests/data/clean.html', 'w') as f: + f.write('<title><pouet/><meta/>') + p = web.HTMLParser('./tests/data/clean.html') + self.assertTrue(p.remove_all()) + with open('./tests/data/clean.cleaned.html', 'r') as f: + self.assertEqual(f.read(), '') + os.remove('./tests/data/clean.html') + os.remove('./tests/data/clean.cleaned.html') + + with open('./tests/data/clean.html', 'w') as f: + f.write('Some<b>metadata</b><br/>') + p = web.HTMLParser('./tests/data/clean.html') + self.assertTrue(p.remove_all()) + with open('./tests/data/clean.cleaned.html', 'r') as f: + self.assertEqual(f.read(), '') + os.remove('./tests/data/clean.html') + os.remove('./tests/data/clean.cleaned.html') + + with open('./tests/data/clean.html', 'w') as f: + f.write('') + p = web.HTMLParser('./tests/data/clean.html') + self.assertTrue(p.remove_all()) + with open('./tests/data/clean.cleaned.html', 'r') as f: + self.assertEqual(f.read(), '') + os.remove('./tests/data/clean.html') + os.remove('./tests/data/clean.cleaned.html') + def test_epub(self): shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub') -- cgit v1.3