From 6cc034e81bd0cea98dffe4d7311f3bd16178b63e Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Fri, 8 Feb 2019 00:26:47 +0100
Subject: Add support for html files

---
 tests/test_libmat2.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'tests/test_libmat2.py')

diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 548b076..8753e09 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
 import zipfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
-from libmat2 import check_dependencies, video, archive
+from libmat2 import check_dependencies, video, archive, html
 
 
 class TestCheckDependencies(unittest.TestCase):
@@ -596,3 +596,21 @@ class TestCleaning(unittest.TestCase):
         os.remove('./tests/data/clean.gif')
         os.remove('./tests/data/clean.cleaned.gif')
         os.remove('./tests/data/clean.cleaned.cleaned.gif')
+
+    def test_html(self):
+        shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
+        p = html.HTMLParser('./tests/data/clean.html')
+
+        meta = p.get_meta()
+        self.assertEqual(meta['author'], 'jvoisin')
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = html.HTMLParser('./tests/data/clean.cleaned.html')
+        self.assertEqual(p.get_meta(), {})
+        self.assertTrue(p.remove_all())
+
+        os.remove('./tests/data/clean.html')
+        os.remove('./tests/data/clean.cleaned.html')
+        os.remove('./tests/data/clean.cleaned.cleaned.html')
-- 
cgit v1.3