summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: jvoisin, 2019-02-20 16:28:11 -0800
committer: jvoisin, 2019-02-20 16:28:11 -0800
commit: 02ff21b158c76fcd355a74ddb940e1c54fc2d7ed (patch)
tree: 701c6f5e316265e5a95a162356965ecf2fb8d6b2 /tests
parent: 6b45064c784d03bb21ffaf7e50c9ba684e6985a9 (diff)
Implement epub support
Diffstat (limited to 'tests')
-rw-r--r-- tests/data/dirty.css 14
-rw-r--r-- tests/data/dirty.epub bin 0 -> 296324 bytes
-rw-r--r-- tests/dirty.epub bin 0 -> 296324 bytes
-rw-r--r-- tests/test_corrupted_files.py 41
-rw-r--r-- tests/test_libmat2.py 63
5 files changed, 105 insertions, 13 deletions
diff --git a/tests/data/dirty.css b/tests/data/dirty.css
new file mode 100644
index 0000000..f52caf9
--- /dev/null
+++ b/tests/data/dirty.css
@@ -0,0 +1,14 @@
1/**
2 * This is my super css framework
3 * version: 1.0
4 * author : jvoisin
5 */
6
7body {
8 color: red;
9 background-color: blue;
10}
11
12.underline {
13 text-decoration: underline; /* underline is cool */
14}
diff --git a/tests/data/dirty.epub b/tests/data/dirty.epub
new file mode 100644
index 0000000..6389963
--- /dev/null
+++ b/tests/data/dirty.epub
Binary files differ
diff --git a/tests/dirty.epub b/tests/dirty.epub
new file mode 100644
index 0000000..6389963
--- /dev/null
+++ b/tests/dirty.epub
Binary files differ
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 8728cb2..53c856a 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -7,7 +7,7 @@ import logging
7import zipfile 7import zipfile
8 8
9from libmat2 import pdf, images, audio, office, parser_factory, torrent 9from libmat2 import pdf, images, audio, office, parser_factory, torrent
10from libmat2 import harmless, video, html 10from libmat2 import harmless, video, web
11 11
12# No need to logging messages, should something go wrong, 12# No need to logging messages, should something go wrong,
13# the testsuite _will_ fail. 13# the testsuite _will_ fail.
@@ -220,34 +220,34 @@ class TestCorruptedFiles(unittest.TestCase):
220 os.remove('./tests/data/--output.avi') 220 os.remove('./tests/data/--output.avi')
221 221
222 def test_zip(self): 222 def test_zip(self):
223 with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: 223 with zipfile.ZipFile('./tests/data/clean.zip', 'w') as zout:
224 zout.write('./tests/data/dirty.flac') 224 zout.write('./tests/data/dirty.flac')
225 zout.write('./tests/data/dirty.docx') 225 zout.write('./tests/data/dirty.docx')
226 zout.write('./tests/data/dirty.jpg') 226 zout.write('./tests/data/dirty.jpg')
227 zout.write('./tests/data/embedded_corrupted.docx') 227 zout.write('./tests/data/embedded_corrupted.docx')
228 p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip') 228 p, mimetype = parser_factory.get_parser('./tests/data/clean.zip')
229 self.assertEqual(mimetype, 'application/zip') 229 self.assertEqual(mimetype, 'application/zip')
230 meta = p.get_meta() 230 meta = p.get_meta()
231 self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') 231 self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') 232 self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
233 self.assertFalse(p.remove_all()) 233 self.assertFalse(p.remove_all())
234 os.remove('./tests/data/dirty.zip') 234 os.remove('./tests/data/clean.zip')
235 235
236 def test_html(self): 236 def test_html(self):
237 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 237 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
238 with open('./tests/data/clean.html', 'a') as f: 238 with open('./tests/data/clean.html', 'a') as f:
239 f.write('<open>but not</closed>') 239 f.write('<open>but not</closed>')
240 with self.assertRaises(ValueError): 240 with self.assertRaises(ValueError):
241 html.HTMLParser('./tests/data/clean.html') 241 web.HTMLParser('./tests/data/clean.html')
242 os.remove('./tests/data/clean.html') 242 os.remove('./tests/data/clean.html')
243 243
244 # Yes, we're able to deal with malformed html :/ 244 # Yes, we're able to deal with malformed html :/
245 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 245 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
246 with open('./tests/data/clean.html', 'a') as f: 246 with open('./tests/data/clean.html', 'a') as f:
247 f.write('<meta name=\'this" is="weird"/>') 247 f.write('<meta name=\'this" is="weird"/>')
248 p = html.HTMLParser('./tests/data/clean.html') 248 p = web.HTMLParser('./tests/data/clean.html')
249 self.assertTrue(p.remove_all()) 249 self.assertTrue(p.remove_all())
250 p = html.HTMLParser('./tests/data/clean.cleaned.html') 250 p = web.HTMLParser('./tests/data/clean.cleaned.html')
251 self.assertEqual(p.get_meta(), {}) 251 self.assertEqual(p.get_meta(), {})
252 os.remove('./tests/data/clean.html') 252 os.remove('./tests/data/clean.html')
253 os.remove('./tests/data/clean.cleaned.html') 253 os.remove('./tests/data/clean.cleaned.html')
@@ -255,17 +255,38 @@ class TestCorruptedFiles(unittest.TestCase):
255 with open('./tests/data/clean.html', 'w') as f: 255 with open('./tests/data/clean.html', 'w') as f:
256 f.write('</close>') 256 f.write('</close>')
257 with self.assertRaises(ValueError): 257 with self.assertRaises(ValueError):
258 html.HTMLParser('./tests/data/clean.html') 258 web.HTMLParser('./tests/data/clean.html')
259 os.remove('./tests/data/clean.html') 259 os.remove('./tests/data/clean.html')
260 260
261 with open('./tests/data/clean.html', 'w') as f: 261 with open('./tests/data/clean.html', 'w') as f:
262 f.write('<notclosed>') 262 f.write('<notclosed>')
263 p = html.HTMLParser('./tests/data/clean.html') 263 p = web.HTMLParser('./tests/data/clean.html')
264 with self.assertRaises(ValueError): 264 with self.assertRaises(ValueError):
265 p.get_meta() 265 p.get_meta()
266 p = html.HTMLParser('./tests/data/clean.html') 266 p = web.HTMLParser('./tests/data/clean.html')
267 with self.assertRaises(ValueError): 267 with self.assertRaises(ValueError):
268 p.remove_all() 268 p.remove_all()
269 os.remove('./tests/data/clean.html') 269 os.remove('./tests/data/clean.html')
270 270
271 with open('./tests/data/clean.html', 'w') as f:
272 f.write('<doctitle><br/></doctitle><br/><notclosed>')
273 p = web.HTMLParser('./tests/data/clean.html')
274 with self.assertRaises(ValueError):
275 p.get_meta()
276 p = web.HTMLParser('./tests/data/clean.html')
277 with self.assertRaises(ValueError):
278 p.remove_all()
279 os.remove('./tests/data/clean.html')
280
281 def test_epub(self):
282 with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:
283 zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
284 p, mimetype = parser_factory.get_parser('./tests/data/clean.epub')
285 self.assertEqual(mimetype, 'application/epub+zip')
286 meta = p.get_meta()
287 self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
288 'harmful content')
289
290 self.assertFalse(p.remove_all())
291 os.remove('./tests/data/clean.epub')
271 292
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 8753e09..249c56d 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
6import zipfile 6import zipfile
7 7
8from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless 8from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
9from libmat2 import check_dependencies, video, archive, html 9from libmat2 import check_dependencies, video, archive, web, epub
10 10
11 11
12class TestCheckDependencies(unittest.TestCase): 12class TestCheckDependencies(unittest.TestCase):
@@ -177,6 +177,23 @@ class TestGetMeta(unittest.TestCase):
177 meta = p.get_meta() 177 meta = p.get_meta()
178 self.assertEqual(meta['Comment'], 'this is a test comment') 178 self.assertEqual(meta['Comment'], 'this is a test comment')
179 179
180 def test_epub(self):
181 p, mimetype = parser_factory.get_parser('./tests/data/dirty.epub')
182 self.assertEqual(mimetype, 'application/epub+zip')
183 meta = p.get_meta()
184 self.assertEqual(meta['OEBPS/content.opf']['dc:creator'], 'Dorothy L. Sayers')
185 self.assertEqual(meta['OEBPS/toc.ncx']['dtb:generator'], 'Ebookmaker 0.4.0a5 by Marcello Perathoner <webmaster@gutenberg.org>')
186 self.assertEqual(meta['OEBPS/@public@vhost@g@gutenberg@html@files@58820@58820-h@images@shield25.jpg']['CreatorTool'], 'Adobe Photoshop CS5 Macintosh')
187 self.assertEqual(meta['OEBPS/@public@vhost@g@gutenberg@html@files@58820@58820-h@58820-h-2.htm.html']['generator'], 'Ebookmaker 0.4.0a5 by Marcello Perathoner <webmaster@gutenberg.org>')
188
189 def test_css(self):
190 p, mimetype = parser_factory.get_parser('./tests/data/dirty.css')
191 self.assertEqual(mimetype, 'text/css')
192 meta = p.get_meta()
193 self.assertEqual(meta['author'], 'jvoisin')
194 self.assertEqual(meta['version'], '1.0')
195 self.assertEqual(meta['harmful data'], 'underline is cool')
196
180class TestRemovingThumbnails(unittest.TestCase): 197class TestRemovingThumbnails(unittest.TestCase):
181 def test_odt(self): 198 def test_odt(self):
182 shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt') 199 shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt')
@@ -599,7 +616,7 @@ class TestCleaning(unittest.TestCase):
599 616
600 def test_html(self): 617 def test_html(self):
601 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') 618 shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
602 p = html.HTMLParser('./tests/data/clean.html') 619 p = web.HTMLParser('./tests/data/clean.html')
603 620
604 meta = p.get_meta() 621 meta = p.get_meta()
605 self.assertEqual(meta['author'], 'jvoisin') 622 self.assertEqual(meta['author'], 'jvoisin')
@@ -607,10 +624,50 @@ class TestCleaning(unittest.TestCase):
607 ret = p.remove_all() 624 ret = p.remove_all()
608 self.assertTrue(ret) 625 self.assertTrue(ret)
609 626
610 p = html.HTMLParser('./tests/data/clean.cleaned.html') 627 p = web.HTMLParser('./tests/data/clean.cleaned.html')
611 self.assertEqual(p.get_meta(), {}) 628 self.assertEqual(p.get_meta(), {})
612 self.assertTrue(p.remove_all()) 629 self.assertTrue(p.remove_all())
613 630
614 os.remove('./tests/data/clean.html') 631 os.remove('./tests/data/clean.html')
615 os.remove('./tests/data/clean.cleaned.html') 632 os.remove('./tests/data/clean.cleaned.html')
616 os.remove('./tests/data/clean.cleaned.cleaned.html') 633 os.remove('./tests/data/clean.cleaned.cleaned.html')
634
635
636 def test_epub(self):
637 shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub')
638 p = epub.EPUBParser('./tests/data/clean.epub')
639
640 meta = p.get_meta()
641 self.assertEqual(meta['OEBPS/content.opf']['dc:source'], 'http://www.gutenberg.org/files/58820/58820-h/58820-h.htm')
642
643 ret = p.remove_all()
644 self.assertTrue(ret)
645
646 p = epub.EPUBParser('./tests/data/clean.cleaned.epub')
647 self.assertEqual(p.get_meta(), {})
648 self.assertTrue(p.remove_all())
649
650 os.remove('./tests/data/clean.epub')
651 os.remove('./tests/data/clean.cleaned.epub')
652 os.remove('./tests/data/clean.cleaned.cleaned.epub')
653
654
655 def test_css(self):
656 shutil.copy('./tests/data/dirty.css', './tests/data/clean.css')
657 p = web.CSSParser('./tests/data/clean.css')
658
659 self.assertEqual(p.get_meta(), {
660 'harmful data': 'underline is cool',
661 'version': '1.0',
662 'author': 'jvoisin'})
663
664 ret = p.remove_all()
665 self.assertTrue(ret)
666
667 p = web.CSSParser('./tests/data/clean.cleaned.css')
668 self.assertEqual(p.get_meta(), {})
669 self.assertTrue(p.remove_all())
670
671 os.remove('./tests/data/clean.css')
672 os.remove('./tests/data/clean.cleaned.css')
673 os.remove('./tests/data/clean.cleaned.cleaned.css')