summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/implementation_notes.md8
-rw-r--r--src/images.py7
-rw-r--r--src/parser_factory.py5
-rw-r--r--src/pdf.py9
-rw-r--r--tests/test_libmat2.py12
5 files changed, 38 insertions, 3 deletions
diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md
index 60e9081..59e7d94 100644
--- a/doc/implementation_notes.md
+++ b/doc/implementation_notes.md
@@ -9,6 +9,14 @@ that only cleans the superficial metadata of your file, but not
9the ones that might be in **embedded** resources. Like for example, 9the ones that might be in **embedded** resources. Like for example,
10images in a PDF or an office document. 10images in a PDF or an office document.
11 11
12Race conditions
13---------------
14
15MAT2 does its very best to avoid crashing at runtime. This is why it checks
16whether the file is valid __at parser creation__. MAT2 doesn't take any measures to
17ensure that the file is not changed between the time the parser is
18instantiated and the call to clean or show the metadata.
19
12Symlink attacks 20Symlink attacks
13--------------- 21---------------
14 22
diff --git a/src/images.py b/src/images.py
index 7c1abaa..6cc3dfe 100644
--- a/src/images.py
+++ b/src/images.py
@@ -20,6 +20,13 @@ class PNGParser(abstract.AbstractParser):
20 'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize', 20 'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize',
21 'Megapixels', 'ImageHeight'} 21 'Megapixels', 'ImageHeight'}
22 22
23 def __init__(self, filename):
24 super().__init__(filename)
25 try: # better fail here than later
26 cairo.ImageSurface.create_from_png(self.filename)
27 except MemoryError:
28 raise ValueError
29
23 def get_meta(self): 30 def get_meta(self):
24 out = subprocess.check_output(['/usr/bin/exiftool', '-json', self.filename]) 31 out = subprocess.check_output(['/usr/bin/exiftool', '-json', self.filename])
25 meta = json.loads(out.decode('utf-8'))[0] 32 meta = json.loads(out.decode('utf-8'))[0]
diff --git a/src/parser_factory.py b/src/parser_factory.py
index 68e9e9c..80aedae 100644
--- a/src/parser_factory.py
+++ b/src/parser_factory.py
@@ -30,5 +30,8 @@ def get_parser(filename: str) -> (T, str):
30 30
31 for c in _get_parsers(): 31 for c in _get_parsers():
32 if mtype in c.mimetypes: 32 if mtype in c.mimetypes:
33 return c(filename), mtype 33 try:
34 return c(filename), mtype
35 except ValueError:
36 return None, mtype
34 return None, mtype 37 return None, mtype
diff --git a/src/pdf.py b/src/pdf.py
index 6e639cd..3ba3d4a 100644
--- a/src/pdf.py
+++ b/src/pdf.py
@@ -11,7 +11,7 @@ import io
11import cairo 11import cairo
12import gi 12import gi
13gi.require_version('Poppler', '0.18') 13gi.require_version('Poppler', '0.18')
14from gi.repository import Poppler 14from gi.repository import Poppler, GLib
15 15
16from . import abstract 16from . import abstract
17 17
@@ -28,6 +28,10 @@ class PDFParser(abstract.AbstractParser):
28 super().__init__(filename) 28 super().__init__(filename)
29 self.uri = 'file://' + os.path.abspath(self.filename) 29 self.uri = 'file://' + os.path.abspath(self.filename)
30 self.__scale = 2 # how much precision do we want for the render 30 self.__scale = 2 # how much precision do we want for the render
31 try: # Check now that the file is valid, to avoid surprises later
32 Poppler.Document.new_from_file(self.uri, None)
33 except GLib.GError: # Invalid PDF
34 raise ValueError
31 35
32 def remove_all_lightweight(self): 36 def remove_all_lightweight(self):
33 """ 37 """
@@ -116,8 +120,9 @@ class PDFParser(abstract.AbstractParser):
116 def get_meta(self): 120 def get_meta(self):
117 """ Return a dict with all the meta of the file 121 """ Return a dict with all the meta of the file
118 """ 122 """
119 document = Poppler.Document.new_from_file(self.uri, None)
120 metadata = {} 123 metadata = {}
124 document = Poppler.Document.new_from_file(self.uri, None)
125
121 for key in self.meta_list: 126 for key in self.meta_list:
122 if document.get_property(key): 127 if document.get_property(key):
123 metadata[key] = document.get_property(key) 128 metadata[key] = document.get_property(key)
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 1950444..17afaf4 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -16,6 +16,18 @@ class TestParserFactory(unittest.TestCase):
16 self.assertEqual(mimetype, 'audio/mpeg') 16 self.assertEqual(mimetype, 'audio/mpeg')
17 self.assertEqual(parser.__class__, audio.MP3Parser) 17 self.assertEqual(parser.__class__, audio.MP3Parser)
18 18
19class TestCorruptedFiles(unittest.TestCase):
20 def test_pdf(self):
21 shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
22 with self.assertRaises(ValueError):
23 pdf.PDFParser('./tests/data/clean.png')
24 os.remove('./tests/data/clean.png')
25
26 def test_png(self):
27 shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
28 with self.assertRaises(ValueError):
29 images.PNGParser('./tests/data/clean.pdf')
30 os.remove('./tests/data/clean.pdf')
19 31
20class TestGetMeta(unittest.TestCase): 32class TestGetMeta(unittest.TestCase):
21 def test_pdf(self): 33 def test_pdf(self):