diff options
| -rw-r--r-- | doc/implementation_notes.md | 8 | ||||
| -rw-r--r-- | libmat2/office.py | 6 |
2 files changed, 9 insertions, 5 deletions
diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md index b763835..3b8e49d 100644 --- a/doc/implementation_notes.md +++ b/doc/implementation_notes.md | |||
| @@ -61,3 +61,11 @@ Images handling | |||
| 61 | When possible, images are handled like PDF: rendered on a surface, then saved | 61 | When possible, images are handled like PDF: rendered on a surface, then saved |
| 62 | to the filesystem. This ensures that every metadata is removed. | 62 | to the filesystem. This ensures that every metadata is removed. |
| 63 | 63 | ||
| 64 | XML attacks | ||
| 65 | ----------- | ||
| 66 | |||
| 67 | Since our threat model conveniently excludes files specifically crafted to | ||
| 68 | bypass MAT2, file formats containing harmful XML are out of our scope. | ||
| 69 | However, since MAT2 uses [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) | ||
| 70 | to process XML, it's "only" vulnerable to DoS, not to memory corruption: | ||
| 71 | odds are that the user will notice that the cleaning didn't succeed. | ||
diff --git a/libmat2/office.py b/libmat2/office.py index 224067c..29100df 100644 --- a/libmat2/office.py +++ b/libmat2/office.py | |||
| @@ -7,11 +7,7 @@ import zipfile | |||
| 7 | import logging | 7 | import logging |
| 8 | from typing import Dict, Set, Pattern | 8 | from typing import Dict, Set, Pattern |
| 9 | 9 | ||
| 10 | try: # protect against DoS | 10 | import xml.etree.ElementTree as ET # type: ignore |
| 11 | from defusedxml import ElementTree as ET # type: ignore | ||
| 12 | except ImportError: | ||
| 13 | import xml.etree.ElementTree as ET # type: ignore | ||
| 14 | |||
| 15 | 11 | ||
| 16 | from . import abstract, parser_factory | 12 | from . import abstract, parser_factory |
| 17 | 13 | ||
