1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
""" Parent class of all parser
"""
import os
import shutil
import tempfile
import hachoir_core
import hachoir_editor
import mat
# Extensions of formats that cannot carry any metadata at all:
# files with these suffixes never need to be cleaned.
NOMETA = frozenset({
    '.bmp',   # "raw" image
    '.rdf',   # text
    '.txt',   # plain text
    '.xml',   # formated text (XML)
    '.rels',  # openXML formated text
})
# Sentinel returned by _should_remove(): the field is a sub-fieldset
# that must be walked recursively (compared with `is`, never `==`).
FIELD = object()


class GenericParser(object):
    """ Parent class of all parsers
    """
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        """ Wrap an hachoir editor around `filename`.

        :param filename: path of the file to process
        :param parser: hachoir parser for the file
        :param mime: mimetype of the file
        :param backup: if True, keep a "<filename>.bak" copy when cleaning
        :param is_writable: whether the file may be written to
        """
        self.filename = ''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.is_writable = is_writable
        self.editor = hachoir_editor.createEditor(parser)
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        self.basename = os.path.basename(filename)
        # mkstemp() returns an *open* file descriptor along with the path:
        # close it immediately, we only need the path (otherwise it leaks).
        fdesc, tmppath = tempfile.mkstemp()
        os.close(fdesc)
        self.output = hachoir_core.cmd_line.unicodeFilename(tmppath)

    def __del__(self):
        """ Remove tempfile if it was not used
        """
        # getattr(): __init__ may have raised before self.output was set,
        # and __del__ still runs on the partially-constructed instance.
        output = getattr(self, 'output', None)
        if output is not None and os.path.exists(output):
            mat.secure_remove(output)

    def is_clean(self):
        """
        Check if the file is clean from harmful metadatas
        """
        # _is_clean() already walks the whole tree (including nested
        # fieldsets), so a separate pre-scan of self.editor is redundant.
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        """ Helper method of the `is_clean` one: return False as soon as
        one compromising field is found anywhere in the tree.
        """
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:  # this very field is compromising
                return False
            if remove is FIELD:  # sub-fieldset: recurse into it
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        """ Remove all compromising fields, write the cleaned file to the
        temporary output and swap it in (see `do_backup`).

        :return: True on success, False if a field could not be removed
        """
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        """ Recursive way to handle tree metadatas
        """
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                if remove is FIELD:
                    self._remove_all(field)  # recurse into the sub-fieldset
            return True
        except Exception:  # hachoir may raise on malformed/odd fields
            return False

    @staticmethod
    def _remove(fieldset, field):
        """ Delete the field named `field` from `fieldset`.
        """
        del fieldset[field]

    def get_meta(self):
        """ Return a dict with all the meta of the file
        """
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        """ Recursive way to handle tree metadatas: fill `metadata` with
        the name/value of every compromising field found in `fieldset`.
        """
        for field in fieldset:
            remove = self._should_remove(field)
            if remove:  # True or FIELD: both are reported
                try:
                    metadata[field.name] = field.value
                except Exception:  # value may not be extractable
                    metadata[field.name] = 'harmful content'
                if remove is FIELD:
                    # Recurse with the *same* accumulator: passing None
                    # here would crash on any nested fieldset.
                    self._get_meta(field, metadata)

    def _should_remove(self, key):
        """ Return True if the field is compromising, FIELD if it is a
        sub-fieldset that must be walked recursively, a falsy value otherwise.
        abstract method
        """
        raise NotImplementedError

    def create_backup_copy(self):
        """ Create a backup copy named "<filename>.bak".
        """
        # NOT os.path.join(self.filename, '.bak'): that would build
        # "<filename>/.bak" (a path *inside* a directory), not a sibling file.
        shutil.copy2(self.filename, self.filename + '.bak')

    def do_backup(self):
        """ Keep a backup of the file if asked.
        The process of double-renaming is not very elegant,
        but it greatly simplify new strippers implementation.
        """
        if self.backup:
            # Move the original aside as "<filename>.bak" (string concat,
            # not os.path.join which would wrongly build "<filename>/.bak").
            shutil.move(self.filename, self.filename + '.bak')
        else:
            mat.secure_remove(self.filename)
        shutil.move(self.output, self.filename)
|