summaryrefslogtreecommitdiff
path: root/MAT/parser.py
diff options
context:
space:
mode:
authorjvoisin2012-12-08 02:02:25 +0100
committerjvoisin2012-12-13 14:24:01 +0100
commitcbf8a2a65928694202e19b6bcf56ec84bcbf613c (patch)
treee106475b0d5c003505336b5ae6416e4508bb768b /MAT/parser.py
parent67d5c1fa6b9ab6e1e7328ee57b15d8e46526d72a (diff)
Reorganize source tree and files installation location, cleanup setup.py (Closes: #689409)
Diffstat (limited to 'MAT/parser.py')
-rw-r--r--MAT/parser.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/MAT/parser.py b/MAT/parser.py
new file mode 100644
index 0000000..d2eaf9c
--- /dev/null
+++ b/MAT/parser.py
@@ -0,0 +1,120 @@
1'''
2 Parent class of all parsers
3'''
4
5import hachoir_core
6import hachoir_editor
7
8import os
9
10import mat
11
12NOMETA = ('.bmp', '.rdf', '.txt', '.xml', '.rels')
13#bmp : image
14#rdf : text
15#txt : plain text
16#xml : formatted text
17#rels : openxml formatted text
18
19
# Sentinel that _should_remove() may return instead of True/False: it tells
# the caller that the field must itself be walked as a fieldset.
FIELD = object()


class GenericParser(object):
    '''
        Parent class of all parsers.

        Subclasses must implement _should_remove(), which decides for each
        field of the editor whether it has to be removed (True), walked
        recursively as a nested fieldset (FIELD), or kept (anything else).
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            filename    -- path of the file to process
            parser      -- parser object handed to hachoir_editor.createEditor
            mime        -- stored as-is in self.mime
            backup      -- if False, do_backup() replaces the original file
                           with the cleaned one
            add2archive -- accepted for interface compatibility; not used
                           by this base class
        '''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.editor = hachoir_editor.createEditor(parser)
        self.realname = filename
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        basename, ext = os.path.splitext(filename)
        # The cleaned copy is written next to the original file.
        self.output = basename + '.cleaned' + ext
        self.basename = os.path.basename(filename)  # only filename

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata.

            Return False as soon as one field (at any nesting level) is
            flagged by _should_remove(), True otherwise.
        '''
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        '''
            Recursive helper of is_clean(): walk the given fieldset.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD and not self._is_clean(field):
                return False
        return True

    def remove_all(self):
        '''
            Remove every field flagged by _should_remove(), write the
            result into self.output, then call do_backup().

            Return the state reported by _remove_all() (False if a removal
            failed). The output file is written whatever the state is.
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        '''
            Recursive helper of remove_all(): walk the given fieldset.

            Return True if everything went fine, False if an error occurred
            in this fieldset or in any nested one.
        '''
        state = True
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                elif remove is FIELD:
                    # A failure in a nested fieldset must be reported too,
                    # but does not stop the processing of the siblings.
                    if not self._remove_all(field):
                        state = False
        except Exception:
            # Best effort: report the failure instead of crashing, but let
            # KeyboardInterrupt/SystemExit propagate.
            return False
        return state

    def _remove(self, fieldset, field):
        '''
            Delete the field named `field` from the given fieldset
        '''
        del fieldset[field]

    def get_meta(self):
        '''
            Return a dict with all the meta of the file, mapping the name
            of each flagged field to its value.
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        '''
            Recursive helper of get_meta(): fill `metadata` in place with
            the flagged fields of the given fieldset.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                try:
                    metadata[field.name] = field.value
                except Exception:
                    # The value cannot be read: still report the field.
                    metadata[field.name] = 'harmful content'
            elif remove is FIELD:
                # Keep filling the same dict while walking nested fieldsets.
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        '''
            Return True if the field is compromising, FIELD if it must be
            walked as a nested fieldset.
            abstract method
        '''
        raise NotImplementedError

    def do_backup(self):
        '''
            Keep the original file if a backup was asked.

            When self.backup is False, the original file is removed with
            mat.secure_remove() and the cleaned output is renamed to take
            its place. Otherwise both files are left as they are.
        '''
        if self.backup is False:
            mat.secure_remove(self.filename)
            os.rename(self.output, self.filename)