summaryrefslogtreecommitdiff
path: root/lib/parser.py
diff options
context:
space:
mode:
authorjvoisin2012-02-01 22:56:04 +0100
committerjvoisin2012-02-01 22:56:04 +0100
commit544fe9bf1782a027b3f31bf4c10a050d783e32ac (patch)
treea8dd60b9ae45efea4875fdb827070531f0199717 /lib/parser.py
parent9ea6dc6960cebfa70d18ba8ee49d775ea91c9b34 (diff)
Rename mat-cli to mat-gui
Diffstat (limited to 'lib/parser.py')
-rw-r--r--lib/parser.py130
1 files changed, 130 insertions, 0 deletions
diff --git a/lib/parser.py b/lib/parser.py
new file mode 100644
index 0000000..6dc5d0b
--- /dev/null
+++ b/lib/parser.py
@@ -0,0 +1,130 @@
'''
    Parent class of all parsers
'''
4
5import hachoir_core
6import hachoir_editor
7
8import os
9
10import mat
11
# NOTE(review): presumably the file extensions treated as carrying no
# metadata ("no meta"); the code that consumes this tuple is not visible
# here, so confirm against its callers.
NOMETA = ('.bmp', '.rdf', '.txt', '.xml', '.rels')
# bmp  : image
# rdf  : text
# txt  : plain text
# xml  : formatted text
# rels : OpenXML formatted text
18
19
# Sentinel that a `_should_remove` implementation returns to signal that the
# given field is itself a fieldset whose children must be examined one by one.
FIELD = object()


class GenericParser(object):
    '''
        Parent class of all parsers.

        Wraps a parser in an editable view (hachoir_editor.createEditor) and
        offers generic routines to inspect, list and remove the fields that
        a subclass flags through `_should_remove`.
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
            Build the editor and derive the paths used by the other methods.

            filename    -- path of the file to process
            parser      -- parser object handed to hachoir_editor.createEditor
            mime        -- stored unchanged in self.mime
            backup      -- stored in self.backup, read by do_backup()
            add2archive -- accepted for interface compatibility; this class
                           does not use it
        '''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.editor = hachoir_editor.createEditor(parser)
        self.realname = filename
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        basename, ext = os.path.splitext(filename)
        # The cleaned copy sits next to the original: "a.jpg" -> "a.cleaned.jpg"
        self.output = basename + '.cleaned' + ext
        self.basename = os.path.basename(filename)  # only filename

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata.

            Return False as soon as one field (at any nesting depth) is
            flagged by `_should_remove`, True otherwise.
        '''
        # A single recursive walk is enough: when no field is flagged,
        # _is_clean() returns True on its own.
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        '''
            Recursive worker of is_clean() for one fieldset.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD:
                # Nested fieldset: it is clean only if all its children are.
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        '''
            Remove all the fields that are compromising.

            The edited content is written to self.output and do_backup() is
            called afterwards, whatever the outcome of the removal.
            Return True if every removal succeeded, False otherwise.
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        '''
            Recursive worker of remove_all() for one fieldset.

            Return False if an error occurred here or in a nested fieldset,
            True otherwise.
        '''
        succeeded = True
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                if remove is FIELD:
                    # Keep processing the siblings, but remember the failure
                    # so that it is reported to the caller.
                    if not self._remove_all(field):
                        succeeded = False
        except Exception:
            # Best effort: report the failure instead of crashing, without
            # swallowing KeyboardInterrupt / SystemExit.
            return False
        return succeeded

    def remove_all_strict(self):
        '''
            If the remove_all() is not efficient enough,
            this method is implemented :
            It is efficient, but destructive.
            In a perfect world, with nice fileformat,
            this method would not exist.

            This generic version simply delegates to remove_all().
        '''
        self.remove_all()

    def _remove(self, fieldset, field):
        '''
            Delete the field named `field` from `fieldset`.
        '''
        del fieldset[field]

    def get_meta(self):
        '''
            Return a dict with all the meta of the file.

            Keys are field names, values are the field values (or the string
            'harmful content' when the value cannot be read).
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        '''
            Recursive worker of get_meta(): fill `metadata` in place with the
            flagged fields found in `fieldset`.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                try:
                    metadata[field.name] = field.value
                except Exception:
                    # The value could not be read: still list the field.
                    metadata[field.name] = 'harmful content'
            if remove is FIELD:
                # The accumulator must be forwarded to the recursive call.
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        '''
            Return True if the field is compromising, or FIELD if it is a
            fieldset that must be inspected recursively.
            Abstract method: subclasses must override it.
        '''
        raise NotImplementedError

    def do_backup(self):
        '''
            Keep a backup of the original file if asked.

            When self.backup is False, the original file is securely removed
            and the cleaned output takes its name; otherwise both files are
            left untouched.
        '''
        if self.backup is False:
            mat.secure_remove(self.filename)
            os.rename(self.output, self.filename)