summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mat/parser.py40
1 files changed, 32 insertions, 8 deletions
diff --git a/mat/parser.py b/mat/parser.py
index edbdca1..c7c606c 100644
--- a/mat/parser.py
+++ b/mat/parser.py
@@ -17,6 +17,8 @@ NOMETA = ('.bmp', '.rdf', '.txt', '.xml', '.rels')
17#rels : openxml foramted text 17#rels : openxml foramted text
18 18
19 19
20FIELD = object()
21
20class GenericParser(object): 22class GenericParser(object):
21 ''' 23 '''
22 Parent class of all parsers 24 Parent class of all parsers
@@ -42,19 +44,35 @@ class GenericParser(object):
42 ''' 44 '''
43 for field in self.editor: 45 for field in self.editor:
44 if self._should_remove(field): 46 if self._should_remove(field):
47 return self._is_clean(self.editor)
48 return True
49
50 def _is_clean(self, fieldset):
51 for field in fieldset:
52 remove = self._should_remove(field)
53 if remove is True:
45 return False 54 return False
55 if remove is FIELD:
56 if not self._is_clean(field):
57 return False
46 return True 58 return True
47 59
48 def remove_all(self): 60 def remove_all(self):
49 ''' 61 '''
50 Remove all the files that are compromizing 62 Remove all the files that are compromizing
51 ''' 63 '''
52 for field in self.editor: 64 self._remove_all(self.editor)
53 if self._should_remove(field):
54 self._remove(field.name)
55 hachoir_core.field.writeIntoFile(self.editor, self.output) 65 hachoir_core.field.writeIntoFile(self.editor, self.output)
56 self.do_backup() 66 self.do_backup()
57 67
68 def _remove_all(self, fieldset):
69 for field in fieldset:
70 remove = self._should_remove(field)
71 if remove is True:
72 self._remove(fieldset, field.name)
73 if remove is FIELD:
74 self._remove_all(field)
75
58 def remove_all_ugly(self): 76 def remove_all_ugly(self):
59 ''' 77 '''
60 If the remove_all() is not efficient enough, 78 If the remove_all() is not efficient enough,
@@ -65,24 +83,30 @@ class GenericParser(object):
65 ''' 83 '''
66 self.remove_all() 84 self.remove_all()
67 85
68 def _remove(self, field): 86 def _remove(self, fieldset, field):
69 ''' 87 '''
70 Delete the given field 88 Delete the given field
71 ''' 89 '''
72 del self.editor[field] 90 del fieldset[field]
73 91
74 def get_meta(self): 92 def get_meta(self):
75 ''' 93 '''
76 Return a dict with all the meta of the file 94 Return a dict with all the meta of the file
77 ''' 95 '''
78 metadata = {} 96 metadata = {}
79 for field in self.editor: 97 self._get_meta(self.editor, metadata)
80 if self._should_remove(field): 98 return metadata
99
100 def _get_meta(self, fieldset, metadata):
101 for field in fieldset:
102 remove = self._should_remove(field)
103 if remove is True:
81 try: 104 try:
82 metadata[field.name] = field.value 105 metadata[field.name] = field.value
83 except: 106 except:
84 metadata[field.name] = 'harmful content' 107 metadata[field.name] = 'harmful content'
85 return metadata 108 if remove is FIELD:
109 self._get_meta(field)
86 110
87 def _should_remove(self, key): 111 def _should_remove(self, key):
88 ''' 112 '''