summary | refs | log | tree | commit | diff
path: root/MAT
diff options
context:
space:
mode:
author: jvoisin 2012-12-27 17:01:30 +0100
committer: jvoisin 2012-12-27 17:01:30 +0100
commit: cffdcc1b1e78d48cad62c54432a9d8ce41f4d616 (patch)
tree: f07e10cdbfb6fa1a5de4edb84435f10cb50ee191 /MAT
parent: a36f48b460f7638052f2e8ac3f9ddde8232cf339 (diff)
Refactor the archive parser
Refactoring of the archive and office parser, in order to simplify the code and reduce abstraction
Diffstat (limited to 'MAT')
-rw-r--r-- MAT/archive.py 7
-rw-r--r-- MAT/office.py 10
-rw-r--r-- MAT/parser.py 14
3 files changed, 14 insertions, 17 deletions
diff --git a/MAT/archive.py b/MAT/archive.py
index 69c8f1b..1dcddef 100644
--- a/MAT/archive.py
+++ b/MAT/archive.py
@@ -35,9 +35,6 @@ class GenericArchiveStripper(parser.GenericParser):
35 shutil.rmtree(self.tempdir) 35 shutil.rmtree(self.tempdir)
36 36
37 def remove_all(self): 37 def remove_all(self):
38 return self._remove_all()
39
40 def _remove_all(self):
41 raise NotImplementedError 38 raise NotImplementedError
42 39
43 40
@@ -113,7 +110,7 @@ harmless format' % item.filename)
113 zipin.close() 110 zipin.close()
114 return metadata 111 return metadata
115 112
116 def _remove_all(self): 113 def remove_all(self):
117 ''' 114 '''
118 So far, the zipfile module does not allow to write a ZipInfo 115 So far, the zipfile module does not allow to write a ZipInfo
119 object into a zipfile (and it's a shame !) : so data added 116 object into a zipfile (and it's a shame !) : so data added
@@ -162,7 +159,7 @@ class TarStripper(GenericArchiveStripper):
162 current_file.gname = '' 159 current_file.gname = ''
163 return current_file 160 return current_file
164 161
165 def _remove_all(self): 162 def remove_all(self):
166 tarin = tarfile.open(self.filename, 'r' + self.compression) 163 tarin = tarfile.open(self.filename, 'r' + self.compression)
167 tarout = tarfile.open(self.output, 'w' + self.compression) 164 tarout = tarfile.open(self.output, 'w' + self.compression)
168 for item in tarin.getmembers(): 165 for item in tarin.getmembers():
diff --git a/MAT/office.py b/MAT/office.py
index e7ce661..20664d2 100644
--- a/MAT/office.py
+++ b/MAT/office.py
@@ -49,7 +49,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
49 logging.debug('%s has no opendocument metadata' % self.filename) 49 logging.debug('%s has no opendocument metadata' % self.filename)
50 return metadata 50 return metadata
51 51
52 def _remove_all(self): 52 def remove_all(self):
53 ''' 53 '''
54 FIXME ? 54 FIXME ?
55 There is a patch implementing the Zipfile.remove() 55 There is a patch implementing the Zipfile.remove()
@@ -140,12 +140,6 @@ class PdfStripper(parser.GenericParser):
140 140
141 def remove_all(self): 141 def remove_all(self):
142 ''' 142 '''
143 Remove metadata
144 '''
145 return self._remove_meta()
146
147 def _remove_meta(self):
148 '''
149 Opening the PDF with poppler, then doing a render 143 Opening the PDF with poppler, then doing a render
150 on a cairo pdfsurface for each pages. 144 on a cairo pdfsurface for each pages.
151 145
@@ -202,7 +196,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
202 It contains mostly xml, but can have media blobs, crap, ... 196 It contains mostly xml, but can have media blobs, crap, ...
203 (I don't like this format.) 197 (I don't like this format.)
204 ''' 198 '''
205 def _remove_all(self): 199 def remove_all(self):
206 ''' 200 '''
207 FIXME ? 201 FIXME ?
208 There is a patch implementing the Zipfile.remove() 202 There is a patch implementing the Zipfile.remove()
diff --git a/MAT/parser.py b/MAT/parser.py
index 6be2b03..e5acbf8 100644
--- a/MAT/parser.py
+++ b/MAT/parser.py
@@ -59,7 +59,7 @@ class GenericParser(object):
59 59
60 def remove_all(self): 60 def remove_all(self):
61 ''' 61 '''
62 Remove all the files that are compromizing 62 Remove all compromising fields
63 ''' 63 '''
64 state = self._remove_all(self.editor) 64 state = self._remove_all(self.editor)
65 hachoir_core.field.writeIntoFile(self.editor, self.output) 65 hachoir_core.field.writeIntoFile(self.editor, self.output)
@@ -67,6 +67,9 @@ class GenericParser(object):
67 return state 67 return state
68 68
69 def _remove_all(self, fieldset): 69 def _remove_all(self, fieldset):
70 '''
71 Recursive way to handle tree metadatas
72 '''
70 try: 73 try:
71 for field in fieldset: 74 for field in fieldset:
72 remove = self._should_remove(field) 75 remove = self._should_remove(field)
@@ -93,6 +96,9 @@ class GenericParser(object):
93 return metadata 96 return metadata
94 97
95 def _get_meta(self, fieldset, metadata): 98 def _get_meta(self, fieldset, metadata):
99 '''
100 Recursive way to handle tree metadatas
101 '''
96 for field in fieldset: 102 for field in fieldset:
97 remove = self._should_remove(field) 103 remove = self._should_remove(field)
98 if remove is True: 104 if remove is True:
@@ -101,11 +107,11 @@ class GenericParser(object):
101 except: 107 except:
102 metadata[field.name] = 'harmful content' 108 metadata[field.name] = 'harmful content'
103 if remove is FIELD: 109 if remove is FIELD:
104 self._get_meta(field) 110 self._get_meta(field, None)
105 111
106 def _should_remove(self, key): 112 def _should_remove(self, key):
107 ''' 113 '''
108 return True if the field is compromizing 114 return True if the field is compromising
109 abstract method 115 abstract method
110 ''' 116 '''
111 raise NotImplementedError 117 raise NotImplementedError
@@ -115,6 +121,6 @@ class GenericParser(object):
115 Do a backup of the file if asked, 121 Do a backup of the file if asked,
116 and change his creation/access date 122 and change his creation/access date
117 ''' 123 '''
118 if self.backup is False: 124 if not self.backup:
119 mat.secure_remove(self.filename) 125 mat.secure_remove(self.filename)
120 os.rename(self.output, self.filename) 126 os.rename(self.output, self.filename)