diff options
| author | jvoisin | 2012-12-27 17:01:30 +0100 |
|---|---|---|
| committer | jvoisin | 2012-12-27 17:01:30 +0100 |
| commit | cffdcc1b1e78d48cad62c54432a9d8ce41f4d616 (patch) | |
| tree | f07e10cdbfb6fa1a5de4edb84435f10cb50ee191 /MAT | |
| parent | a36f48b460f7638052f2e8ac3f9ddde8232cf339 (diff) | |
Refactor the archive parser
Refactoring of the archive and office parsers,
in order to simplify the code and reduce abstraction.
Diffstat (limited to 'MAT')
| -rw-r--r-- | MAT/archive.py | 7 | ||||
| -rw-r--r-- | MAT/office.py | 10 | ||||
| -rw-r--r-- | MAT/parser.py | 14 |
3 files changed, 14 insertions, 17 deletions
diff --git a/MAT/archive.py b/MAT/archive.py index 69c8f1b..1dcddef 100644 --- a/MAT/archive.py +++ b/MAT/archive.py | |||
| @@ -35,9 +35,6 @@ class GenericArchiveStripper(parser.GenericParser): | |||
| 35 | shutil.rmtree(self.tempdir) | 35 | shutil.rmtree(self.tempdir) |
| 36 | 36 | ||
| 37 | def remove_all(self): | 37 | def remove_all(self): |
| 38 | return self._remove_all() | ||
| 39 | |||
| 40 | def _remove_all(self): | ||
| 41 | raise NotImplementedError | 38 | raise NotImplementedError |
| 42 | 39 | ||
| 43 | 40 | ||
| @@ -113,7 +110,7 @@ harmless format' % item.filename) | |||
| 113 | zipin.close() | 110 | zipin.close() |
| 114 | return metadata | 111 | return metadata |
| 115 | 112 | ||
| 116 | def _remove_all(self): | 113 | def remove_all(self): |
| 117 | ''' | 114 | ''' |
| 118 | So far, the zipfile module does not allow to write a ZipInfo | 115 | So far, the zipfile module does not allow to write a ZipInfo |
| 119 | object into a zipfile (and it's a shame !) : so data added | 116 | object into a zipfile (and it's a shame !) : so data added |
| @@ -162,7 +159,7 @@ class TarStripper(GenericArchiveStripper): | |||
| 162 | current_file.gname = '' | 159 | current_file.gname = '' |
| 163 | return current_file | 160 | return current_file |
| 164 | 161 | ||
| 165 | def _remove_all(self): | 162 | def remove_all(self): |
| 166 | tarin = tarfile.open(self.filename, 'r' + self.compression) | 163 | tarin = tarfile.open(self.filename, 'r' + self.compression) |
| 167 | tarout = tarfile.open(self.output, 'w' + self.compression) | 164 | tarout = tarfile.open(self.output, 'w' + self.compression) |
| 168 | for item in tarin.getmembers(): | 165 | for item in tarin.getmembers(): |
diff --git a/MAT/office.py b/MAT/office.py index e7ce661..20664d2 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -49,7 +49,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 49 | logging.debug('%s has no opendocument metadata' % self.filename) | 49 | logging.debug('%s has no opendocument metadata' % self.filename) |
| 50 | return metadata | 50 | return metadata |
| 51 | 51 | ||
| 52 | def _remove_all(self): | 52 | def remove_all(self): |
| 53 | ''' | 53 | ''' |
| 54 | FIXME ? | 54 | FIXME ? |
| 55 | There is a patch implementing the Zipfile.remove() | 55 | There is a patch implementing the Zipfile.remove() |
| @@ -140,12 +140,6 @@ class PdfStripper(parser.GenericParser): | |||
| 140 | 140 | ||
| 141 | def remove_all(self): | 141 | def remove_all(self): |
| 142 | ''' | 142 | ''' |
| 143 | Remove metadata | ||
| 144 | ''' | ||
| 145 | return self._remove_meta() | ||
| 146 | |||
| 147 | def _remove_meta(self): | ||
| 148 | ''' | ||
| 149 | Opening the PDF with poppler, then doing a render | 143 | Opening the PDF with poppler, then doing a render |
| 150 | on a cairo pdfsurface for each pages. | 144 | on a cairo pdfsurface for each pages. |
| 151 | 145 | ||
| @@ -202,7 +196,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 202 | It contains mostly xml, but can have media blobs, crap, ... | 196 | It contains mostly xml, but can have media blobs, crap, ... |
| 203 | (I don't like this format.) | 197 | (I don't like this format.) |
| 204 | ''' | 198 | ''' |
| 205 | def _remove_all(self): | 199 | def remove_all(self): |
| 206 | ''' | 200 | ''' |
| 207 | FIXME ? | 201 | FIXME ? |
| 208 | There is a patch implementing the Zipfile.remove() | 202 | There is a patch implementing the Zipfile.remove() |
diff --git a/MAT/parser.py b/MAT/parser.py index 6be2b03..e5acbf8 100644 --- a/MAT/parser.py +++ b/MAT/parser.py | |||
| @@ -59,7 +59,7 @@ class GenericParser(object): | |||
| 59 | 59 | ||
| 60 | def remove_all(self): | 60 | def remove_all(self): |
| 61 | ''' | 61 | ''' |
| 62 | Remove all the files that are compromizing | 62 | Remove all compromising fields |
| 63 | ''' | 63 | ''' |
| 64 | state = self._remove_all(self.editor) | 64 | state = self._remove_all(self.editor) |
| 65 | hachoir_core.field.writeIntoFile(self.editor, self.output) | 65 | hachoir_core.field.writeIntoFile(self.editor, self.output) |
| @@ -67,6 +67,9 @@ class GenericParser(object): | |||
| 67 | return state | 67 | return state |
| 68 | 68 | ||
| 69 | def _remove_all(self, fieldset): | 69 | def _remove_all(self, fieldset): |
| 70 | ''' | ||
| 71 | Recursive way to handle tree metadatas | ||
| 72 | ''' | ||
| 70 | try: | 73 | try: |
| 71 | for field in fieldset: | 74 | for field in fieldset: |
| 72 | remove = self._should_remove(field) | 75 | remove = self._should_remove(field) |
| @@ -93,6 +96,9 @@ class GenericParser(object): | |||
| 93 | return metadata | 96 | return metadata |
| 94 | 97 | ||
| 95 | def _get_meta(self, fieldset, metadata): | 98 | def _get_meta(self, fieldset, metadata): |
| 99 | ''' | ||
| 100 | Recursive way to handle tree metadatas | ||
| 101 | ''' | ||
| 96 | for field in fieldset: | 102 | for field in fieldset: |
| 97 | remove = self._should_remove(field) | 103 | remove = self._should_remove(field) |
| 98 | if remove is True: | 104 | if remove is True: |
| @@ -101,11 +107,11 @@ class GenericParser(object): | |||
| 101 | except: | 107 | except: |
| 102 | metadata[field.name] = 'harmful content' | 108 | metadata[field.name] = 'harmful content' |
| 103 | if remove is FIELD: | 109 | if remove is FIELD: |
| 104 | self._get_meta(field) | 110 | self._get_meta(field, None) |
| 105 | 111 | ||
| 106 | def _should_remove(self, key): | 112 | def _should_remove(self, key): |
| 107 | ''' | 113 | ''' |
| 108 | return True if the field is compromizing | 114 | return True if the field is compromising |
| 109 | abstract method | 115 | abstract method |
| 110 | ''' | 116 | ''' |
| 111 | raise NotImplementedError | 117 | raise NotImplementedError |
| @@ -115,6 +121,6 @@ class GenericParser(object): | |||
| 115 | Do a backup of the file if asked, | 121 | Do a backup of the file if asked, |
| 116 | and change his creation/access date | 122 | and change his creation/access date |
| 117 | ''' | 123 | ''' |
| 118 | if self.backup is False: | 124 | if not self.backup: |
| 119 | mat.secure_remove(self.filename) | 125 | mat.secure_remove(self.filename) |
| 120 | os.rename(self.output, self.filename) | 126 | os.rename(self.output, self.filename) |
