diff options
| author | jvoisin | 2015-07-25 17:14:23 +0200 |
|---|---|---|
| committer | jvoisin | 2015-07-25 17:14:23 +0200 |
| commit | 6ba3e3f20d7d52895bc44f9fc35b068cfce47133 (patch) | |
| tree | 15df2aca17d56d941c6376ef729e0c1fea4c396f /libmat/office.py | |
| parent | 85e6279d16af063e5150c7cf4bd491185b8ae788 (diff) | |
_MASSIVE_ pep8 revamp
Thank you so much PyCharm
Diffstat (limited to 'libmat/office.py')
| -rw-r--r-- | libmat/office.py | 55 |
1 file changed, 29 insertions, 26 deletions
diff --git a/libmat/office.py b/libmat/office.py
index d020c46..bd4bd97 100644
--- a/libmat/office.py
+++ b/libmat/office.py
| @@ -1,6 +1,6 @@ | |||
| 1 | ''' Care about office's formats | 1 | """ Care about office's formats |
| 2 | 2 | ||
| 3 | ''' | 3 | """ |
| 4 | 4 | ||
| 5 | import logging | 5 | import logging |
| 6 | import os | 6 | import os |
| @@ -21,14 +21,14 @@ import archive | |||
| 21 | 21 | ||
| 22 | 22 | ||
| 23 | class OpenDocumentStripper(archive.TerminalZipStripper): | 23 | class OpenDocumentStripper(archive.TerminalZipStripper): |
| 24 | ''' An open document file is a zip, with xml file into. | 24 | """ An open document file is a zip, with xml file into. |
| 25 | The one that interest us is meta.xml | 25 | The one that interest us is meta.xml |
| 26 | ''' | 26 | """ |
| 27 | 27 | ||
| 28 | def get_meta(self): | 28 | def get_meta(self): |
| 29 | ''' Return a dict with all the meta of the file by | 29 | """ Return a dict with all the meta of the file by |
| 30 | trying to read the meta.xml file. | 30 | trying to read the meta.xml file. |
| 31 | ''' | 31 | """ |
| 32 | metadata = super(OpenDocumentStripper, self).get_meta() | 32 | metadata = super(OpenDocumentStripper, self).get_meta() |
| 33 | zipin = zipfile.ZipFile(self.filename, 'r') | 33 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 34 | try: | 34 | try: |
| @@ -49,13 +49,13 @@ class OpenDocumentStripper(archive.TerminalZipStripper): | |||
| 49 | return metadata | 49 | return metadata |
| 50 | 50 | ||
| 51 | def remove_all(self): | 51 | def remove_all(self): |
| 52 | ''' Removes metadata | 52 | """ Removes metadata |
| 53 | ''' | 53 | """ |
| 54 | return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml']) | 54 | return super(OpenDocumentStripper, self).remove_all(ending_blacklist=['meta.xml']) |
| 55 | 55 | ||
| 56 | def is_clean(self): | 56 | def is_clean(self): |
| 57 | ''' Check if the file is clean from harmful metadatas | 57 | """ Check if the file is clean from harmful metadatas |
| 58 | ''' | 58 | """ |
| 59 | clean_super = super(OpenDocumentStripper, self).is_clean() | 59 | clean_super = super(OpenDocumentStripper, self).is_clean() |
| 60 | if clean_super is False: | 60 | if clean_super is False: |
| 61 | return False | 61 | return False |
| @@ -70,20 +70,21 @@ class OpenDocumentStripper(archive.TerminalZipStripper): | |||
| 70 | 70 | ||
| 71 | 71 | ||
| 72 | class OpenXmlStripper(archive.TerminalZipStripper): | 72 | class OpenXmlStripper(archive.TerminalZipStripper): |
| 73 | ''' Represent an office openxml document, which is like | 73 | """ Represent an office openxml document, which is like |
| 74 | an opendocument format, with some tricky stuff added. | 74 | an opendocument format, with some tricky stuff added. |
| 75 | It contains mostly xml, but can have media blobs, crap, ... | 75 | It contains mostly xml, but can have media blobs, crap, ... |
| 76 | (I don't like this format.) | 76 | (I don't like this format.) |
| 77 | ''' | 77 | """ |
| 78 | |||
| 78 | def remove_all(self): | 79 | def remove_all(self): |
| 79 | return super(OpenXmlStripper, self).remove_all( | 80 | return super(OpenXmlStripper, self).remove_all( |
| 80 | beginning_blacklist=('docProps/'), whitelist=('.rels')) | 81 | beginning_blacklist='docProps/', whitelist='.rels') |
| 81 | 82 | ||
| 82 | def is_clean(self): | 83 | def is_clean(self): |
| 83 | ''' Check if the file is clean from harmful metadatas. | 84 | """ Check if the file is clean from harmful metadatas. |
| 84 | This implementation is faster than something like | 85 | This implementation is faster than something like |
| 85 | "return this.get_meta() == {}". | 86 | "return this.get_meta() == {}". |
| 86 | ''' | 87 | """ |
| 87 | clean_super = super(OpenXmlStripper, self).is_clean() | 88 | clean_super = super(OpenXmlStripper, self).is_clean() |
| 88 | if clean_super is False: | 89 | if clean_super is False: |
| 89 | return False | 90 | return False |
| @@ -96,8 +97,8 @@ class OpenXmlStripper(archive.TerminalZipStripper): | |||
| 96 | return True | 97 | return True |
| 97 | 98 | ||
| 98 | def get_meta(self): | 99 | def get_meta(self): |
| 99 | ''' Return a dict with all the meta of the file | 100 | """ Return a dict with all the meta of the file |
| 100 | ''' | 101 | """ |
| 101 | metadata = super(OpenXmlStripper, self).get_meta() | 102 | metadata = super(OpenXmlStripper, self).get_meta() |
| 102 | 103 | ||
| 103 | zipin = zipfile.ZipFile(self.filename, 'r') | 104 | zipin = zipfile.ZipFile(self.filename, 'r') |
| @@ -109,8 +110,9 @@ class OpenXmlStripper(archive.TerminalZipStripper): | |||
| 109 | 110 | ||
| 110 | 111 | ||
| 111 | class PdfStripper(parser.GenericParser): | 112 | class PdfStripper(parser.GenericParser): |
| 112 | ''' Represent a PDF file | 113 | """ Represent a PDF file |
| 113 | ''' | 114 | """ |
| 115 | |||
| 114 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): | 116 | def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): |
| 115 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) | 117 | super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) |
| 116 | self.uri = 'file://' + os.path.abspath(self.filename) | 118 | self.uri = 'file://' + os.path.abspath(self.filename) |
| @@ -121,16 +123,16 @@ class PdfStripper(parser.GenericParser): | |||
| 121 | self.pdf_quality = False | 123 | self.pdf_quality = False |
| 122 | 124 | ||
| 123 | self.meta_list = frozenset(['title', 'author', 'subject', | 125 | self.meta_list = frozenset(['title', 'author', 'subject', |
| 124 | 'keywords', 'creator', 'producer', 'metadata']) | 126 | 'keywords', 'creator', 'producer', 'metadata']) |
| 125 | 127 | ||
| 126 | def is_clean(self): | 128 | def is_clean(self): |
| 127 | ''' Check if the file is clean from harmful metadatas | 129 | """ Check if the file is clean from harmful metadatas |
| 128 | ''' | 130 | """ |
| 129 | document = Poppler.Document.new_from_file(self.uri, self.password) | 131 | document = Poppler.Document.new_from_file(self.uri, self.password) |
| 130 | return not any(document.get_property(key) for key in self.meta_list) | 132 | return not any(document.get_property(key) for key in self.meta_list) |
| 131 | 133 | ||
| 132 | def remove_all(self): | 134 | def remove_all(self): |
| 133 | ''' Opening the PDF with poppler, then doing a render | 135 | """ Opening the PDF with poppler, then doing a render |
| 134 | on a cairo pdfsurface for each pages. | 136 | on a cairo pdfsurface for each pages. |
| 135 | 137 | ||
| 136 | http://cairographics.org/documentation/pycairo/2/ | 138 | http://cairographics.org/documentation/pycairo/2/ |
| @@ -138,7 +140,7 @@ class PdfStripper(parser.GenericParser): | |||
| 138 | The use of an intermediate tempfile is necessary because | 140 | The use of an intermediate tempfile is necessary because |
| 139 | python-cairo segfaults on unicode. | 141 | python-cairo segfaults on unicode. |
| 140 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 | 142 | See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=699457 |
| 141 | ''' | 143 | """ |
| 142 | document = Poppler.Document.new_from_file(self.uri, self.password) | 144 | document = Poppler.Document.new_from_file(self.uri, self.password) |
| 143 | try: | 145 | try: |
| 144 | output = tempfile.mkstemp()[1] | 146 | output = tempfile.mkstemp()[1] |
| @@ -169,6 +171,7 @@ class PdfStripper(parser.GenericParser): | |||
| 169 | 171 | ||
| 170 | try: | 172 | try: |
| 171 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw | 173 | import pdfrw # For now, poppler cannot write meta, so we must use pdfrw |
| 174 | |||
| 172 | logging.debug('Removing %s\'s superficial metadata' % self.filename) | 175 | logging.debug('Removing %s\'s superficial metadata' % self.filename) |
| 173 | trailer = pdfrw.PdfReader(self.output) | 176 | trailer = pdfrw.PdfReader(self.output) |
| 174 | trailer.Info.Producer = None | 177 | trailer.Info.Producer = None |
| @@ -183,8 +186,8 @@ class PdfStripper(parser.GenericParser): | |||
| 183 | return True | 186 | return True |
| 184 | 187 | ||
| 185 | def get_meta(self): | 188 | def get_meta(self): |
| 186 | ''' Return a dict with all the meta of the file | 189 | """ Return a dict with all the meta of the file |
| 187 | ''' | 190 | """ |
| 188 | document = Poppler.Document.new_from_file(self.uri, self.password) | 191 | document = Poppler.Document.new_from_file(self.uri, self.password) |
| 189 | metadata = {} | 192 | metadata = {} |
| 190 | for key in self.meta_list: | 193 | for key in self.meta_list: |
