diff options
| author | jvoisin | 2013-07-05 20:06:58 +0200 |
|---|---|---|
| committer | jvoisin | 2013-07-05 20:06:58 +0200 |
| commit | 28845856c4c7731a6ed97f5481295854af8c5b45 (patch) | |
| tree | 0a7e24d1d636855c764f42871c807688226e0b54 /MAT | |
| parent | ef4543aa004ae75c792b858b0d5d12d504d2fe35 (diff) | |
Facilitate library usage for PDF
Set a default value for pdf_quality
Diffstat (limited to 'MAT')
| -rw-r--r-- | MAT/office.py | 13 |
1 file changed, 8 insertions, 5 deletions
diff --git a/MAT/office.py b/MAT/office.py index c6e86fb..339f3e3 100644 --- a/MAT/office.py +++ b/MAT/office.py | |||
| @@ -68,14 +68,14 @@ class OpenDocumentStripper(archive.GenericArchiveStripper): | |||
| 68 | # contain the list of all files present in the archive | 68 | # contain the list of all files present in the archive |
| 69 | zipin.extract(item, self.tempdir) | 69 | zipin.extract(item, self.tempdir) |
| 70 | for line in fileinput.input(name, inplace=1): | 70 | for line in fileinput.input(name, inplace=1): |
| 71 | #remove the line which contains "meta.xml" | 71 | # remove the line which contains "meta.xml" |
| 72 | line = line.strip() | 72 | line = line.strip() |
| 73 | if not 'meta.xml' in line: | 73 | if not 'meta.xml' in line: |
| 74 | print line | 74 | print line |
| 75 | zipout.write(name, item) | 75 | zipout.write(name, item) |
| 76 | 76 | ||
| 77 | elif ext in parser.NOMETA or item == 'mimetype': | 77 | elif ext in parser.NOMETA or item == 'mimetype': |
| 78 | #keep NOMETA files, and the "manifest" file | 78 | # keep NOMETA files, and the "manifest" file |
| 79 | if item != 'meta.xml': # contains the metadata | 79 | if item != 'meta.xml': # contains the metadata |
| 80 | zipin.extract(item, self.tempdir) | 80 | zipin.extract(item, self.tempdir) |
| 81 | zipout.write(name, item) | 81 | zipout.write(name, item) |
| @@ -126,7 +126,11 @@ class PdfStripper(parser.GenericParser): | |||
| 126 | super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) | 126 | super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) |
| 127 | uri = 'file://' + os.path.abspath(self.filename) | 127 | uri = 'file://' + os.path.abspath(self.filename) |
| 128 | self.password = None | 128 | self.password = None |
| 129 | self.pdf_quality = kwargs['low_pdf_quality'] | 129 | try: |
| 130 | self.pdf_quality = kwargs['low_pdf_quality'] | ||
| 131 | except KeyError: | ||
| 132 | self.pdf_quality = False | ||
| 133 | |||
| 130 | self.document = Poppler.Document.new_from_file(uri, self.password) | 134 | self.document = Poppler.Document.new_from_file(uri, self.password) |
| 131 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', | 135 | self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', |
| 132 | 'producer', 'metadata']) | 136 | 'producer', 'metadata']) |
| @@ -146,7 +150,6 @@ class PdfStripper(parser.GenericParser): | |||
| 146 | on a cairo pdfsurface for each pages. | 150 | on a cairo pdfsurface for each pages. |
| 147 | 151 | ||
| 148 | http://cairographics.org/documentation/pycairo/2/ | 152 | http://cairographics.org/documentation/pycairo/2/ |
| 149 | python-poppler is not documented at all : have fun ;) | ||
| 150 | 153 | ||
| 151 | The use of an intermediate tempfile is necessary because | 154 | The use of an intermediate tempfile is necessary because |
| 152 | python-cairo segfaults on unicode. | 155 | python-cairo segfaults on unicode. |
| @@ -219,7 +222,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 219 | if item.startswith('docProps/'): # metadatas | 222 | if item.startswith('docProps/'): # metadatas |
| 220 | pass | 223 | pass |
| 221 | elif ext in parser.NOMETA or item == '.rels': | 224 | elif ext in parser.NOMETA or item == '.rels': |
| 222 | #keep parser.NOMETA files, and the file named ".rels" | 225 | # keep parser.NOMETA files, and the file named ".rels" |
| 223 | zipin.extract(item, self.tempdir) | 226 | zipin.extract(item, self.tempdir) |
| 224 | zipout.write(name, item) | 227 | zipout.write(name, item) |
| 225 | else: | 228 | else: |
