summary refs log tree commit diff
diff options
context:
space:
mode:
author jvoisin 2013-07-05 20:06:58 +0200
committer jvoisin 2013-07-05 20:06:58 +0200
commit 28845856c4c7731a6ed97f5481295854af8c5b45 (patch)
tree 0a7e24d1d636855c764f42871c807688226e0b54
parent ef4543aa004ae75c792b858b0d5d12d504d2fe35 (diff)
Facilitate lib usage for PDF
Set a default value to pdf_quality
-rw-r--r-- MAT/office.py 13
1 files changed, 8 insertions, 5 deletions
diff --git a/MAT/office.py b/MAT/office.py
index c6e86fb..339f3e3 100644
--- a/MAT/office.py
+++ b/MAT/office.py
@@ -68,14 +68,14 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
68 # contain the list of all files present in the archive 68 # contain the list of all files present in the archive
69 zipin.extract(item, self.tempdir) 69 zipin.extract(item, self.tempdir)
70 for line in fileinput.input(name, inplace=1): 70 for line in fileinput.input(name, inplace=1):
71 #remove the line which contains "meta.xml" 71 # remove the line which contains "meta.xml"
72 line = line.strip() 72 line = line.strip()
73 if not 'meta.xml' in line: 73 if not 'meta.xml' in line:
74 print line 74 print line
75 zipout.write(name, item) 75 zipout.write(name, item)
76 76
77 elif ext in parser.NOMETA or item == 'mimetype': 77 elif ext in parser.NOMETA or item == 'mimetype':
78 #keep NOMETA files, and the "manifest" file 78 # keep NOMETA files, and the "manifest" file
79 if item != 'meta.xml': # contains the metadata 79 if item != 'meta.xml': # contains the metadata
80 zipin.extract(item, self.tempdir) 80 zipin.extract(item, self.tempdir)
81 zipout.write(name, item) 81 zipout.write(name, item)
@@ -126,7 +126,11 @@ class PdfStripper(parser.GenericParser):
126 super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs) 126 super(PdfStripper, self).__init__(filename, parser, mime, backup, **kwargs)
127 uri = 'file://' + os.path.abspath(self.filename) 127 uri = 'file://' + os.path.abspath(self.filename)
128 self.password = None 128 self.password = None
129 self.pdf_quality = kwargs['low_pdf_quality'] 129 try:
130 self.pdf_quality = kwargs['low_pdf_quality']
131 except KeyError:
132 self.pdf_quality = False
133
130 self.document = Poppler.Document.new_from_file(uri, self.password) 134 self.document = Poppler.Document.new_from_file(uri, self.password)
131 self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator', 135 self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator',
132 'producer', 'metadata']) 136 'producer', 'metadata'])
@@ -146,7 +150,6 @@ class PdfStripper(parser.GenericParser):
146 on a cairo pdfsurface for each pages. 150 on a cairo pdfsurface for each pages.
147 151
148 http://cairographics.org/documentation/pycairo/2/ 152 http://cairographics.org/documentation/pycairo/2/
149 python-poppler is not documented at all : have fun ;)
150 153
151 The use of an intermediate tempfile is necessary because 154 The use of an intermediate tempfile is necessary because
152 python-cairo segfaults on unicode. 155 python-cairo segfaults on unicode.
@@ -219,7 +222,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
219 if item.startswith('docProps/'): # metadatas 222 if item.startswith('docProps/'): # metadatas
220 pass 223 pass
221 elif ext in parser.NOMETA or item == '.rels': 224 elif ext in parser.NOMETA or item == '.rels':
222 #keep parser.NOMETA files, and the file named ".rels" 225 # keep parser.NOMETA files, and the file named ".rels"
223 zipin.extract(item, self.tempdir) 226 zipin.extract(item, self.tempdir)
224 zipout.write(name, item) 227 zipout.write(name, item)
225 else: 228 else: