diff options
| author | jvoisin | 2011-08-05 12:06:47 +0200 |
|---|---|---|
| committer | jvoisin | 2011-08-05 12:06:47 +0200 |
| commit | 5a6bd3a9312f1d3444ebb9343353812bde7702da (patch) | |
| tree | 29d9b8456b9da67201c74dbc2616de8ed890003f /lib/office.py | |
| parent | 503e926812d35032ed527c81e78444f362a5d527 (diff) | |
Tests for the OpenXML format, and some improvements to the support for this format
Diffstat (limited to 'lib/office.py')
| -rw-r--r-- | lib/office.py | 20 |
1 file changed, 6 insertions, 14 deletions
diff --git a/lib/office.py b/lib/office.py index b7c607f..03e386b 100644 --- a/lib/office.py +++ b/lib/office.py | |||
| @@ -146,13 +146,13 @@ class PdfStripper(parser.GenericParser): | |||
| 146 | page = self.document.get_page(0) | 146 | page = self.document.get_page(0) |
| 147 | page_width, page_height = page.get_size() | 147 | page_width, page_height = page.get_size() |
| 148 | surface = cairo.PDFSurface(self.output, page_width, page_height) | 148 | surface = cairo.PDFSurface(self.output, page_width, page_height) |
| 149 | context = cairo.Context(surface) # context draws on the surface | 149 | context = cairo.Context(surface) # context draws on the surface |
| 150 | logging.debug('Pdf rendering of %s' % self.filename) | 150 | logging.debug('Pdf rendering of %s' % self.filename) |
| 151 | for pagenum in xrange(self.document.get_n_pages()): | 151 | for pagenum in xrange(self.document.get_n_pages()): |
| 152 | page = self.document.get_page(pagenum) | 152 | page = self.document.get_page(pagenum) |
| 153 | context.translate(0, 0) | 153 | context.translate(0, 0) |
| 154 | page.render(context) # render the page on context | 154 | page.render(context) # render the page on context |
| 155 | context.show_page() # draw context on surface | 155 | context.show_page() # draw context on surface |
| 156 | surface.finish() | 156 | surface.finish() |
| 157 | 157 | ||
| 158 | #For now, poppler cannot write meta, so we must use pdfrw | 158 | #For now, poppler cannot write meta, so we must use pdfrw |
| @@ -253,16 +253,8 @@ class OpenXmlStripper(archive.GenericArchiveStripper): | |||
| 253 | ''' | 253 | ''' |
| 254 | zipin = zipfile.ZipFile(self.filename, 'r') | 254 | zipin = zipfile.ZipFile(self.filename, 'r') |
| 255 | metadata = {} | 255 | metadata = {} |
| 256 | try: | 256 | for item in zipin.namelist(): |
| 257 | content = zipin.read('docProps/app.xml') | 257 | if item.startswith('docProps/'): |
| 258 | metadata['app'] = 'harful meta' | 258 | metadata[item] = 'harmful content' |
| 259 | except KeyError: # no app.xml file found | ||
| 260 | logging.debug('%s has no app.xml metadata' % self.filename) | ||
| 261 | try: | ||
| 262 | content = zipin.read('docProps/core.xml') | ||
| 263 | metadata['core'] = 'harmful meta' | ||
| 264 | except KeyError: # no core.xml found | ||
| 265 | logging.debug('%s has no core.xml metadata' % self.filename) | ||
| 266 | zipin.close() | 259 | zipin.close() |
| 267 | |||
| 268 | return metadata | 260 | return metadata |
