diff options
| author | jvoisin | 2011-07-30 19:14:50 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-30 19:14:50 +0200 |
| commit | 158fbf02f5f349d2f9a7b1976306804224ad92da (patch) | |
| tree | 9e1de3c98d4d7dfcb1fdbd3ebb875f3f3cc203d5 /lib/pdfrw/buildxobj.py | |
| parent | 8f889fead81b2046d289402b831e18f8ddb00276 (diff) | |
Cleanup of pdfrw
Diffstat (limited to 'lib/pdfrw/buildxobj.py')
| -rw-r--r-- | lib/pdfrw/buildxobj.py | 191 |
1 files changed, 0 insertions, 191 deletions
diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py deleted file mode 100644 index 203dd8c..0000000 --- a/lib/pdfrw/buildxobj.py +++ /dev/null | |||
| @@ -1,191 +0,0 @@ | |||
| 1 | # A part of pdfrw (pdfrw.googlecode.com) | ||
| 2 | # Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas | ||
| 3 | # MIT license -- See LICENSE.txt for details | ||
| 4 | |||
| 5 | ''' | ||
| 6 | |||
| 7 | This module contains code to build PDF "Form XObjects". | ||
| 8 | |||
| 9 | A Form XObject allows a fragment from one PDF file to be cleanly | ||
| 10 | included in another PDF file. | ||
| 11 | |||
| 12 | Reference for syntax: "Parameters for opening PDF files" from SDK 8.1 | ||
| 13 | |||
| 14 | http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf | ||
| 15 | |||
| 16 | Supported parameters: 'page=xxx' and 'viewrect=<left>,<top>,<width>,<height>' | ||
| 17 | |||
| 18 | Units are in points | ||
| 19 | |||
| 20 | Reference for content: Adobe PDF reference, sixth edition, version 1.7 | ||
| 21 | |||
| 22 | http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf | ||
| 23 | |||
| 24 | Form XObjects are discussed in chapter 4.9, page 355 | ||
| 25 | ''' | ||
| 26 | |||
| 27 | from pdfobjects import PdfDict, PdfArray, PdfName | ||
| 28 | from pdfreader import PdfReader | ||
| 29 | |||
class ViewInfo(object):
    ''' Parse a "file#page=N&viewrect=l,t,w,h" style uri into attributes.

        Instantiate ViewInfo with a uri, and it will split out the
        filename, page, and viewrect into object attributes:

            docname  -- the text before the first '#', left as None when
                        the uri starts directly with 'page=' or 'viewrect='
            page     -- int taken from 'page=<n>'
            viewrect -- list of four floats taken from
                        'viewrect=<a>,<b>,<c>,<d>'
            doc      -- never set by parsing; only via a keyword argument
                        or by later assignment

        Options after the '#' may be separated by either '&' or '#'.
        Unknown options are reported through the logging module and
        otherwise ignored.

        Keyword arguments override parsed values, but may only name
        attributes that already exist on the class.
    '''
    doc = None
    docname = None
    page = None
    viewrect = None

    def __init__(self, pageinfo='', **kw):
        # Imported here so the class does not depend on a module-level
        # logger being set up elsewhere in the file.
        import logging

        parts = pageinfo.split('#', 1)
        if len(parts) == 2:
            # Treat '&' and '#' as interchangeable option separators.
            parts[1:] = parts[1].replace('&', '#').split('#')

        # A leading 'page=' / 'viewrect=' means no filename was given;
        # otherwise the first element is the document name.
        if not parts[0].startswith(('page=', 'viewrect=')):
            self.docname = parts.pop(0)

        for item in parts:
            key, value = item.split('=')
            key = key.strip()
            # Values may be comma and/or whitespace separated.
            value = value.replace(',', ' ').split()
            if key == 'page':
                assert len(value) == 1
                setattr(self, key, int(value[0]))
            elif key == 'viewrect':
                assert len(value) == 4
                setattr(self, key, [float(x) for x in value])
            else:
                logging.getLogger(__name__).error('Unknown option: %s', key)

        # items() rather than iteritems() so this runs on Python 2 and 3.
        for key, value in kw.items():
            assert hasattr(self, key), key
            setattr(self, key, value)
| 63 | |||
def getrects(inheritable, pageinfo):
    ''' Return (media box, clip box) for a page, both as float tuples.

        inheritable supplies MediaBox and (optionally) CropBox;
        pageinfo supplies viewrect.

        With no viewrect, the clip box is the CropBox when there is
        one, else the media box.  With a viewrect of (x, y, w, h),
        x is measured from the media box's left edge and y downwards
        from its top edge, and the resulting rectangle is clamped so
        it never extends outside the media box.
    '''
    media = tuple(float(v) for v in inheritable.MediaBox)
    view = pageinfo.viewrect

    if view is None:
        crop_source = inheritable.CropBox or media
        return media, tuple(float(v) for v in crop_source)

    m_left, m_bottom, m_right, m_top = media
    dx, dy, width, height = view
    left = m_left + dx
    top = m_top - dy
    right = left + width
    bottom = top - height
    clip = (
        max(m_left, left),
        max(m_bottom, bottom),
        min(m_right, right),
        min(m_top, top),
    )
    return media, clip
| 82 | |||
def _cache_xobj(contents, resources, mbox, bbox):
    ''' Look up the Form XObject for bbox, building it on a miss.

        The cache is a plain dict keyed by bbox and reached through
        contents.xobj_cachedict; when that attribute reads as None a
        fresh dict is installed via contents.private.xobj_cachedict.

        A bbox equal to mbox is built by _get_fullpage, anything
        else by _get_subpage.
    '''
    cache = contents.xobj_cachedict
    if cache is None:
        cache = contents.private.xobj_cachedict = {}

    xobj = cache.get(bbox)
    if xobj is not None:
        return xobj

    builder = _get_subpage if mbox != bbox else _get_fullpage
    xobj = cache[bbox] = PdfDict(
        builder(contents, resources, mbox, bbox),
        Type=PdfName.XObject,
        Subtype=PdfName.Form,
        FormType=1,
        BBox=PdfArray(bbox),
    )
    return xobj
| 101 | |||
def _get_fullpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a whole-page Form XObject.

        Copies contents into a new PdfDict and attaches resources
        as /Resources; _cache_xobj adds the remaining entries.
        mbox and bbox are unused here -- they are accepted so the
        signature matches _get_subpage.
    '''
    page_dict = PdfDict(contents, Resources=resources)
    return page_dict
| 108 | |||
def _get_subpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a partial-page Form XObject.

        Rather than copying the page contents again, the subpage
        is a one-line stream that draws the (cached) full-page
        Form XObject, which it references as /FullPage in its own
        resources.  That way several views of one page can share a
        single copy of the page.
    '''
    whole_page = _cache_xobj(contents, resources, mbox, mbox)
    xobjects = PdfDict(FullPage=whole_page)
    sub_resources = PdfDict(XObject=xobjects)
    return PdfDict(stream='/FullPage Do\n', Resources=sub_resources)
| 124 | |||
def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
    ''' Create (or fetch from cache) the Form XObject for a view
        of the given page.  The default viewinfo carries no
        viewrect, which selects the entire page.
    '''
    attrs = page.inheritable
    page_resources = attrs.Resources
    media_box, view_box = getrects(attrs, viewinfo)
    stream_obj = page.Contents

    # The declared /Length has to agree with the stream we hold.
    assert int(stream_obj.Length) == len(stream_obj.stream)

    # When compressed streams are not acceptable, the contents
    # dictionary may hold exactly one entry (i.e. no filters are
    # left to be applied).
    if not allow_compressed:
        assert sum(1 for _ in stream_obj.iteritems()) == 1

    return _cache_xobj(stream_obj, page_resources, media_box, view_box)
| 140 | |||
| 141 | |||
def docxobj(pageinfo, doc=None, allow_compressed=True):
    ''' Create and return a Form XObject for the page and view
        described by pageinfo (a ViewInfo, or a uri string that
        is turned into one).

        Usable on its own, or together with the CacheXObj class,
        which passes in an already-parsed document.
    '''
    if not isinstance(pageinfo, ViewInfo):
        pageinfo = ViewInfo(pageinfo)

    if doc is None:
        # No explicit document: reuse the one carried by pageinfo,
        # or failing that read it from the named file.
        doc = pageinfo.doc
        if doc is None:
            doc = pageinfo.doc = PdfReader(pageinfo.docname,
                                           decompress=not allow_compressed)
    else:
        # An explicit document must not compete with an implicit one.
        assert pageinfo.doc is None
        pageinfo.doc = doc
    assert isinstance(doc, PdfReader)

    # Page numbers in the uri are 1-based; a missing page means the first.
    page_index = (pageinfo.page or 1) - 1
    return pagexobj(doc.pages[page_index], pageinfo, allow_compressed)
| 165 | |||
| 166 | |||
class CacheXObj(object):
    ''' Remembers every PDF it has opened, keyed by file name, so
        that a file is parsed only once no matter how many Form
        XObjects are requested from it, and so that the output is
        not inflated by redundant copies of the same objects.
    '''

    def __init__(self, decompress=False):
        ''' Pass decompress=True when the Form XObjects must be
            decompressed.  The flag is forwarded to PdfReader, and
            its negation is given to docxobj as allow_compressed.
        '''
        self.decompress = decompress
        self.cached_pdfs = {}

    def load(self, sourcename):
        ''' Return the Form XObject described by the uri sourcename,
            opening the underlying file only if it has not been
            opened through this cache before.
        '''
        view = ViewInfo(sourcename)
        name = view.docname
        reader = self.cached_pdfs.get(name)
        if reader is None:
            reader = PdfReader(name, decompress=self.decompress)
            self.cached_pdfs[name] = reader
        return docxobj(view, reader, allow_compressed=not self.decompress)
