summaryrefslogtreecommitdiff
path: root/lib/pdfrw/buildxobj.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pdfrw/buildxobj.py')
-rw-r--r--lib/pdfrw/buildxobj.py191
1 files changed, 0 insertions, 191 deletions
diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py
deleted file mode 100644
index 203dd8c..0000000
--- a/lib/pdfrw/buildxobj.py
+++ /dev/null
@@ -1,191 +0,0 @@
1# A part of pdfrw (pdfrw.googlecode.com)
2# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
4
5'''
6
7This module contains code to build PDF "Form XObjects".
8
9A Form XObject allows a fragment from one PDF file to be cleanly
10included in another PDF file.
11
12Reference for syntax: "Parameters for opening PDF files" from SDK 8.1
13
14 http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf
15
16 supported 'page=xxx', 'viewrect=<left>,<top>,<width>,<height>'
17
18 Units are in points
19
20Reference for content: Adobe PDF reference, sixth edition, version 1.7
21
22 http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
23
24 Form xobjects discussed chapter 4.9, page 355
25'''
26
27from pdfobjects import PdfDict, PdfArray, PdfName
28from pdfreader import PdfReader
29
class ViewInfo(object):
    ''' Instantiate ViewInfo with a uri, and it will parse out
        the filename, page, and viewrect into object attributes.

        The uri looks like "<docname>#<key>=<value>&<key>=<value>".
        The docname may be omitted, in which case the string starts
        directly with an option.  Supported options:

            page=<n>                               stored as an int
            viewrect=<left>,<top>,<width>,<height> stored as 4 floats

        Unknown options are logged as errors and otherwise ignored.

        Keyword arguments override attributes after parsing; each
        keyword must name an attribute that already exists (doc,
        docname, page or viewrect).

        Raises AssertionError if 'page' does not carry exactly one
        value, 'viewrect' does not carry exactly four, or a keyword
        names an unknown attribute.  Raises ValueError if an option
        does not contain exactly one '=' or a value is not numeric.
    '''
    # Class-level defaults, so that every attribute exists (as None)
    # even when the uri does not mention it.
    doc = None
    docname = None
    page = None
    viewrect = None

    def __init__(self, pageinfo='', **kw):
        # Imported here because the module defines no file-level
        # logger; the original code referenced an undefined 'log'.
        import logging

        pageinfo = pageinfo.split('#', 1)
        if len(pageinfo) == 2:
            # Options may be separated by either '&' or '#'.
            pageinfo[1:] = pageinfo[1].replace('&', '#').split('#')

        # If the first piece is itself an option, there is no docname.
        for key in 'page viewrect'.split():
            if pageinfo[0].startswith(key + '='):
                break
        else:
            self.docname = pageinfo.pop(0)

        for item in pageinfo:
            key, value = item.split('=')
            key = key.strip()
            # Values may be separated by commas and/or whitespace.
            value = value.replace(',', ' ').split()
            if key == 'page':
                assert len(value) == 1
                setattr(self, key, int(value[0]))
            elif key == 'viewrect':
                assert len(value) == 4
                setattr(self, key, [float(x) for x in value])
            else:
                logging.getLogger(__name__).error('Unknown option: %s', key)

        # items() rather than iteritems() so this works on both
        # Python 2 and Python 3.
        for key, value in kw.items():
            assert hasattr(self, key), key
            setattr(self, key, value)
63
def getrects(inheritable, pageinfo):
    ''' Return (mediabox, clipbox) for a page, each as a tuple of
        four floats.

        inheritable supplies MediaBox and CropBox; pageinfo supplies
        viewrect.  Without a viewrect, the clip box is the CropBox
        (or the MediaBox if the CropBox is missing/empty).  With a
        viewrect of (x, y, width, height), x is measured from the
        left edge and y down from the top edge of the MediaBox, and
        the resulting rectangle is clamped to the MediaBox.
    '''
    media = tuple(float(coord) for coord in inheritable.MediaBox)
    view = pageinfo.viewrect

    if view is not None:
        left, bottom, right, top = media
        xoff, yoff, width, height = view
        # viewrect is top-left based; convert to bottom-left based.
        clip_left = left + xoff
        clip_top = top - yoff
        clip_right = clip_left + width
        clip_bottom = clip_top - height
        clip = (
            max(left, clip_left),
            max(bottom, clip_bottom),
            min(right, clip_right),
            min(top, clip_top),
        )
        return media, clip

    source = inheritable.CropBox or media
    return media, tuple(float(coord) for coord in source)
82
def _cache_xobj(contents, resources, mbox, bbox):
    ''' Look up the Form XObject for bbox in the cache kept on
        contents; build and remember a new one on a miss.

        The cache is a plain dict keyed by bbox, stored as the
        private attribute xobj_cachedict of contents.  A bbox equal
        to mbox is built by _get_fullpage, anything else by
        _get_subpage.
    '''
    cache = contents.xobj_cachedict
    if cache is None:
        cache = {}
        contents.private.xobj_cachedict = cache

    xobj = cache.get(bbox)
    if xobj is not None:
        return xobj

    if mbox != bbox:
        builder = _get_subpage
    else:
        builder = _get_fullpage

    xobj = PdfDict(
        builder(contents, resources, mbox, bbox),
        Type=PdfName.XObject,
        Subtype=PdfName.Form,
        FormType=1,
        BBox=PdfArray(bbox),
    )
    cache[bbox] = xobj
    return xobj
101
def _get_fullpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a whole-page XObject:
        a copy of contents with Resources set.  The remaining
        Form XObject keys are added by _cache_xobj.
        mbox and bbox are accepted only so the signature matches
        _get_subpage; they are not used here.
    '''
    fullpage = PdfDict(contents, Resources=resources)
    return fullpage
108
def _get_subpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a partial-page XObject.

        Rather than copying the page contents, the result is a
        tiny stream that draws the (cached) full-page XObject,
        which is exposed to it as the /FullPage resource.  That
        way several views of one page share a single copy of the
        page contents.
    '''
    fullpage = _cache_xobj(contents, resources, mbox, mbox)
    xobjects = PdfDict(FullPage=fullpage)
    subresources = PdfDict(XObject=xobjects)
    return PdfDict(stream='/FullPage Do\n', Resources=subresources)
124
def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
    ''' Return a Form XObject for the requested view of page.
        With the default viewinfo the view is the whole page.

        If allow_compressed is false, the page contents must not
        carry any attribute besides its length.
    '''
    inherited = page.inheritable
    page_resources = inherited.Resources
    mediabox, viewbox = getrects(inherited, viewinfo)
    stream_obj = page.Contents

    # The declared length has to agree with the actual stream.
    assert int(stream_obj.Length) == len(stream_obj.stream)

    if not allow_compressed:
        # Make sure the only attribute is length, i.e.
        # all the filters must have been executed.
        assert len(list(stream_obj.iteritems())) == 1

    return _cache_xobj(stream_obj, page_resources, mediabox, viewbox)
140
141
def docxobj(pageinfo, doc=None, allow_compressed=True):
    ''' Create and return a Form XObject for the page and view
        described by pageinfo (a ViewInfo, or a uri string that
        is turned into one).

        The source document comes from exactly one place:
        the doc argument, else pageinfo.doc, else it is read
        from pageinfo.docname.  Whichever is used is also stored
        back in pageinfo.doc.  Passing doc while pageinfo.doc is
        already set is an error.

        Can be used on its own or through the CacheXObj class.
    '''
    if not isinstance(pageinfo, ViewInfo):
        pageinfo = ViewInfo(pageinfo)

    if doc is None:
        doc = pageinfo.doc
        if doc is None:
            # Neither explicit nor implicit: read from the filename.
            doc = PdfReader(pageinfo.docname, decompress=not allow_compressed)
            pageinfo.doc = doc
    else:
        # Explicit document: there must not be an implicit one too.
        assert pageinfo.doc is None
        pageinfo.doc = doc
    assert isinstance(doc, PdfReader)

    # Page numbers are 1-based; a missing page means the first.
    pagenum = pageinfo.page or 1
    return pagexobj(doc.pages[pagenum - 1], pageinfo, allow_compressed)
165
166
class CacheXObj(object):
    ''' Remembers every source PDF it has opened, so that a file
        is parsed only once however many XObjects are loaded
        from it, and so that the output is not bloated by
        needless duplicate copies of objects.
    '''
    def __init__(self, decompress=False):
        ''' Pass decompress=True if the Form XObjects need
            to be decompressed.  Whatever can be decompressed
            will be, with complaints about the rest.
        '''
        self.decompress = decompress
        self.cached_pdfs = {}

    def load(self, sourcename):
        ''' Return the Form XObject described by the uri
            sourcename, opening the source file only if it has
            not been opened through this cache before.
        '''
        view = ViewInfo(sourcename)
        name = view.docname
        reader = self.cached_pdfs.get(name)
        if reader is None:
            reader = PdfReader(name, decompress=self.decompress)
            self.cached_pdfs[name] = reader
        return docxobj(view, reader, allow_compressed=not self.decompress)