Cleanup of pdfrw

author: jvoisin 2011-07-30 19:14:50 +0200
committer: jvoisin 2011-07-30 19:14:50 +0200
commit: 158fbf02f5f349d2f9a7b1976306804224ad92da (patch)
tree: 9e1de3c98d4d7dfcb1fdbd3ebb875f3f3cc203d5 /lib/pdfrw/buildxobj.py
parent: 8f889fead81b2046d289402b831e18f8ddb00276 (diff)
1 files changed, 0 insertions, 191 deletions
diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py
deleted file mode 100644
index 203dd8c..0000000
--- a/lib/pdfrw/buildxobj.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# A part of pdfrw (pdfrw.googlecode.com)
-# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
-# MIT license -- See LICENSE.txt for details
-'''
-This module contains code to build PDF "Form XObjects".
-A Form XObject allows a fragment from one PDF file to be cleanly
-included in another PDF file.
-Reference for syntax: "Parameters for opening PDF files" from SDK 8.1
-        http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf
-        supported 'page=xxx', 'viewrect=<left>,<top>,<width>,<height>'
-        Units are in points
-Reference for content:   Adobe PDF reference, sixth edition, version 1.7
-        http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
-        Form xobjects discussed chapter 4.9, page 355
-'''
-from pdfobjects import PdfDict, PdfArray, PdfName
-from pdfreader import PdfReader
-class ViewInfo(object):
-    ''' Instantiate ViewInfo with a uri, and it will parse out
-        the filename, page, and viewrect into object attributes.
-    '''
-    doc = None
-    docname = None
-    page = None
-    viewrect = None
-    def __init__(self, pageinfo='', **kw):
-        pageinfo=pageinfo.split('#',1)
-        if len(pageinfo) == 2:
-            pageinfo[1:] = pageinfo[1].replace('&', '#').split('#')
-        for key in 'page viewrect'.split():
-            if pageinfo[0].startswith(key+'='):
-                break
-        else:
-            self.docname = pageinfo.pop(0)
-        for item in pageinfo:
-            key, value = item.split('=')
-            key = key.strip()
-            value = value.replace(',', ' ').split()
-            if key == 'page':
-                assert len(value) == 1
-                setattr(self, key, int(value[0]))
-            elif key == 'viewrect':
-                assert len(value) == 4
-                setattr(self, key, [float(x) for x in value])
-            else:
-                log.error('Unknown option: %s', key)
-        for key, value in kw.iteritems():
-            assert hasattr(self, key), key
-            setattr(self, key, value)
-def getrects(inheritable, pageinfo):
-    ''' Given the inheritable attributes of a page and
-        the desired pageinfo rectangle, return the page's
-        media box and the calculated boundary (clip) box.
-    '''
-    mbox = tuple([float(x) for x in inheritable.MediaBox])
-    vrect = pageinfo.viewrect
-    if vrect is None:
-        cbox = tuple([float(x) for x in (inheritable.CropBox or mbox)])
-    else:
-        mleft, mbot, mright, mtop = mbox
-        x, y, w, h = vrect
-        cleft = mleft + x
-        ctop = mtop - y
-        cright = cleft + w
-        cbot = ctop - h
-        cbox = max(mleft, cleft), max(mbot, cbot), min(mright, cright), min(mtop, ctop)
-    return mbox, cbox
-def _cache_xobj(contents, resources, mbox, bbox):
-    ''' Return a cached Form XObject, or create a new one and cache it.
-    '''
-    cachedict = contents.xobj_cachedict
-    if cachedict is None:
-        cachedict = contents.private.xobj_cachedict = {}
-    result = cachedict.get(bbox)
-    if result is None:
-        func = (_get_fullpage, _get_subpage)[mbox != bbox]
-        result = PdfDict(
-            func(contents, resources, mbox, bbox),
-            Type = PdfName.XObject,
-            Subtype = PdfName.Form,
-            FormType = 1,
-            BBox = PdfArray(bbox),
-        )
-        cachedict[bbox] = result
-    return result
-def _get_fullpage(contents, resources, mbox, bbox):
-    ''' fullpage is easy.  Just copy the contents,
-        set up the resources, and let _cache_xobj handle the
-        rest.
-    '''
-    return PdfDict(contents, Resources=resources)
-def _get_subpage(contents, resources, mbox, bbox):
-    ''' subpages *could* be as easy as full pages, but we
-        choose to complicate life by creating a Form XObject
-        for the page, and then one that references it for
-        the subpage, on the off-chance that we want multiple
-        items from the page.
-    '''
-    return PdfDict(
-        stream = '/FullPage Do\n',
-        Resources = PdfDict(
-            XObject = PdfDict(
-                FullPage = _cache_xobj(contents, resources, mbox, mbox)
-            )
-        )
-    )
-def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
-    ''' pagexobj creates and returns a Form XObject for
-        a given view within a page (Defaults to entire page.)
-    '''
-    inheritable = page.inheritable
-    resources = inheritable.Resources
-    mbox, bbox = getrects(inheritable, viewinfo)
-    contents = page.Contents
-    # Make sure the only attribute is length
-    # All the filters must have been executed
-    assert int(contents.Length) == len(contents.stream)
-    if not allow_compressed:
-        assert len([x for x in contents.iteritems()]) == 1
-    return _cache_xobj(contents, resources, mbox, bbox)
-def docxobj(pageinfo, doc=None, allow_compressed=True):
-    ''' docxobj creates and returns an actual Form XObject.
-        Can work standalone, or in conjunction with
-        the CacheXObj class (below).
-    '''
-    if not isinstance(pageinfo, ViewInfo):
-        pageinfo = ViewInfo(pageinfo)
-    # If we're explicitly passed a document,
-    # make sure we don't have one implicitly as well.
-    # If no implicit or explicit doc, then read one in
-    # from the filename.
-    if doc is not None:
-        assert pageinfo.doc is None
-        pageinfo.doc = doc
-    elif pageinfo.doc is not None:
-        doc = pageinfo.doc
-    else:
-        doc = pageinfo.doc = PdfReader(pageinfo.docname, decompress = not allow_compressed)
-    assert isinstance(doc, PdfReader)
-    sourcepage = doc.pages[(pageinfo.page or 1) - 1]
-    return pagexobj(sourcepage, pageinfo, allow_compressed)
-class CacheXObj(object):
-    ''' Use to keep from reparsing files over and over,
-        and to keep from making the output too much
-        bigger than it ought to be by replicating
-        unnecessary object copies.
-    '''
-    def __init__(self, decompress=False):
-        ''' Set decompress true if you need
-            the Form XObjects to be decompressed.
-            Will decompress what it can and scream
-            about the rest.
-        '''
-        self.cached_pdfs = {}
-        self.decompress = decompress
-    def load(self, sourcename):
-        ''' Load a Form XObject from a uri
-        '''
-        info = ViewInfo(sourcename)
-        fname = info.docname
-        pcache = self.cached_pdfs
-        doc = pcache.get(fname)
-        if doc is None:
-            doc = pcache[fname] = PdfReader(fname, decompress=self.decompress)
-        return docxobj(info, doc, allow_compressed=not self.decompress)
author	jvoisin	2011-07-30 19:14:50 +0200
committer	jvoisin	2011-07-30 19:14:50 +0200
commit	158fbf02f5f349d2f9a7b1976306804224ad92da (patch)
tree	9e1de3c98d4d7dfcb1fdbd3ebb875f3f3cc203d5 /lib/pdfrw/buildxobj.py
parent	8f889fead81b2046d289402b831e18f8ddb00276 (diff)

diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py
deleted file mode 100644
index 203dd8c..0000000
--- a/lib/pdfrw/buildxobj.py
+++ /dev/null
@@ -1,191 +0,0 @@
1	# A part of pdfrw (pdfrw.googlecode.com)
2	# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
3	# MIT license -- See LICENSE.txt for details
4
5	'''
6
7	This module contains code to build PDF "Form XObjects".
8
9	A Form XObject allows a fragment from one PDF file to be cleanly
10	included in another PDF file.
11
12	Reference for syntax: "Parameters for opening PDF files" from SDK 8.1
13
14	http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf
15
16	supported 'page=xxx', 'viewrect=<left>,<top>,<width>,<height>'
17
18	Units are in points
19
20	Reference for content: Adobe PDF reference, sixth edition, version 1.7
21
22	http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
23
24	Form xobjects discussed chapter 4.9, page 355
25	'''
26
27	from pdfobjects import PdfDict, PdfArray, PdfName
28	from pdfreader import PdfReader
29
30	class ViewInfo(object):
31	''' Instantiate ViewInfo with a uri, and it will parse out
32	the filename, page, and viewrect into object attributes.
33	'''
34	doc = None
35	docname = None
36	page = None
37	viewrect = None
38
39	def __init__(self, pageinfo='', **kw):
40	pageinfo=pageinfo.split('#',1)
41	if len(pageinfo) == 2:
42	pageinfo[1:] = pageinfo[1].replace('&', '#').split('#')
43	for key in 'page viewrect'.split():
44	if pageinfo[0].startswith(key+'='):
45	break
46	else:
47	self.docname = pageinfo.pop(0)
48	for item in pageinfo:
49	key, value = item.split('=')
50	key = key.strip()
51	value = value.replace(',', ' ').split()
52	if key == 'page':
53	assert len(value) == 1
54	setattr(self, key, int(value[0]))
55	elif key == 'viewrect':
56	assert len(value) == 4
57	setattr(self, key, [float(x) for x in value])
58	else:
59	log.error('Unknown option: %s', key)
60	for key, value in kw.iteritems():
61	assert hasattr(self, key), key
62	setattr(self, key, value)
63
64	def getrects(inheritable, pageinfo):
65	''' Given the inheritable attributes of a page and
66	the desired pageinfo rectangle, return the page's
67	media box and the calculated boundary (clip) box.
68	'''
69	mbox = tuple([float(x) for x in inheritable.MediaBox])
70	vrect = pageinfo.viewrect
71	if vrect is None:
72	cbox = tuple([float(x) for x in (inheritable.CropBox or mbox)])
73	else:
74	mleft, mbot, mright, mtop = mbox
75	x, y, w, h = vrect
76	cleft = mleft + x
77	ctop = mtop - y
78	cright = cleft + w
79	cbot = ctop - h
80	cbox = max(mleft, cleft), max(mbot, cbot), min(mright, cright), min(mtop, ctop)
81	return mbox, cbox
82
83	def _cache_xobj(contents, resources, mbox, bbox):
84	''' Return a cached Form XObject, or create a new one and cache it.
85	'''
86	cachedict = contents.xobj_cachedict
87	if cachedict is None:
88	cachedict = contents.private.xobj_cachedict = {}
89	result = cachedict.get(bbox)
90	if result is None:
91	func = (_get_fullpage, _get_subpage)[mbox != bbox]
92	result = PdfDict(
93	func(contents, resources, mbox, bbox),
94	Type = PdfName.XObject,
95	Subtype = PdfName.Form,
96	FormType = 1,
97	BBox = PdfArray(bbox),
98	)
99	cachedict[bbox] = result
100	return result
101
102	def _get_fullpage(contents, resources, mbox, bbox):
103	''' fullpage is easy. Just copy the contents,
104	set up the resources, and let _cache_xobj handle the
105	rest.
106	'''
107	return PdfDict(contents, Resources=resources)
108
109	def _get_subpage(contents, resources, mbox, bbox):
110	''' subpages could be as easy as full pages, but we
111	choose to complicate life by creating a Form XObject
112	for the page, and then one that references it for
113	the subpage, on the off-chance that we want multiple
114	items from the page.
115	'''
116	return PdfDict(
117	stream = '/FullPage Do\n',
118	Resources = PdfDict(
119	XObject = PdfDict(
120	FullPage = _cache_xobj(contents, resources, mbox, mbox)
121	)
122	)
123	)
124
125	def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
126	''' pagexobj creates and returns a Form XObject for
127	a given view within a page (Defaults to entire page.)
128	'''
129	inheritable = page.inheritable
130	resources = inheritable.Resources
131	mbox, bbox = getrects(inheritable, viewinfo)
132	contents = page.Contents
133	# Make sure the only attribute is length
134	# All the filters must have been executed
135	assert int(contents.Length) == len(contents.stream)
136	if not allow_compressed:
137	assert len([x for x in contents.iteritems()]) == 1
138
139	return _cache_xobj(contents, resources, mbox, bbox)
140
141
142	def docxobj(pageinfo, doc=None, allow_compressed=True):
143	''' docxobj creates and returns an actual Form XObject.
144	Can work standalone, or in conjunction with
145	the CacheXObj class (below).
146	'''
147	if not isinstance(pageinfo, ViewInfo):
148	pageinfo = ViewInfo(pageinfo)
149
150	# If we're explicitly passed a document,
151	# make sure we don't have one implicitly as well.
152	# If no implicit or explicit doc, then read one in
153	# from the filename.
154	if doc is not None:
155	assert pageinfo.doc is None
156	pageinfo.doc = doc
157	elif pageinfo.doc is not None:
158	doc = pageinfo.doc
159	else:
160	doc = pageinfo.doc = PdfReader(pageinfo.docname, decompress = not allow_compressed)
161	assert isinstance(doc, PdfReader)
162
163	sourcepage = doc.pages[(pageinfo.page or 1) - 1]
164	return pagexobj(sourcepage, pageinfo, allow_compressed)
165
166
167	class CacheXObj(object):
168	''' Use to keep from reparsing files over and over,
169	and to keep from making the output too much
170	bigger than it ought to be by replicating
171	unnecessary object copies.
172	'''
173	def __init__(self, decompress=False):
174	''' Set decompress true if you need
175	the Form XObjects to be decompressed.
176	Will decompress what it can and scream
177	about the rest.
178	'''
179	self.cached_pdfs = {}
180	self.decompress = decompress
181
182	def load(self, sourcename):
183	''' Load a Form XObject from a uri
184	'''
185	info = ViewInfo(sourcename)
186	fname = info.docname
187	pcache = self.cached_pdfs
188	doc = pcache.get(fname)
189	if doc is None:
190	doc = pcache[fname] = PdfReader(fname, decompress=self.decompress)
191	return docxobj(info, doc, allow_compressed=not self.decompress)