summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author jvoisin 2011-07-30 19:14:50 +0200
committer jvoisin 2011-07-30 19:14:50 +0200
commit 158fbf02f5f349d2f9a7b1976306804224ad92da (patch)
tree 9e1de3c98d4d7dfcb1fdbd3ebb875f3f3cc203d5 /lib
parent 8f889fead81b2046d289402b831e18f8ddb00276 (diff)
Cleanup of pdfrw
Diffstat (limited to 'lib')
-rw-r--r--lib/pdfrw/__init__.py1
-rw-r--r--lib/pdfrw/buildxobj.py191
-rw-r--r--[-rwxr-xr-x]lib/pdfrw/pdfwriter.py0
-rw-r--r--lib/pdfrw/toreportlab.py139
-rw-r--r--lib/sounds.py1
5 files changed, 1 insertions, 331 deletions
diff --git a/lib/pdfrw/__init__.py b/lib/pdfrw/__init__.py
index 964972f..26e8c73 100644
--- a/lib/pdfrw/__init__.py
+++ b/lib/pdfrw/__init__.py
@@ -11,3 +11,4 @@ from pdftokens import PdfTokens
11 11
12PdfFileReader = PdfReader 12PdfFileReader = PdfReader
13PdfFileWriter = PdfWriter 13PdfFileWriter = PdfWriter
14
diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py
deleted file mode 100644
index 203dd8c..0000000
--- a/lib/pdfrw/buildxobj.py
+++ /dev/null
@@ -1,191 +0,0 @@
1# A part of pdfrw (pdfrw.googlecode.com)
2# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
4
5'''
6
7This module contains code to build PDF "Form XObjects".
8
9A Form XObject allows a fragment from one PDF file to be cleanly
10included in another PDF file.
11
12Reference for syntax: "Parameters for opening PDF files" from SDK 8.1
13
14 http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf
15
16 supported 'page=xxx', 'viewrect=<left>,<top>,<width>,<height>'
17
18 Units are in points
19
20Reference for content: Adobe PDF reference, sixth edition, version 1.7
21
22 http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
23
24 Form xobjects discussed chapter 4.9, page 355
25'''
26
27from pdfobjects import PdfDict, PdfArray, PdfName
28from pdfreader import PdfReader
29
class ViewInfo(object):
    ''' Instantiate ViewInfo with a uri, and it will parse out
        the filename, page, and viewrect into object attributes.

        The uri has the form ``docname#option&option...`` where each
        option is ``page=<n>`` or
        ``viewrect=<left>,<top>,<width>,<height>``.  The document name
        may be omitted, in which case the string starts directly with
        an option.

        Keyword arguments override (or supply) any of the class-level
        attributes after the uri has been parsed; a keyword that is
        not an existing attribute trips an assertion.
    '''
    doc = None
    docname = None
    page = None
    viewrect = None

    def __init__(self, pageinfo='', **kw):
        # Imported locally because this module has no logger in scope;
        # the original code referenced an undefined name ``log`` here.
        import logging

        parts = pageinfo.split('#', 1)
        if len(parts) == 2:
            # Options may be separated by either '&' or '#'.
            parts[1:] = parts[1].replace('&', '#').split('#')

        # The first element is the document name unless it is already
        # one of the recognised options.
        if not parts[0].startswith(('page=', 'viewrect=')):
            self.docname = parts.pop(0)

        for item in parts:
            if not item.strip():
                # Tolerate empty fragments such as a trailing '#' or '&'.
                continue
            key, value = item.split('=')
            key = key.strip()
            # Values may be comma and/or whitespace separated.
            value = value.replace(',', ' ').split()
            if key == 'page':
                assert len(value) == 1
                self.page = int(value[0])
            elif key == 'viewrect':
                assert len(value) == 4
                self.viewrect = [float(x) for x in value]
            else:
                logging.getLogger(__name__).error('Unknown option: %s', key)

        # ``kw`` is a plain dict, so items() behaves the same on
        # Python 2 and Python 3.
        for key, value in kw.items():
            assert hasattr(self, key), key
            setattr(self, key, value)
63
def getrects(inheritable, pageinfo):
    ''' Compute the media box and the clip (boundary) box for a page.

        inheritable supplies MediaBox and CropBox; pageinfo supplies
        viewrect.  Returns the pair (mbox, cbox), both as 4-tuples
        of floats.

        Without a viewrect, the clip box is the CropBox, or the
        media box when CropBox is falsy.  With a viewrect of
        (x, y, w, h), x and y are offsets from the media box's left
        and top edges, and the resulting rectangle is clamped so it
        never extends beyond the media box.
    '''
    mbox = tuple(float(coord) for coord in inheritable.MediaBox)
    view = pageinfo.viewrect
    if view is not None:
        left, bottom, right, top = mbox
        x, y, w, h = view
        clip_left = left + x
        clip_top = top - y
        clip_right = clip_left + w
        clip_bottom = clip_top - h
        clipped = (
            max(left, clip_left),
            max(bottom, clip_bottom),
            min(right, clip_right),
            min(top, clip_top),
        )
        return mbox, clipped
    crop = inheritable.CropBox or mbox
    return mbox, tuple(float(coord) for coord in crop)
82
def _cache_xobj(contents, resources, mbox, bbox):
    ''' Look up the Form XObject for bbox in the per-contents cache,
        building and storing it on a miss.

        The cache is a dict kept on contents as xobj_cachedict and
        keyed by the bounding box; it is created on first use.
        A bbox equal to mbox yields a full-page XObject, anything
        else a sub-page XObject.
    '''
    cache = contents.xobj_cachedict
    if cache is None:
        cache = contents.private.xobj_cachedict = {}

    cached = cache.get(bbox)
    if cached is not None:
        return cached

    builder = _get_subpage if mbox != bbox else _get_fullpage
    xobj = PdfDict(
        builder(contents, resources, mbox, bbox),
        Type=PdfName.XObject,
        Subtype=PdfName.Form,
        FormType=1,
        BBox=PdfArray(bbox),
    )
    cache[bbox] = xobj
    return xobj
101
def _get_fullpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a full-page XObject: a copy
        of the page contents with the resources attached.  The
        caller (_cache_xobj) adds the remaining Form XObject keys.
        mbox and bbox are accepted only so the signature matches
        _get_subpage.
    '''
    fullpage = PdfDict(contents, Resources=resources)
    return fullpage
108
def _get_subpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a sub-page XObject.

        Rather than copying the page contents, the result is a tiny
        stream that just draws the (cached) full-page Form XObject,
        which is referenced from its resources as /FullPage.  That
        way several views of the same page share one copy of the
        page contents.
    '''
    fullpage = _cache_xobj(contents, resources, mbox, mbox)
    xobjects = PdfDict(FullPage=fullpage)
    subresources = PdfDict(XObject=xobjects)
    return PdfDict(stream='/FullPage Do\n', Resources=subresources)
124
def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
    ''' Return a Form XObject for the given view of a page.

        viewinfo selects the view rectangle; the default ViewInfo
        has no viewrect, which selects the whole page.  When
        allow_compressed is false, the contents dictionary must
        hold exactly one entry.
    '''
    inherited = page.inheritable
    page_resources = inherited.Resources
    media_box, bound_box = getrects(inherited, viewinfo)
    stream_obj = page.Contents

    # The declared length has to agree with the actual stream data.
    assert int(stream_obj.Length) == len(stream_obj.stream)
    # A single remaining entry means nothing but the length is left,
    # i.e. all the filters must have been executed.
    assert allow_compressed or len(list(stream_obj.iteritems())) == 1

    return _cache_xobj(stream_obj, page_resources, media_box, bound_box)
140
141
def docxobj(pageinfo, doc=None, allow_compressed=True):
    ''' Create and return a Form XObject for a page of a document.

        pageinfo is either a ViewInfo or a uri string from which
        one is built.  Usable on its own or together with the
        CacheXObj class.

        The document comes from exactly one place: the doc
        argument, the ViewInfo's own doc attribute, or -- failing
        both -- a PdfReader opened on the ViewInfo's docname.
        Supplying doc when the ViewInfo already carries one trips
        an assertion.  The page number is 1-based and defaults to
        the first page.
    '''
    view = pageinfo if isinstance(pageinfo, ViewInfo) else ViewInfo(pageinfo)

    if doc is None:
        doc = view.doc
        if doc is None:
            # Neither explicit nor implicit: read it from the file.
            doc = PdfReader(view.docname, decompress=not allow_compressed)
            view.doc = doc
    else:
        # An explicit document must not compete with an implicit one.
        assert view.doc is None
        view.doc = doc
    assert isinstance(doc, PdfReader)

    page_index = (view.page or 1) - 1
    return pagexobj(doc.pages[page_index], view, allow_compressed)
165
166
class CacheXObj(object):
    ''' Remembers every PDF it has read, keyed by file name, so
        that loading several views from one file parses that file
        only once and the resulting XObjects can share objects
        instead of replicating them in the output.
    '''

    def __init__(self, decompress=False):
        ''' Pass decompress=True when the Form XObjects are
            needed in decompressed form.  Whatever can be
            decompressed will be; the rest is complained about.
        '''
        self.decompress = decompress
        self.cached_pdfs = {}

    def load(self, sourcename):
        ''' Return the Form XObject described by the uri
            sourcename, reading the underlying file only if it
            has not been read before.
        '''
        view = ViewInfo(sourcename)
        name = view.docname
        reader = self.cached_pdfs.get(name)
        if reader is None:
            reader = PdfReader(name, decompress=self.decompress)
            self.cached_pdfs[name] = reader
        return docxobj(view, reader, allow_compressed=not self.decompress)
diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py
index c193843..c193843 100755..100644
--- a/lib/pdfrw/pdfwriter.py
+++ b/lib/pdfrw/pdfwriter.py
diff --git a/lib/pdfrw/toreportlab.py b/lib/pdfrw/toreportlab.py
deleted file mode 100644
index 00ad324..0000000
--- a/lib/pdfrw/toreportlab.py
+++ /dev/null
@@ -1,139 +0,0 @@
1# A part of pdfrw (pdfrw.googlecode.com)
2# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
4
5'''
6Converts pdfrw objects into reportlab objects.
7
8Designed for and tested with rl 2.3.
9
10Knows too much about reportlab internals.
11What can you do?
12
13The interface to this function is through the makerl() function.
14
15Parameters:
16 canv - a reportlab "canvas" (also accepts a "document")
17 pdfobj - a pdfrw PDF object
18
19Returns:
20 A corresponding reportlab object, or if the
21 object is a PDF Form XObject, the name to
22 use with reportlab for the object.
23
24 Will recursively convert all necessary objects.
25 Be careful when converting a page -- if /Parent is set,
26 will recursively convert all pages!
27
28Notes:
29 1) Original objects are annotated with a
30 derived_rl_obj attribute which points to the
31 reportlab object. This keeps multiple reportlab
32 objects from being generated for the same pdfobj
33 via repeated calls to makerl. This is great for
34 not putting too many objects into the
35 new PDF, but not so good if you are modifying
36 objects for different pages. Then you
37 need to do your own deep copying (of circular
38 structures). You're on your own.
39
40 2) ReportLab seems weird about FormXObjects.
41 They pass around a partial name instead of the
42 object or a reference to it. So we have to
43 reach into reportlab and get a number for
44 a unique name. I guess this is to make it
45 where you can combine page streams with
46 impunity, but that's just a guess.
47
48 3) Updated 1/23/2010 to handle multipass documents
49 (e.g. with a table of contents). These have
50 a different doc object on every pass.
51
52'''
53
54from reportlab.pdfbase import pdfdoc as rldocmodule
55from pdfobjects import PdfDict, PdfArray, PdfName
56
# Short module-level aliases for the classes of
# reportlab.pdfbase.pdfdoc that the converters below instantiate.
RLStream = rldocmodule.PDFStream
RLDict = rldocmodule.PDFDictionary
RLArray = rldocmodule.PDFArray
60
61
def _makedict(rldoc, pdfobj):
    ''' Convert a (non-stream) pdfrw dictionary into a reportlab
        dictionary and return it -- or, for an indirect object, a
        reportlab reference to it.

        The result is recorded in pdfobj.derived_rl_obj before the
        entries are converted, so a recursive visit to the same
        object finds it there instead of converting it again.
    '''
    target = RLDict()
    result = target
    if pdfobj.indirect:
        target.__RefOnly__ = 1
        result = rldoc.Reference(target)
    pdfobj.derived_rl_obj[rldoc] = (result, None)

    for name, item in pdfobj.iteritems():
        # Drop the first character of the key (presumably the
        # leading '/' of a PDF name -- confirm against PdfDict).
        target[name[1:]] = makerl_recurse(rldoc, item)

    return result
73
def _makestream(rldoc, pdfobj, xobjtype=PdfName.XObject):
    ''' Convert a pdfrw stream object into a reportlab stream and
        return a reportlab reference to it.

        When the object's Type equals xobjtype it is given a name
        derived from the document's object counter, and that short
        name is stored with the reference in
        pdfobj.derived_rl_obj; otherwise the stored name is None.
        The reference is recorded before the dictionary entries
        are converted.
    '''
    entries = RLDict()
    stream = RLStream(entries, pdfobj.stream)

    shortname = fullname = None
    if pdfobj.Type == xobjtype:
        shortname = 'pdfrw_%s' % (rldoc.objectcounter+1)
        fullname = rldoc.getXObjectName(shortname)

    reference = rldoc.Reference(stream, fullname)
    pdfobj.derived_rl_obj[rldoc] = (reference, shortname)

    for name, item in pdfobj.iteritems():
        entries[name[1:]] = makerl_recurse(rldoc, item)

    return reference
90
def _makearray(rldoc, pdfobj):
    ''' Convert a pdfrw array into a reportlab array and return
        it -- or, for an indirect object, a reportlab reference
        to it.

        The result is recorded in pdfobj.derived_rl_obj before the
        elements are converted and appended one by one.
    '''
    target = RLArray([])
    result = target
    if pdfobj.indirect:
        target.__RefOnly__ = 1
        result = rldoc.Reference(target)
    pdfobj.derived_rl_obj[rldoc] = (result, None)

    elements = target.sequence
    for item in pdfobj:
        elements.append(makerl_recurse(rldoc, item))

    return result
103
104def _makestr(rldoc, pdfobj):
105 assert isinstance(pdfobj, (float, int, str)), repr(pdfobj)
106 return pdfobj
107
def makerl_recurse(rldoc, pdfobj):
    ''' Convert pdfobj (and whatever it refers to) into the
        corresponding reportlab object for the document rldoc.

        An object that was already converted for this rldoc is
        returned from its derived_rl_obj record.  Otherwise the
        record is created if the object has none yet, and the work
        is handed to the converter matching the object's kind:
        stream, dictionary, array or scalar.
    '''
    docdict = getattr(pdfobj, 'derived_rl_obj', None)
    first_visit = docdict is None
    if not first_visit:
        cached = docdict.get(rldoc)
        if cached is not None:
            return cached[0]

    if isinstance(pdfobj, PdfDict):
        converter = _makedict if pdfobj.stream is None else _makestream
        if first_visit:
            pdfobj.private.derived_rl_obj = {}
    elif isinstance(pdfobj, PdfArray):
        converter = _makearray
        if first_visit:
            pdfobj.derived_rl_obj = {}
    else:
        converter = _makestr

    return converter(rldoc, pdfobj)
128
def makerl(canv, pdfobj):
    ''' Public entry point: convert pdfobj for use with reportlab.

        canv may be a canvas (whose _doc attribute is used) or the
        document itself.  Returns the name recorded for the
        converted object when there is one, otherwise the
        converted reportlab object.
    '''
    # Fall back to canv itself when it has no _doc attribute.
    rldoc = getattr(canv, '_doc', canv)
    converted = makerl_recurse(rldoc, pdfobj)
    try:
        name = pdfobj.derived_rl_obj[rldoc][1]
    except AttributeError:
        # Objects without a derived_rl_obj record have no name.
        name = None
    return name or converted
diff --git a/lib/sounds.py b/lib/sounds.py
deleted file mode 100644
index a4bf5b6..0000000
--- a/lib/sounds.py
+++ /dev/null
@@ -1 +0,0 @@
1import parser