summary refs log tree commit diff
path: root/lib/pdfrw/pdfwriter.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pdfrw/pdfwriter.py')
-rw-r--r-- lib/pdfrw/pdfwriter.py 234
1 files changed, 0 insertions, 234 deletions
diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py
deleted file mode 100644
index c193843..0000000
--- a/lib/pdfrw/pdfwriter.py
+++ /dev/null
@@ -1,234 +0,0 @@
1#!/usr/bin/env python
2
3# A part of pdfrw (pdfrw.googlecode.com)
4# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
5# MIT license -- See LICENSE.txt for details
6
'''
The PdfWriter class writes an entire PDF file out to disk.

The writing process is not at all optimized or organized.

An instance of the PdfWriter class has three main methods:
    addpage(page)
    addpages(pagelist)
and
    write(fname)

addpage() assumes that the pages are part of a valid
tree/forest of PDF objects.  addpages() simply calls
addpage() for every page in a list.
'''
20
21try:
22 set
23except NameError:
24 from sets import Set as set
25
26from pdfobjects import PdfName, PdfArray, PdfDict, IndirectPdfDict, PdfObject, PdfString
27from pdfcompress import compress
28
29debug = False
30
class FormatObjects(object):
    ''' FormatObjects performs the actual formatting and disk write.

        Instances are only created by dump(), which sets up the
        state the other methods rely on:

            compress      -- flag passed in to dump(); when true,
                             dictionaries with a stream are handed
                             to compress() before being formatted
            indirect_dict -- maps id(obj) to the object number
                             assigned to that indirect object
            objlist       -- formatted text of every indirect object;
                             object number N is stored at index N - 1
    '''

    def add(self, obj, visited):
        ''' Add an object to our list, if it's an indirect
            object.  Just format it if not.

            visited is a set holding the id() of every direct
            (non-indirect) object that is currently being formatted
            further up the call chain; it is used to detect a direct
            object that contains itself.

            Returns the formatted text of a direct object, or an
            'N 0 R' reference string for an indirect object.

            Raises AssertionError if a circular reference is found
            among direct objects.
        '''
        # Can't hash dicts, so just hash the object ID
        objid = id(obj)

        # Automatically set stream objects to indirect
        if isinstance(obj, PdfDict):
            indirect = obj.indirect or (obj.stream is not None)
        else:
            indirect = getattr(obj, 'indirect', False)

        if not indirect:
            # Raised explicitly (instead of using an assert statement)
            # so the check still runs under "python -O"; without it a
            # self-containing direct object would recurse without bound.
            if objid in visited:
                raise AssertionError(
                    'Circular reference encountered in non-indirect object %s'
                    % repr(obj))
            visited.add(objid)
            try:
                return self.format_obj(obj, visited)
            finally:
                # Always undo our own entry, even if formatting failed,
                # so the caller's set is left the way we found it.
                visited.remove(objid)

        objnum = self.indirect_dict.get(objid)

        # If we haven't seen the object yet, we need to
        # add it to the indirect object list.
        if objnum is None:
            objlist = self.objlist
            objnum = len(objlist) + 1
            if debug:
                # Progress indicator; the trailing '\r' keeps it on one line.
                import sys
                sys.stdout.write(' Object %s \r' % objnum)
            # Reserve the slot and record the number *before* formatting,
            # so that any reference back to this object met while
            # formatting it resolves to the same number instead of
            # recursing.
            objlist.append(None)
            self.indirect_dict[objid] = objnum
            objlist[objnum-1] = self.format_obj(obj)
        return '%s 0 R' % objnum

    def format_array(myarray, formatter):
        ''' Format array data into semi-readable ASCII.

            myarray is a list of already formatted strings and
            formatter is a format string with a single %s (such as
            '[%s]' or '<<%s>>') that wraps the joined result.

            Short arrays (at most 70 characters of items in total)
            are joined with spaces on one line.  Longer arrays are
            broken into several lines, joined with a newline and one
            space of indentation.
        '''
        if sum([len(x) for x in myarray]) <= 70:
            return formatter % ' '.join(myarray)
        bigarray = []
        # Start "over the limit" so the very first item opens a new line.
        count = 1000000
        for x in myarray:
            lenx = len(x)
            if lenx + count > 70:
                subarray = []
                bigarray.append(subarray)
                count = 0
            # + 1 accounts for the space that separates items on a line
            count += lenx + 1
            subarray.append(x)
        return formatter % '\n '.join([' '.join(x) for x in bigarray])
    format_array = staticmethod(format_array)

    def format_obj(self, obj, visited=None):
        ''' format PDF object data into semi-readable ASCII.
            May mutually recurse with add() -- add() will
            return references for indirect objects, and add
            the indirect object to the list.

            visited is the cycle-detection set shared with add();
            a fresh set is created when it is not supplied.

            Returns the formatted text of obj.
        '''
        if visited is None:
            visited = set()
        if isinstance(obj, PdfArray):
            myarray = [self.add(x, visited) for x in obj]
            return self.format_array(myarray, '[%s]')
        elif isinstance(obj, PdfDict):
            if self.compress and obj.stream:
                compress([obj])
            myarray = []
            # Jython 2.2.1 has a bug which segfaults when
            # sorting subclassed strings, so we un-subclass them.
            dictkeys = [str(x) for x in obj.iterkeys()]
            dictkeys.sort()
            for key in dictkeys:
                myarray.append(key)
                myarray.append(self.add(obj[key], visited))
            result = self.format_array(myarray, '<<%s>>')
            # Read the stream only now, i.e. after compress() has had a
            # chance to process the dictionary.
            stream = obj.stream
            if stream is not None:
                result = '%s\nstream\n%s\nendstream' % (result, stream)
            return result
        elif isinstance(obj, basestring) and not hasattr(obj, 'indirect'):
            # Plain strings (without an 'indirect' attribute) are
            # encoded as PDF strings.
            return PdfString.encode(obj)
        else:
            return str(obj)

    def dump(cls, f, trailer, version='1.3', compress=True):
        ''' Format everything reachable from trailer and write a
            complete PDF file to f.

            f        -- object with a write() method
            trailer  -- trailer dictionary; its Size entry is set
                        (the caller's object is modified)
            version  -- version string placed in the '%PDF-' header
            compress -- when true, stream dictionaries are passed to
                        compress() while they are formatted

            Offsets for the cross-reference table are computed with
            len() on the strings that are written, so this assumes
            one character per byte of output.
        '''
        self = cls()
        self.compress = compress
        self.indirect_dict = {}
        self.objlist = []

        # The first format of trailer gets all the information,
        # but we throw away the actual trailer formatting.
        self.format_obj(trailer)
        # Now we know the size, so we update the trailer dict
        # and get the formatted data.  (+ 1 for the free entry
        # number 0 that heads the cross-reference table.)
        trailer.Size = PdfObject(len(self.objlist) + 1)
        trailer = self.format_obj(trailer)

        # Now we have all the pieces to write out to the file.
        # Keep careful track of the counts while we do it so
        # we can correctly build the cross-reference.

        header = '%%PDF-%s\n%%\xe2\xe3\xcf\xd3\n' % version
        f.write(header)
        offset = len(header)
        offsets = [(0, 65535, 'f')]

        for i, x in enumerate(self.objlist):
            objstr = '%s 0 obj\n%s\nendobj\n' % (i + 1, x)
            offsets.append((offset, 0, 'n'))
            offset += len(objstr)
            f.write(objstr)

        # After the loop, offset is the position of the 'xref' keyword,
        # which is the value written after 'startxref' below.
        f.write('xref\n0 %s\n' % len(offsets))
        for x in offsets:
            # Fixed-width entries: 10 + 1 + 5 + 1 + 1 + 2 = 20 characters
            f.write('%010d %05d %s\r\n' % x)
        f.write('trailer\n\n%s\nstartxref\n%s\n%%%%EOF\n' % (trailer, offset))
    dump = classmethod(dump)
153
class PdfWriter(object):
    ''' Collects pages and writes them out as a PDF file.

        Pages are accumulated in self.pagearray by addpage() /
        addpages().  The trailer property builds (and caches) the
        Catalog / Pages structure around those pages on first use;
        a trailer may also be assigned directly.  write() hands the
        trailer to FormatObjects.dump().
    '''

    # Cached trailer; None means "build it on demand".
    _trailer = None

    def __init__(self, version='1.3', compress=True):
        ''' version and compress are stored and later passed on
            to FormatObjects.dump() by write().
        '''
        self.pagearray = PdfArray()
        self.compress = compress
        self.version = version

    def addpage(self, page):
        ''' Append a page to the output and return self.

            The page is copied into a new IndirectPdfDict with the
            Resources, MediaBox, CropBox and Rotate values taken
            from page.inheritable.
        '''
        # Any previously built or assigned trailer no longer matches
        # the page list, so drop it.
        self._trailer = None
        assert page.Type == PdfName.Page
        inheritable = page.inheritable # searches for resources
        self.pagearray.append(
            IndirectPdfDict(
                page,
                Resources = inheritable.Resources,
                MediaBox = inheritable.MediaBox,
                CropBox = inheritable.CropBox,
                Rotate = inheritable.Rotate,
            )
        )
        return self

    addPage = addpage # for compatibility with pyPdf

    def addpages(self, pagelist):
        ''' Call addpage() for every page in pagelist; return self. '''
        for page in pagelist:
            self.addpage(page)
        return self

    def _get_trailer(self):
        ''' Return the cached trailer, building it first if needed. '''
        trailer = self._trailer
        if trailer is not None:
            return trailer

        # Create the basic object structure of the PDF file
        trailer = PdfDict(
            Root = IndirectPdfDict(
                Type = PdfName.Catalog,
                Pages = IndirectPdfDict(
                    Type = PdfName.Pages,
                    Count = PdfObject(len(self.pagearray)),
                    Kids = self.pagearray
                )
            )
        )
        # Make all the pages point back to the page dictionary
        pagedict = trailer.Root.Pages
        for page in pagedict.Kids:
            page.Parent = pagedict
        self._trailer = trailer
        return trailer

    def _set_trailer(self, trailer):
        ''' Use the given trailer instead of building one. '''
        self._trailer = trailer

    trailer = property(_get_trailer, _set_trailer)

    def write(self, fname, trailer=None):
        ''' Write the PDF file.

            fname   -- a file name, or an already open object that
                       has a write() method
            trailer -- trailer to write; when it is not given (or
                       is false), self.trailer is used

            A file opened here is always closed again, even when
            writing fails.  A file object passed in by the caller
            is left open.
        '''
        if not trailer:
            trailer = self.trailer

        # Dump the data.  We either have a filename or a preexisting
        # file object.  The test is on the 'write' attribute only,
        # never on the truth value of the object, so a file-like
        # object that happens to be false is not passed to open().
        preexisting = hasattr(fname, 'write')
        if preexisting:
            f = fname
        else:
            f = open(fname, 'wb')
        try:
            FormatObjects.dump(f, trailer, self.version, self.compress)
        finally:
            if not preexisting:
                f.close()
223
if __name__ == '__main__':
    # Small command-line driver: copy every page of one PDF file
    # into a new PDF file.
    #
    #   pdfwriter.py [source [result]]
    #
    # The file names default to 'source.pdf' and 'result.pdf'.
    import sys
    import pdfreader

    # This module is running as __main__, so this rebinds the
    # module-level flag that FormatObjects.add() checks.
    debug = True

    inname = 'source.pdf'
    outname = 'result.pdf'
    args = sys.argv[1:]
    if args:
        inname = args[0]
    if len(args) > 1:
        outname = args[1]

    reader = pdfreader.PdfReader(inname)
    writer = PdfWriter()
    for i, page in enumerate(reader.pages):
        # Progress indicator; the trailing '\r' keeps it on one line.
        sys.stdout.write(' Adding page %s \r' % (i + 1))
        writer.addpage(page)
    sys.stdout.write('\n')
    writer.write(outname)
    # Finish the line used by the per-object progress output.
    sys.stdout.write('\n')