1 files changed, 0 insertions, 234 deletions
diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py
deleted file mode 100644
index c193843..0000000
--- a/lib/pdfrw/pdfwriter.py
+++ /dev/null
@@ -1,234 +0,0 @@
-#!/usr/bin/env python
-# A part of pdfrw (pdfrw.googlecode.com)
-# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
-# MIT license -- See LICENSE.txt for details
-'''
-The PdfWriter class writes an entire PDF file out to disk.
-The writing process is not at all optimized or organized.
-An instance of the PdfWriter class has two methods:
-    addpage(page)
-and
-    write(fname)
-addpage() assumes that the pages are part of a valid
-tree/forest of PDF objects.
-'''
-try:
-    set
-except NameError:
-    from sets import Set as set
-from pdfobjects import PdfName, PdfArray, PdfDict, IndirectPdfDict, PdfObject, PdfString
-from pdfcompress import compress
-debug = False
class FormatObjects(object):
    ''' FormatObjects performs the actual formatting and disk write.

        dump() is the entry point.  It creates the instance and sets
        the three attributes the other methods read:
            compress      -- truthy if stream dictionaries should be
                             passed to compress() before formatting
            indirect_dict -- maps id(obj) to its object number
            objlist       -- formatted text of each indirect object;
                             object number N is stored at objlist[N-1]
    '''

    def add(self, obj, visited):
        ''' Add an object to our list, if it's an indirect
            object.  Just format it if not.

            Returns the text that represents obj inside its
            container: the formatted object itself for a direct
            object, or 'N 0 R' for an indirect one.

            visited is the set of id()s of the direct objects that
            are currently being formatted.  Meeting one of them
            again means a direct object contains itself, and
            AssertionError is raised.
        '''
        # Can't hash dicts, so just hash the object ID
        objid = id(obj)

        # Automatically set stream objects to indirect
        if isinstance(obj, PdfDict):
            indirect = obj.indirect or (obj.stream is not None)
        else:
            indirect = getattr(obj, 'indirect', False)

        if not indirect:
            # Raised explicitly instead of using an assert statement,
            # so the check is still performed when Python runs with -O.
            if objid in visited:
                raise AssertionError(
                    'Circular reference encountered in non-indirect object %s'
                    % repr(obj))
            visited.add(objid)
            result = self.format_obj(obj, visited)
            visited.remove(objid)
            return result

        objnum = self.indirect_dict.get(objid)

        # If we haven't seen the object yet, we need to
        # add it to the indirect object list.
        if objnum is None:
            objlist = self.objlist
            objnum = len(objlist) + 1
            if debug:
                # Progress display.  The trailing '\r' keeps rewriting
                # the same console line.  sys.stdout.write is used
                # because it is spelled the same on every Python version.
                import sys
                sys.stdout.write('  Object %s \r' % objnum)
            # Reserve the slot and record the number *before*
            # formatting, so that a reference back to this object from
            # one of its own descendants finds the number in
            # indirect_dict instead of formatting the object again.
            objlist.append(None)
            self.indirect_dict[objid] = objnum
            # Indirect objects start with a fresh visited set.
            objlist[objnum-1] = self.format_obj(obj)
        return '%s 0 R' % objnum

    def format_array(myarray, formatter):
        ''' Join a list of already-formatted strings with spaces and
            substitute the result into formatter (e.g. '[%s]').

            If the strings total 70 characters or fewer (separators
            not counted) everything goes on one line.  Otherwise the
            strings are packed into rows of roughly 70 characters,
            and the rows are joined by a newline plus two spaces.
        '''
        # Format array data into semi-readable ASCII
        if sum([len(x) for x in myarray]) <= 70:
            return formatter % ' '.join(myarray)
        bigarray = []
        # Start "over the limit" so the first item always opens a row.
        count = 1000000
        for x in myarray:
            lenx = len(x)
            if lenx + count > 70:
                subarray = []
                bigarray.append(subarray)
                count = 0
            # +1 accounts for the space that will follow this item
            count += lenx + 1
            subarray.append(x)
        return formatter % '\n  '.join([' '.join(x) for x in bigarray])
    format_array = staticmethod(format_array)

    def format_obj(self, obj, visited=None):
        ''' format PDF object data into semi-readable ASCII.
            May mutually recurse with add() -- add() will
            return references for indirect objects, and add
            the indirect object to the list.

            visited is the set described in add(); a new empty set
            is created when it is omitted.
        '''
        if visited is None:
            visited = set()
        if isinstance(obj, PdfArray):
            myarray = [self.add(x, visited) for x in obj]
            return self.format_array(myarray, '[%s]')
        elif isinstance(obj, PdfDict):
            if self.compress and obj.stream:
                compress([obj])
            myarray = []
            # Jython 2.2.1 has a bug which segfaults when
            # sorting subclassed strings, so we un-subclass them.
            dictkeys = [str(x) for x in obj.iterkeys()]
            dictkeys.sort()
            for key in dictkeys:
                myarray.append(key)
                myarray.append(self.add(obj[key], visited))
            result = self.format_array(myarray, '<<%s>>')
            # Read the stream only now: compress() above was handed
            # the dictionary and may have changed it.
            stream = obj.stream
            if stream is not None:
                result = '%s\nstream\n%s\nendstream' % (result, stream)
            return result
        elif isinstance(obj, basestring) and not hasattr(obj, 'indirect'):
            # Plain strings (anything string-like without an 'indirect'
            # attribute) go through PdfString.encode().
            return PdfString.encode(obj)
        else:
            return str(obj)

    def dump(cls, f, trailer, version='1.3', compress=True):
        ''' Format everything reachable from trailer and write a
            complete file (header, objects, cross-reference table,
            trailer) to f.

            f        -- file-like object; only its write() is used
            trailer  -- trailer dictionary; its Size attribute is
                        overwritten as a side effect
            version  -- text placed after '%PDF-' in the header
            compress -- stored as self.compress (see format_obj)
        '''
        self = cls()
        self.compress = compress
        self.indirect_dict = {}
        self.objlist = []

        # The first format of trailer gets all the information,
        # but we throw away the actual trailer formatting.
        self.format_obj(trailer)
        # Now we know the size, so we update the trailer dict
        # and get the formatted data.  Size is one more than the
        # number of objects because the table below also carries
        # an entry for object 0.
        trailer.Size = PdfObject(len(self.objlist) + 1)
        trailer = self.format_obj(trailer)

        # Now we have all the pieces to write out to the file.
        # Keep careful track of the counts while we do it so
        # we can correctly build the cross-reference.
        # NOTE: offsets are accumulated with len() of the text that
        # is written, so they equal file positions only where one
        # character is written as one byte.
        header = '%%PDF-%s\n%%\xe2\xe3\xcf\xd3\n' % version
        f.write(header)
        offset = len(header)
        # Entry for object 0, which is marked 'f' rather than 'n'.
        offsets = [(0, 65535, 'f')]

        for i, x in enumerate(self.objlist):
            objstr = '%s 0 obj\n%s\nendobj\n' % (i + 1, x)
            offsets.append((offset, 0, 'n'))
            offset += len(objstr)
            f.write(objstr)

        # After the loop, offset is where the 'xref' keyword starts,
        # which is the value written after 'startxref'.
        f.write('xref\n0 %s\n' % len(offsets))
        for x in offsets:
            f.write('%010d %05d %s\r\n' % x)
        f.write('trailer\n\n%s\nstartxref\n%s\n%%%%EOF\n' % (trailer, offset))
    dump = classmethod(dump)
class PdfWriter(object):
    ''' Collects pages with addpage()/addpages() and writes them
        out as a new file with write().

        The trailer dictionary is built lazily from the collected
        pages the first time the trailer property is read, and is
        cached until another page is added.  A complete trailer may
        also be assigned to the property or passed to write().
    '''

    # Cached trailer dictionary; None means "build it on next access".
    _trailer = None

    def __init__(self, version='1.3', compress=True):
        ''' version and compress are passed on unchanged to
            FormatObjects.dump() by write().
        '''
        self.pagearray = PdfArray()
        self.compress = compress
        self.version = version

    def addpage(self, page):
        ''' Append a page to the output and return self.

            page must have Type == PdfName.Page, otherwise
            AssertionError is raised.  The page object itself is not
            stored; a new IndirectPdfDict is built from it with
            Resources, MediaBox, CropBox and Rotate taken from
            page.inheritable.
        '''
        # Any cached trailer no longer matches the page list.
        self._trailer = None
        # Raised explicitly instead of using an assert statement, so
        # the check is still performed when Python runs with -O.
        if not (page.Type == PdfName.Page):
            raise AssertionError(
                'addpage() needs an object whose Type is PdfName.Page')
        inheritable = page.inheritable # searches for resources
        self.pagearray.append(
            IndirectPdfDict(
                page,
                Resources = inheritable.Resources,
                MediaBox = inheritable.MediaBox,
                CropBox = inheritable.CropBox,
                Rotate = inheritable.Rotate,
            )
        )
        return self

    addPage = addpage  # for compatibility with pyPdf

    def addpages(self, pagelist):
        ''' Call addpage() for every page in pagelist; return self. '''
        for page in pagelist:
            self.addpage(page)
        return self

    def _get_trailer(self):
        ''' Return the cached trailer, building it first if needed. '''
        trailer = self._trailer
        if trailer is not None:
            return trailer

        # Create the basic object structure of the PDF file
        trailer = PdfDict(
            Root = IndirectPdfDict(
                Type = PdfName.Catalog,
                Pages = IndirectPdfDict(
                    Type = PdfName.Pages,
                    Count = PdfObject(len(self.pagearray)),
                    Kids = self.pagearray
                )
            )
        )
        # Make all the pages point back to the page dictionary
        pagedict = trailer.Root.Pages
        for page in pagedict.Kids:
            page.Parent = pagedict
        self._trailer = trailer
        return trailer

    def _set_trailer(self, trailer):
        ''' Replace the cached trailer with a caller-supplied one. '''
        self._trailer = trailer

    trailer = property(_get_trailer, _set_trailer)

    def write(self, fname, trailer=None):
        ''' Write the document.

            fname   -- an object with a write() method, which is
                       written to and left open, or else a file name,
                       which is opened in 'wb' mode and always closed
                       again, even if writing fails
            trailer -- trailer dictionary to write; self.trailer is
                       used when this is omitted (or otherwise false)
        '''
        trailer = trailer or self.trailer

        # Dump the data.  We either have a filename or a preexisting
        # file object.  The test is written as a plain "if" so that a
        # file object which happens to evaluate as false is still
        # written to rather than being handed to open().
        if hasattr(fname, 'write'):
            FormatObjects.dump(fname, trailer, self.version, self.compress)
            return

        f = open(fname, 'wb')
        try:
            FormatObjects.dump(f, trailer, self.version, self.compress)
        finally:
            # We opened it, so we close it, whatever happened.
            f.close()
if __name__ == '__main__':
    # Demo when run as a script: copy every page of source.pdf into
    # result.pdf, showing progress on standard output.
    # sys.stdout.write replaces the print statement: it emits the
    # same characters and is spelled the same on every Python version.
    import sys

    # Turn on the per-object progress display in FormatObjects.add().
    debug = True
    import pdfreader
    x = pdfreader.PdfReader('source.pdf')
    y = PdfWriter()
    for i, page in enumerate(x.pages):
        # '\r' keeps rewriting the same console line
        sys.stdout.write('  Adding page %s \r' % (i + 1))
        y.addpage(page)
    sys.stdout.write('\n')
    y.write('result.pdf')
    sys.stdout.write('\n')

diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py deleted file mode 100644 index c193843..0000000 --- a/lib/pdfrw/pdfwriter.py +++ /dev/null
@@ -1,234 +0,0 @@
1	#!/usr/bin/env python
2
3	# A part of pdfrw (pdfrw.googlecode.com)
4	# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
5	# MIT license -- See LICENSE.txt for details
6
7	'''
8	The PdfWriter class writes an entire PDF file out to disk.
9
10	The writing process is not at all optimized or organized.
11
12	An instance of the PdfWriter class has two methods:
13	addpage(page)
14	and
15	write(fname)
16
17	addpage() assumes that the pages are part of a valid
18	tree/forest of PDF objects.
19	'''
20
21	try:
22	set
23	except NameError:
24	from sets import Set as set
25
26	from pdfobjects import PdfName, PdfArray, PdfDict, IndirectPdfDict, PdfObject, PdfString
27	from pdfcompress import compress
28
29	debug = False
30
class FormatObjects(object):
    ''' FormatObjects performs the actual formatting and disk write.

        dump() is the entry point.  It creates the instance and sets
        the three attributes the other methods read:
            compress      -- truthy if stream dictionaries should be
                             passed to compress() before formatting
            indirect_dict -- maps id(obj) to its object number
            objlist       -- formatted text of each indirect object;
                             object number N is stored at objlist[N-1]
    '''

    def add(self, obj, visited):
        ''' Add an object to our list, if it's an indirect
            object.  Just format it if not.

            Returns the text that represents obj inside its
            container: the formatted object itself for a direct
            object, or 'N 0 R' for an indirect one.

            visited is the set of id()s of the direct objects that
            are currently being formatted.  Meeting one of them
            again means a direct object contains itself, and
            AssertionError is raised.
        '''
        # Can't hash dicts, so just hash the object ID
        objid = id(obj)

        # Automatically set stream objects to indirect
        if isinstance(obj, PdfDict):
            indirect = obj.indirect or (obj.stream is not None)
        else:
            indirect = getattr(obj, 'indirect', False)

        if not indirect:
            # Raised explicitly instead of using an assert statement,
            # so the check is still performed when Python runs with -O.
            if objid in visited:
                raise AssertionError(
                    'Circular reference encountered in non-indirect object %s'
                    % repr(obj))
            visited.add(objid)
            result = self.format_obj(obj, visited)
            visited.remove(objid)
            return result

        objnum = self.indirect_dict.get(objid)

        # If we haven't seen the object yet, we need to
        # add it to the indirect object list.
        if objnum is None:
            objlist = self.objlist
            objnum = len(objlist) + 1
            if debug:
                # Progress display.  The trailing '\r' keeps rewriting
                # the same console line.  sys.stdout.write is used
                # because it is spelled the same on every Python version.
                import sys
                sys.stdout.write('  Object %s \r' % objnum)
            # Reserve the slot and record the number *before*
            # formatting, so that a reference back to this object from
            # one of its own descendants finds the number in
            # indirect_dict instead of formatting the object again.
            objlist.append(None)
            self.indirect_dict[objid] = objnum
            # Indirect objects start with a fresh visited set.
            objlist[objnum-1] = self.format_obj(obj)
        return '%s 0 R' % objnum

    def format_array(myarray, formatter):
        ''' Join a list of already-formatted strings with spaces and
            substitute the result into formatter (e.g. '[%s]').

            If the strings total 70 characters or fewer (separators
            not counted) everything goes on one line.  Otherwise the
            strings are packed into rows of roughly 70 characters,
            and the rows are joined by a newline plus two spaces.
        '''
        # Format array data into semi-readable ASCII
        if sum([len(x) for x in myarray]) <= 70:
            return formatter % ' '.join(myarray)
        bigarray = []
        # Start "over the limit" so the first item always opens a row.
        count = 1000000
        for x in myarray:
            lenx = len(x)
            if lenx + count > 70:
                subarray = []
                bigarray.append(subarray)
                count = 0
            # +1 accounts for the space that will follow this item
            count += lenx + 1
            subarray.append(x)
        return formatter % '\n  '.join([' '.join(x) for x in bigarray])
    format_array = staticmethod(format_array)

    def format_obj(self, obj, visited=None):
        ''' format PDF object data into semi-readable ASCII.
            May mutually recurse with add() -- add() will
            return references for indirect objects, and add
            the indirect object to the list.

            visited is the set described in add(); a new empty set
            is created when it is omitted.
        '''
        if visited is None:
            visited = set()
        if isinstance(obj, PdfArray):
            myarray = [self.add(x, visited) for x in obj]
            return self.format_array(myarray, '[%s]')
        elif isinstance(obj, PdfDict):
            if self.compress and obj.stream:
                compress([obj])
            myarray = []
            # Jython 2.2.1 has a bug which segfaults when
            # sorting subclassed strings, so we un-subclass them.
            dictkeys = [str(x) for x in obj.iterkeys()]
            dictkeys.sort()
            for key in dictkeys:
                myarray.append(key)
                myarray.append(self.add(obj[key], visited))
            result = self.format_array(myarray, '<<%s>>')
            # Read the stream only now: compress() above was handed
            # the dictionary and may have changed it.
            stream = obj.stream
            if stream is not None:
                result = '%s\nstream\n%s\nendstream' % (result, stream)
            return result
        elif isinstance(obj, basestring) and not hasattr(obj, 'indirect'):
            # Plain strings (anything string-like without an 'indirect'
            # attribute) go through PdfString.encode().
            return PdfString.encode(obj)
        else:
            return str(obj)

    def dump(cls, f, trailer, version='1.3', compress=True):
        ''' Format everything reachable from trailer and write a
            complete file (header, objects, cross-reference table,
            trailer) to f.

            f        -- file-like object; only its write() is used
            trailer  -- trailer dictionary; its Size attribute is
                        overwritten as a side effect
            version  -- text placed after '%PDF-' in the header
            compress -- stored as self.compress (see format_obj)
        '''
        self = cls()
        self.compress = compress
        self.indirect_dict = {}
        self.objlist = []

        # The first format of trailer gets all the information,
        # but we throw away the actual trailer formatting.
        self.format_obj(trailer)
        # Now we know the size, so we update the trailer dict
        # and get the formatted data.  Size is one more than the
        # number of objects because the table below also carries
        # an entry for object 0.
        trailer.Size = PdfObject(len(self.objlist) + 1)
        trailer = self.format_obj(trailer)

        # Now we have all the pieces to write out to the file.
        # Keep careful track of the counts while we do it so
        # we can correctly build the cross-reference.
        # NOTE: offsets are accumulated with len() of the text that
        # is written, so they equal file positions only where one
        # character is written as one byte.
        header = '%%PDF-%s\n%%\xe2\xe3\xcf\xd3\n' % version
        f.write(header)
        offset = len(header)
        # Entry for object 0, which is marked 'f' rather than 'n'.
        offsets = [(0, 65535, 'f')]

        for i, x in enumerate(self.objlist):
            objstr = '%s 0 obj\n%s\nendobj\n' % (i + 1, x)
            offsets.append((offset, 0, 'n'))
            offset += len(objstr)
            f.write(objstr)

        # After the loop, offset is where the 'xref' keyword starts,
        # which is the value written after 'startxref'.
        f.write('xref\n0 %s\n' % len(offsets))
        for x in offsets:
            f.write('%010d %05d %s\r\n' % x)
        f.write('trailer\n\n%s\nstartxref\n%s\n%%%%EOF\n' % (trailer, offset))
    dump = classmethod(dump)
153
class PdfWriter(object):
    ''' Collects pages with addpage()/addpages() and writes them
        out as a new file with write().

        The trailer dictionary is built lazily from the collected
        pages the first time the trailer property is read, and is
        cached until another page is added.  A complete trailer may
        also be assigned to the property or passed to write().
    '''

    # Cached trailer dictionary; None means "build it on next access".
    _trailer = None

    def __init__(self, version='1.3', compress=True):
        ''' version and compress are passed on unchanged to
            FormatObjects.dump() by write().
        '''
        self.pagearray = PdfArray()
        self.compress = compress
        self.version = version

    def addpage(self, page):
        ''' Append a page to the output and return self.

            page must have Type == PdfName.Page, otherwise
            AssertionError is raised.  The page object itself is not
            stored; a new IndirectPdfDict is built from it with
            Resources, MediaBox, CropBox and Rotate taken from
            page.inheritable.
        '''
        # Any cached trailer no longer matches the page list.
        self._trailer = None
        # Raised explicitly instead of using an assert statement, so
        # the check is still performed when Python runs with -O.
        if not (page.Type == PdfName.Page):
            raise AssertionError(
                'addpage() needs an object whose Type is PdfName.Page')
        inheritable = page.inheritable # searches for resources
        self.pagearray.append(
            IndirectPdfDict(
                page,
                Resources = inheritable.Resources,
                MediaBox = inheritable.MediaBox,
                CropBox = inheritable.CropBox,
                Rotate = inheritable.Rotate,
            )
        )
        return self

    addPage = addpage  # for compatibility with pyPdf

    def addpages(self, pagelist):
        ''' Call addpage() for every page in pagelist; return self. '''
        for page in pagelist:
            self.addpage(page)
        return self

    def _get_trailer(self):
        ''' Return the cached trailer, building it first if needed. '''
        trailer = self._trailer
        if trailer is not None:
            return trailer

        # Create the basic object structure of the PDF file
        trailer = PdfDict(
            Root = IndirectPdfDict(
                Type = PdfName.Catalog,
                Pages = IndirectPdfDict(
                    Type = PdfName.Pages,
                    Count = PdfObject(len(self.pagearray)),
                    Kids = self.pagearray
                )
            )
        )
        # Make all the pages point back to the page dictionary
        pagedict = trailer.Root.Pages
        for page in pagedict.Kids:
            page.Parent = pagedict
        self._trailer = trailer
        return trailer

    def _set_trailer(self, trailer):
        ''' Replace the cached trailer with a caller-supplied one. '''
        self._trailer = trailer

    trailer = property(_get_trailer, _set_trailer)

    def write(self, fname, trailer=None):
        ''' Write the document.

            fname   -- an object with a write() method, which is
                       written to and left open, or else a file name,
                       which is opened in 'wb' mode and always closed
                       again, even if writing fails
            trailer -- trailer dictionary to write; self.trailer is
                       used when this is omitted (or otherwise false)
        '''
        trailer = trailer or self.trailer

        # Dump the data.  We either have a filename or a preexisting
        # file object.  The test is written as a plain "if" so that a
        # file object which happens to evaluate as false is still
        # written to rather than being handed to open().
        if hasattr(fname, 'write'):
            FormatObjects.dump(fname, trailer, self.version, self.compress)
            return

        f = open(fname, 'wb')
        try:
            FormatObjects.dump(f, trailer, self.version, self.compress)
        finally:
            # We opened it, so we close it, whatever happened.
            f.close()
223
if __name__ == '__main__':
    # Demo when run as a script: copy every page of source.pdf into
    # result.pdf, showing progress on standard output.
    # sys.stdout.write replaces the print statement: it emits the
    # same characters and is spelled the same on every Python version.
    import sys

    # Turn on the per-object progress display in FormatObjects.add().
    debug = True
    import pdfreader
    x = pdfreader.PdfReader('source.pdf')
    y = PdfWriter()
    for i, page in enumerate(x.pages):
        # '\r' keeps rewriting the same console line
        sys.stdout.write('  Adding page %s \r' % (i + 1))
        y.addpage(page)
    sys.stdout.write('\n')
    y.write('result.pdf')
    sys.stdout.write('\n')