summaryrefslogtreecommitdiff
path: root/lib/pdfrw/pdftokens.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pdfrw/pdftokens.py')
-rw-r--r--  lib/pdfrw/pdftokens.py  249
1 files changed, 0 insertions, 249 deletions
diff --git a/lib/pdfrw/pdftokens.py b/lib/pdfrw/pdftokens.py
deleted file mode 100644
index 04bd559..0000000
--- a/lib/pdfrw/pdftokens.py
+++ /dev/null
@@ -1,249 +0,0 @@
1# A part of pdfrw (pdfrw.googlecode.com)
2# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
4
5'''
6A tokenizer for PDF streams.
7
8In general, documentation used was "PDF reference",
9sixth edition, for PDF version 1.7, dated November 2006.
10
11'''
12
13from __future__ import generators
14
15try:
16 set
17except NameError:
18 from sets import Set as set
19
20import re
21from pdfobjects import PdfString, PdfObject
22
class _PrimitiveTokens(object):
    '''Low-level scanner that splits PDF file data into "primitive" tokens.

    A primitive token is either a regex match (a run of whitespace, a
    single delimiter character, a lone backslash, or a '<<' / '>>'
    dictionary delimiter) or the run of ordinary characters between two
    matches.  Tokens travel through the shared self.tokens queue so that
    coalesce() can see the one-token lookahead that next() leaves behind.

    NOTE(review): this is Python 2 style code (iterator objects expose
    next() rather than __next__(), and the file falls back to the old
    sets module); it will not iterate correctly under Python 3.
    '''

    # Table 3.1, page 50 of reference, defines whitespace
    whitespaceset = set('\x00\t\n\f\r ')

    # Text on page 50 defines delimiter characters
    delimiterset = set('()<>{}[]/%')

    # Coalesce contiguous whitespace into a single token
    whitespace_pattern = '[%s]+' % ''.join(whitespaceset)

    # In addition to the delimiters, we also use '\', which
    # is special in some contexts in PDF.
    # Each delimiter character is individually backslash-escaped so the
    # alternation is a valid regex.
    delimiter_pattern = '\\\\|\\' + '|\\'.join(delimiterset)

    # Dictionary delimiters are '<<' and '>>'.  Look for
    # these before the single variety.
    dictdelim_pattern = r'\<\<|\>\>'

    pattern = '(%s|%s|%s)' % (whitespace_pattern,
                              dictdelim_pattern, delimiter_pattern)
    re_func = re.compile(pattern).finditer
    # The pattern strings were only needed to build re_func; drop them
    # from the class namespace.
    del whitespace_pattern, dictdelim_pattern
    del delimiter_pattern, pattern

    def __init__(self, fdata):
        # fdata is the file contents as one string; scanning begins only
        # after setstart() has primed self.startloc and next_match[0].

        class MyIterator(object):
            # Iterator over primitive tokens.  All state lives in the
            # enclosing __init__ closure (tokens, next_match, fdata,
            # tappend, tpop) and on self (startloc).
            def next():
                if not tokens:
                    startloc = self.startloc
                    # 'for ... break / else' pulls at most ONE match
                    # from the finditer iterator.
                    for match in next_match[0]:
                        start = match.start()
                        end = match.end()
                        # Queue the matched token first; if ordinary
                        # text precedes the match, queue that second so
                        # it is popped (returned) first.  The match then
                        # stays behind in the queue -- coalesce() relies
                        # on this one-token lookahead to recognize
                        # non-match text.
                        tappend(fdata[start:end])
                        if start > startloc:
                            tappend(fdata[startloc:start])
                        self.startloc = end
                        break
                    else:
                        # No more matches: whatever remains in the file
                        # is one final run of ordinary text.
                        s = fdata[startloc:]
                        self.startloc = len(fdata)
                        if s:
                            tappend(s)
                        if not tokens:
                            raise StopIteration
                return tpop()
            next = staticmethod(next)

        self.fdata = fdata
        # The queue is shared state: the iterator pushes/pops through
        # the bound methods captured below; clients (PdfTokens) inspect
        # self.tokens directly.
        self.tokens = tokens = []
        self.iterator = iterator = MyIterator()
        self.next = iterator.next
        # One-element list so setstart() can rebind the finditer object
        # that the iterator closure reads.
        self.next_match = next_match = [None]
        tappend = tokens.append
        tpop = tokens.pop

    def setstart(self, startloc):
        # (Re)position the scanner at absolute offset startloc in fdata.
        self.startloc = startloc
        self.next_match[0] = self.re_func(self.fdata, startloc)

    def __iter__(self):
        return self.iterator

    def coalesce(self, result):
        ''' This function coalesces tokens together up until
            the next delimiter or whitespace.
            All of the coalesced tokens will either be non-matches,
            or will be a matched backslash.  We distinguish the
            non-matches by the fact that next() will have left
            a following match inside self.tokens for the actual match.
        '''
        tokens = self.tokens
        whitespace = self.whitespaceset

        # Optimized path for usual case -- regular data (not a name string),
        # with no escape character, and followed by whitespace.

        if tokens:
            token = tokens.pop()
            if token != '\\':
                if token[0] not in whitespace:
                    # A real delimiter terminated the run: push it back
                    # for the next caller (whitespace is just dropped).
                    tokens.append(token)
                return
            result.append(token)

        # Non-optimized path.  Either start of a name string received,
        # or we just had one escape.

        for token in self:
            if tokens:
                # The yielded token was ordinary text; the match that
                # terminated it is still queued, so take that as the
                # token to examine.
                result.append(token)
                token = tokens.pop()
            if token != '\\':
                if token[0] not in whitespace:
                    tokens.append(token)
                return
            # Backslash: keep it and continue coalescing past it.
            result.append(token)

    def floc(self):
        # Current file offset, corrected for tokens that were scanned
        # but not yet consumed (still sitting in the queue).
        return self.startloc - sum([len(x) for x in self.tokens])
126
class PdfTokens(object):
    '''High-level PDF tokenizer.

    Wraps _PrimitiveTokens and assembles composite PDF tokens: literal
    strings "(...)", hex strings "<...>", name strings "/...", comments
    "%...", and plain data (numbers, keywords).  String results are
    wrapped in PdfString, other data in PdfObject.
    '''

    def __init__(self, fdata, startloc=0, strip_comments=True):
        # The handlers below are closures over locals that are bound at
        # the BOTTOM of __init__ (primitive, primitive_next,
        # primitive_tokens, whitespaceset, tokens) -- they are not
        # called until iteration starts, so the late binding is safe.

        def comment(token):
            # Consume primitive tokens through the end of the line.
            # Returns the joined comment text only when comments are
            # kept; a falsy result makes the iterator skip it.
            tokens = [token]
            for token in primitive:
                tokens.append(token)
                if token[0] in whitespaceset and ('\n' in token or '\r' in token):
                    break
            return not strip_comments and ''.join(tokens)

        def single(token):
            # Token is already complete as-is.
            return token

        def regular_string(token):
            # Literal string '(...)': parentheses nest, and a backslash
            # escapes the following character.
            def escaped():
                # True when the run of backslashes immediately before
                # the current paren has odd length (paren is escaped).
                escaped = False
                i = -2
                while tokens[i] == '\\':
                    escaped = not escaped
                    i -= 1
                return escaped

            tokens = [token]
            nestlevel = 1
            for token in primitive:
                tokens.append(token)
                if token in '()' and not escaped():
                    # 'token == "(" or -1' is True (+1) for '(' and
                    # -1 for ')'.
                    nestlevel += token == '(' or -1
                    if not nestlevel:
                        break
            else:
                assert 0, "Unexpected end of token stream"
            return PdfString(''.join(tokens))

        def hex_string(token):
            tokens = [token]
            for token in primitive:
                tokens.append(token)
                if token == '>':
                    break
            # A closing '>' may have been consumed as part of a '>>'
            # dictionary-delimiter match; rotate such matches to the end
            # until a lone '>' properly terminates the string.
            while tokens[-2] == '>>':
                tokens.append(tokens.pop(-2))
            return PdfString(''.join(tokens))

        def normal_data(token):

            # Obscure optimization -- we can get here with
            # whitespace or regular character data.  If we get
            # here with whitespace, then there won't be an additional
            # token queued up in the primitive object, otherwise there
            # will...
            if primitive_tokens: #if token[0] not in whitespaceset:
                tokens = [token]
                primitive.coalesce(tokens)
                return PdfObject(''.join(tokens))
            # Whitespace falls through, returning None so the iterator
            # skips it.

        def name_string(token):
            # Name object '/...': '#xx' sequences are two-digit hex
            # character escapes (PDF 1.2+ syntax).
            tokens = [token]
            primitive.coalesce(tokens)
            token = ''.join(tokens)
            if '#' in token:
                substrs = token.split('#')
                substrs.reverse()
                tokens = [substrs.pop()]
                while substrs:
                    s = substrs.pop()
                    # First two characters after each '#' encode one
                    # character in hex.
                    tokens.append(chr(int(s[:2], 16)))
                    tokens.append(s[2:])
                token = ''.join(tokens)
            return PdfObject(token)

        def broken(token):
            # ')' or '>' can never legally START a token.
            assert 0, token

        # Map delimiter tokens to their handlers; anything else
        # (whitespace or regular data) falls through to normal_data.
        dispatch = {
            '(': regular_string,
            ')': broken,
            '<': hex_string,
            '>': broken,
            '[': single,
            ']': single,
            '{': single,
            '}': single,
            '/': name_string,
            '%' : comment,
            '<<': single,
            '>>': single,
        }.get

        class MyIterator(object):
            # NOTE(review): Python 2 iterator protocol (next, not
            # __next__).
            def next():
                # self.tokens acts as a pushback queue; normally empty.
                while not tokens:
                    token = primitive_next()
                    token = dispatch(token, normal_data)(token)
                    if token:
                        return token
                    # Falsy result (skipped whitespace or stripped
                    # comment): loop for the next primitive token.
                return tokens.pop()
            next = staticmethod(next)

        self.primitive = primitive = _PrimitiveTokens(fdata)
        self.setstart = primitive.setstart
        primitive.setstart(startloc)
        self.fdata = fdata
        self.strip_comments = strip_comments
        self.tokens = tokens = []
        self.iterator = iterator = MyIterator()
        self.next = iterator.next
        # Bind the closure variables that the handlers above read.
        primitive_next = primitive.next
        primitive_tokens = primitive.tokens
        whitespaceset = _PrimitiveTokens.whitespaceset

    def floc(self):
        # Current file offset, corrected for unconsumed queued tokens
        # at this level (the primitive level corrects for its own).
        return self.primitive.floc() - sum([len(x) for x in self.tokens])
    floc = property(floc)

    def __iter__(self):
        return self.iterator

    def multiple(self, count):
        # Return the next count tokens as a list.
        next = self.next
        return [next() for i in range(count)]