summary refs log tree commit diff
path: root/lib/pdfrw/pdfobjects.py
diff options
context:
space:
mode:
author jvoisin 2011-08-16 18:11:24 +0200
committer jvoisin 2011-08-16 18:11:24 +0200
commit 4bd3e47da02fde08acfada1795cc55170abdb00a (patch)
tree f8c7aa5fd5e1b07a28b350c5ded8125ef2467c51 /lib/pdfrw/pdfobjects.py
parent baf8e080125614326ba9c96ca8f2404fd12b050e (diff)
setup.py now works !
Diffstat (limited to 'lib/pdfrw/pdfobjects.py')
-rw-r--r-- lib/pdfrw/pdfobjects.py 183
1 files changed, 0 insertions, 183 deletions
diff --git a/lib/pdfrw/pdfobjects.py b/lib/pdfrw/pdfobjects.py
deleted file mode 100644
index 08ad825..0000000
--- a/lib/pdfrw/pdfobjects.py
+++ /dev/null
@@ -1,183 +0,0 @@
1# A part of pdfrw (pdfrw.googlecode.com)
2# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
4
5'''
6Objects that can occur in PDF files. The most important
7objects are arrays and dicts. Either of these can be
8indirect or not, and dicts could have an associated
9stream.
10'''
11from __future__ import generators
12
13try:
14 set
15except NameError:
16 from sets import Set as set
17
18import re
19
class PdfObject(str):
    ''' A str subclass carrying an ``indirect`` flag.

        The flag defaults to False at class level; assigning it on
        an instance overrides it for that instance only.
    '''
    indirect = False
22
class PdfArray(list):
    ''' A list subclass carrying an ``indirect`` flag.

        The flag defaults to False at class level; assigning it on
        an instance overrides it for that instance only.
    '''
    indirect = False
25
class PdfName(object):
    ''' Factory for '/'-prefixed PdfObject values.

        Both ``PdfName.Foo`` and ``PdfName('Foo')`` return
        ``PdfObject('/Foo')``.  The call form also accepts text
        that is not a valid Python identifier.
    '''
    def __getattr__(self, name):
        # Only reached for attributes that normal lookup did not find.
        return self(name)

    def __call__(self, name):
        return PdfObject('/' + name)

# The class name is rebound to a single shared instance, so the rest
# of the module uses PdfName.Foo / PdfName('Foo') directly.
PdfName = PdfName()
33
class PdfString(str):
    ''' A str subclass holding a PDF string in its delimited form:
        either a literal string "(...)" or a hexadecimal
        string "<...>".

        decode() converts the delimited form to plain text, and the
        encode() classmethod builds a literal string from a value.
        Note that both names shadow the str methods of the same name.
    '''
    indirect = False

    # Decoded text for every escape sequence that unescape_pattern can
    # match, except octal escapes, which are computed in
    # decode_regular().  A backslash followed by an end-of-line is a
    # line continuation and decodes to nothing; a lone backslash is
    # dropped, so '\(' and '\)' decode to the bare parenthesis.
    unescape_dict = {'\\b': '\b', '\\f': '\f', '\\n': '\n',
                     '\\r': '\r', '\\t': '\t',
                     '\\\r\n': '', '\\\r': '', '\\\n': '',
                     '\\\\': '\\', '\\': '',
                    }

    # The escaped backslash must be the first alternative.  Otherwise
    # its two backslashes are consumed one at a time by the final
    # lone-backslash alternative, or the second one pairs with the
    # following character (so '\\n' would decode to a newline).
    # Octal escapes are limited to one to three octal digits.
    unescape_pattern = (r'(\\\\|\\b|\\f|\\n|\\r|\\t|\\\r\n|\\\r|\\\n'
                        r'|\\[0-7]{1,3}|\\)')
    unescape_func = re.compile(unescape_pattern).split

    # Splitters for hex strings: two digits per character ...
    hex_pattern = '([a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])'
    hex_func = re.compile(hex_pattern).split

    # ... or up to four digits per character.
    hex_pattern2 = '([a-fA-F0-9][a-fA-F0-9][a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])'
    hex_func2 = re.compile(hex_pattern2).split

    # Indexed by the ``twobytes`` flag (False -> 0, True -> 1).
    hex_funcs = hex_func, hex_func2

    def decode_regular(self, remap=chr):
        ''' Decode a literal "(...)" string.

            remap converts the integer value of an octal escape to
            a character.  Returns the decoded text.  Octal values
            above 127 are clamped to 127 (known limitation, see
            the FIXME below).
        '''
        assert self[0] == '(' and self[-1] == ')'
        unescape = self.unescape_dict.get
        result = []
        # Splitting on a pattern with one capturing group yields plain
        # text and matched escape sequences alternately.
        for chunk in self.unescape_func(self[1:-1]):
            if len(chunk) > 1 and chunk[0] == '\\' and chunk[1] in '01234567':
                value = int(chunk[1:], 8)
                # FIXME: TODO: Handle unicode here
                if value > 127:
                    value = 127
                chunk = remap(value)
            else:
                # Plain text is not in the dict and passes through.
                chunk = unescape(chunk, chunk)
            if chunk:
                result.append(chunk)
        return ''.join(result)

    def decode_hex(self, remap=chr, twobytes=False):
        ''' Decode a hexadecimal "<...>" string.

            Whitespace between digits is ignored.  With twobytes
            set, digits are grouped four at a time instead of two.
            remap converts each group's integer value to a
            character.  Returns the decoded text.
        '''
        compact = ''.join(self.split())
        pieces = self.hex_funcs[twobytes](compact)
        chars = pieces[1::2]
        other = pieces[0::2]
        # Everything that is not a hex digit must be exactly the two
        # delimiters.  Checking the ends of the compacted text rather
        # than other[0] / other[-1] lets the empty string "<>" through.
        assert (compact[:1] == '<' and compact[-1:] == '>'
                and ''.join(other) == '<>'), self
        result = []
        for digits in chars:
            if len(digits) % 2:
                # A missing final digit is taken to be 0.
                digits += '0'
            result.append(remap(int(digits, 16)))
        return ''.join(result)

    def decode(self, remap=chr, twobytes=False):
        ''' Decode this string, choosing the decoder from the opening
            delimiter: "(" selects decode_regular(), anything else
            selects decode_hex().
        '''
        if self.startswith('('):
            return self.decode_regular(remap)
        return self.decode_hex(remap, twobytes)

    def encode(cls, source, usehex=False):
        ''' Build a literal "(...)" PdfString from source.

            Backslashes and parentheses in source are escaped.
            Python 2 unicode input is encoded as UTF-8; any other
            non-str value is converted with str().  usehex is not
            supported yet and must be false.
        '''
        assert not usehex, "Not supported yet"
        if not isinstance(source, str):
            # type(u'') is ``unicode`` on Python 2 and ``str`` on
            # Python 3, where this branch can therefore never run.
            if isinstance(source, type(u'')):
                source = source.encode('utf-8')
            else:
                source = str(source)
        # Backslashes first, so the ones added for the parentheses
        # are not doubled.
        source = source.replace('\\', '\\\\')
        source = source.replace('(', '\\(')
        source = source.replace(')', '\\)')
        return cls('(' + source + ')')
    encode = classmethod(encode)
97
class PdfDict(dict):
    ''' A dict subclass whose keys are '/'-prefixed names and which
        can also be read and written through attributes:
        ``d.Foo`` is ``d.get('/Foo')`` and ``d.Foo = x`` is
        ``d['/Foo'] = x``.

        Storing None under a key removes the key.  The names
        ``indirect``, ``stream`` and ``_stream`` are not dictionary
        keys; they are kept in the instance __dict__ (see
        __setattr__).
    '''
    indirect = False
    stream = None

    # attribute name -> (name stored in the instance __dict__,
    #                    whether /Length is updated as well)
    _special = dict(indirect = ('indirect', False),
                    stream = ('stream', True),
                    _stream = ('stream', False),
                   )

    def __setitem__(self, name, value):
        ''' Store value under name, which must start with '/'.
            A value of None deletes the key if it is present.
        '''
        assert name.startswith('/'), name
        if value is not None:
            dict.__setitem__(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Initialise from an optional mapping or (key, value)
            pairs, then apply keyword arguments as attribute
            assignments.

            A single positional argument is used as the source
            itself; several positional arguments are treated as a
            sequence of (key, value) pairs.  When the source is a
            PdfDict, its ``indirect`` flag and ``stream`` are
            copied too.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            # Go through __setitem__ rather than dict.update() so the
            # key check and the "None deletes" rule also apply here.
            for key, value in dict(args).items():
                self[key] = value
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                # _stream copies the stream without touching /Length.
                self._stream = args.stream
        for key, value in kw.items():
            setattr(self, key, value)

    def __getattr__(self, name):
        ''' Called only when normal attribute lookup fails: return
            the value stored under '/' + name, or None.
        '''
        return self.get(PdfName(name))

    def __setattr__(self, name, value):
        ''' Store value under '/' + name, unless name is listed in
            _special, in which case it goes into the instance
            __dict__.  Assigning ``stream`` also sets /Length to
            the stream's length, or removes /Length when the stream
            is None.
        '''
        info = self._special.get(name)
        if info is None:
            self[PdfName(name)] = value
            return
        name, setlen = info
        self.__dict__[name] = value
        if setlen:
            if value is None:
                self.Length = None
            else:
                self.Length = PdfObject(len(value))

    def iteritems(self):
        ''' Yield (key, value) pairs, skipping None values. '''
        # dict.iteritems only exists on Python 2; fall back to items.
        pairs = getattr(dict, 'iteritems', dict.items)(self)
        for key, value in pairs:
            if value is not None:
                assert key.startswith('/'), (key, value)
                yield key, value

    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.

            Returns a helper object: ``d.inheritable.Foo`` or
            ``d.inheritable['Foo']`` returns the first non-None
            ``Foo`` found on this dictionary or along its chain
            of ``Parent`` attributes, or None if there is none.
        '''
        class Search(object):
            def __init__(self, basedict):
                self.basedict = basedict
            def __getattr__(self, name):
                return self[name]
            def __getitem__(self, name):
                visited = set()
                mydict = self.basedict
                while 1:
                    value = getattr(mydict, name)
                    if value is not None:
                        return value
                    myid = id(mydict)
                    # Raised explicitly instead of using an assert
                    # statement: running with -O strips asserts, and
                    # a cycle in the Parent chain would then loop
                    # forever.
                    if myid in visited:
                        raise AssertionError(
                            'Parent chain loops back on itself')
                    visited.add(myid)
                    mydict = mydict.Parent
                    if mydict is None:
                        return None
        return Search(self)
    inheritable = property(inheritable)

    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).

            Returns an object sharing this instance's __dict__, so
            attributes set on it bypass __setattr__ and never
            become dictionary keys.
        '''
        class Private(object):
            pass

        result = Private()
        result.__dict__ = self.__dict__
        return result
    private = property(private)
181
class IndirectPdfDict(PdfDict):
    ''' A PdfDict whose ``indirect`` flag defaults to True. '''
    indirect = True