diff options
| author | jvoisin | 2011-07-30 19:14:50 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-30 19:14:50 +0200 |
| commit | 158fbf02f5f349d2f9a7b1976306804224ad92da (patch) | |
| tree | 9e1de3c98d4d7dfcb1fdbd3ebb875f3f3cc203d5 /lib | |
| parent | 8f889fead81b2046d289402b831e18f8ddb00276 (diff) | |
Cleanup of pdfrw
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/pdfrw/__init__.py | 1 | ||||
| -rw-r--r-- | lib/pdfrw/buildxobj.py | 191 | ||||
| -rw-r--r--[-rwxr-xr-x] | lib/pdfrw/pdfwriter.py | 0 | ||||
| -rw-r--r-- | lib/pdfrw/toreportlab.py | 139 | ||||
| -rw-r--r-- | lib/sounds.py | 1 |
5 files changed, 1 insertions, 331 deletions
diff --git a/lib/pdfrw/__init__.py b/lib/pdfrw/__init__.py index 964972f..26e8c73 100644 --- a/lib/pdfrw/__init__.py +++ b/lib/pdfrw/__init__.py | |||
| @@ -11,3 +11,4 @@ from pdftokens import PdfTokens | |||
| 11 | 11 | ||
| 12 | PdfFileReader = PdfReader | 12 | PdfFileReader = PdfReader |
| 13 | PdfFileWriter = PdfWriter | 13 | PdfFileWriter = PdfWriter |
| 14 | |||
diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py deleted file mode 100644 index 203dd8c..0000000 --- a/lib/pdfrw/buildxobj.py +++ /dev/null | |||
| @@ -1,191 +0,0 @@ | |||
| 1 | # A part of pdfrw (pdfrw.googlecode.com) | ||
| 2 | # Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas | ||
| 3 | # MIT license -- See LICENSE.txt for details | ||
| 4 | |||
| 5 | ''' | ||
| 6 | |||
| 7 | This module contains code to build PDF "Form XObjects". | ||
| 8 | |||
| 9 | A Form XObject allows a fragment from one PDF file to be cleanly | ||
| 10 | included in another PDF file. | ||
| 11 | |||
| 12 | Reference for syntax: "Parameters for opening PDF files" from SDK 8.1 | ||
| 13 | |||
| 14 | http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf | ||
| 15 | |||
| 16 | supported 'page=xxx', 'viewrect=<left>,<top>,<width>,<height>' | ||
| 17 | |||
| 18 | Units are in points | ||
| 19 | |||
| 20 | Reference for content: Adobe PDF reference, sixth edition, version 1.7 | ||
| 21 | |||
| 22 | http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf | ||
| 23 | |||
| 24 | Form xobjects discussed chapter 4.9, page 355 | ||
| 25 | ''' | ||
| 26 | |||
| 27 | from pdfobjects import PdfDict, PdfArray, PdfName | ||
| 28 | from pdfreader import PdfReader | ||
| 29 | |||
| 30 | class ViewInfo(object): | ||
| 31 | ''' Instantiate ViewInfo with a uri, and it will parse out | ||
| 32 | the filename, page, and viewrect into object attributes. | ||
| 33 | ''' | ||
| 34 | doc = None | ||
| 35 | docname = None | ||
| 36 | page = None | ||
| 37 | viewrect = None | ||
| 38 | |||
| 39 | def __init__(self, pageinfo='', **kw): | ||
| 40 | pageinfo=pageinfo.split('#',1) | ||
| 41 | if len(pageinfo) == 2: | ||
| 42 | pageinfo[1:] = pageinfo[1].replace('&', '#').split('#') | ||
| 43 | for key in 'page viewrect'.split(): | ||
| 44 | if pageinfo[0].startswith(key+'='): | ||
| 45 | break | ||
| 46 | else: | ||
| 47 | self.docname = pageinfo.pop(0) | ||
| 48 | for item in pageinfo: | ||
| 49 | key, value = item.split('=') | ||
| 50 | key = key.strip() | ||
| 51 | value = value.replace(',', ' ').split() | ||
| 52 | if key == 'page': | ||
| 53 | assert len(value) == 1 | ||
| 54 | setattr(self, key, int(value[0])) | ||
| 55 | elif key == 'viewrect': | ||
| 56 | assert len(value) == 4 | ||
| 57 | setattr(self, key, [float(x) for x in value]) | ||
| 58 | else: | ||
| 59 | log.error('Unknown option: %s', key) | ||
| 60 | for key, value in kw.iteritems(): | ||
| 61 | assert hasattr(self, key), key | ||
| 62 | setattr(self, key, value) | ||
| 63 | |||
| 64 | def getrects(inheritable, pageinfo): | ||
| 65 | ''' Given the inheritable attributes of a page and | ||
| 66 | the desired pageinfo rectangle, return the page's | ||
| 67 | media box and the calculated boundary (clip) box. | ||
| 68 | ''' | ||
| 69 | mbox = tuple([float(x) for x in inheritable.MediaBox]) | ||
| 70 | vrect = pageinfo.viewrect | ||
| 71 | if vrect is None: | ||
| 72 | cbox = tuple([float(x) for x in (inheritable.CropBox or mbox)]) | ||
| 73 | else: | ||
| 74 | mleft, mbot, mright, mtop = mbox | ||
| 75 | x, y, w, h = vrect | ||
| 76 | cleft = mleft + x | ||
| 77 | ctop = mtop - y | ||
| 78 | cright = cleft + w | ||
| 79 | cbot = ctop - h | ||
| 80 | cbox = max(mleft, cleft), max(mbot, cbot), min(mright, cright), min(mtop, ctop) | ||
| 81 | return mbox, cbox | ||
| 82 | |||
| 83 | def _cache_xobj(contents, resources, mbox, bbox): | ||
| 84 | ''' Return a cached Form XObject, or create a new one and cache it. | ||
| 85 | ''' | ||
| 86 | cachedict = contents.xobj_cachedict | ||
| 87 | if cachedict is None: | ||
| 88 | cachedict = contents.private.xobj_cachedict = {} | ||
| 89 | result = cachedict.get(bbox) | ||
| 90 | if result is None: | ||
| 91 | func = (_get_fullpage, _get_subpage)[mbox != bbox] | ||
| 92 | result = PdfDict( | ||
| 93 | func(contents, resources, mbox, bbox), | ||
| 94 | Type = PdfName.XObject, | ||
| 95 | Subtype = PdfName.Form, | ||
| 96 | FormType = 1, | ||
| 97 | BBox = PdfArray(bbox), | ||
| 98 | ) | ||
| 99 | cachedict[bbox] = result | ||
| 100 | return result | ||
| 101 | |||
| 102 | def _get_fullpage(contents, resources, mbox, bbox): | ||
| 103 | ''' fullpage is easy. Just copy the contents, | ||
| 104 | set up the resources, and let _cache_xobj handle the | ||
| 105 | rest. | ||
| 106 | ''' | ||
| 107 | return PdfDict(contents, Resources=resources) | ||
| 108 | |||
| 109 | def _get_subpage(contents, resources, mbox, bbox): | ||
| 110 | ''' subpages *could* be as easy as full pages, but we | ||
| 111 | choose to complicate life by creating a Form XObject | ||
| 112 | for the page, and then one that references it for | ||
| 113 | the subpage, on the off-chance that we want multiple | ||
| 114 | items from the page. | ||
| 115 | ''' | ||
| 116 | return PdfDict( | ||
| 117 | stream = '/FullPage Do\n', | ||
| 118 | Resources = PdfDict( | ||
| 119 | XObject = PdfDict( | ||
| 120 | FullPage = _cache_xobj(contents, resources, mbox, mbox) | ||
| 121 | ) | ||
| 122 | ) | ||
| 123 | ) | ||
| 124 | |||
| 125 | def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True): | ||
| 126 | ''' pagexobj creates and returns a Form XObject for | ||
| 127 | a given view within a page (Defaults to entire page.) | ||
| 128 | ''' | ||
| 129 | inheritable = page.inheritable | ||
| 130 | resources = inheritable.Resources | ||
| 131 | mbox, bbox = getrects(inheritable, viewinfo) | ||
| 132 | contents = page.Contents | ||
| 133 | # Make sure the only attribute is length | ||
| 134 | # All the filters must have been executed | ||
| 135 | assert int(contents.Length) == len(contents.stream) | ||
| 136 | if not allow_compressed: | ||
| 137 | assert len([x for x in contents.iteritems()]) == 1 | ||
| 138 | |||
| 139 | return _cache_xobj(contents, resources, mbox, bbox) | ||
| 140 | |||
| 141 | |||
| 142 | def docxobj(pageinfo, doc=None, allow_compressed=True): | ||
| 143 | ''' docxobj creates and returns an actual Form XObject. | ||
| 144 | Can work standalone, or in conjunction with | ||
| 145 | the CacheXObj class (below). | ||
| 146 | ''' | ||
| 147 | if not isinstance(pageinfo, ViewInfo): | ||
| 148 | pageinfo = ViewInfo(pageinfo) | ||
| 149 | |||
| 150 | # If we're explicitly passed a document, | ||
| 151 | # make sure we don't have one implicitly as well. | ||
| 152 | # If no implicit or explicit doc, then read one in | ||
| 153 | # from the filename. | ||
| 154 | if doc is not None: | ||
| 155 | assert pageinfo.doc is None | ||
| 156 | pageinfo.doc = doc | ||
| 157 | elif pageinfo.doc is not None: | ||
| 158 | doc = pageinfo.doc | ||
| 159 | else: | ||
| 160 | doc = pageinfo.doc = PdfReader(pageinfo.docname, decompress = not allow_compressed) | ||
| 161 | assert isinstance(doc, PdfReader) | ||
| 162 | |||
| 163 | sourcepage = doc.pages[(pageinfo.page or 1) - 1] | ||
| 164 | return pagexobj(sourcepage, pageinfo, allow_compressed) | ||
| 165 | |||
| 166 | |||
| 167 | class CacheXObj(object): | ||
| 168 | ''' Use to keep from reparsing files over and over, | ||
| 169 | and to keep from making the output too much | ||
| 170 | bigger than it ought to be by replicating | ||
| 171 | unnecessary object copies. | ||
| 172 | ''' | ||
| 173 | def __init__(self, decompress=False): | ||
| 174 | ''' Set decompress true if you need | ||
| 175 | the Form XObjects to be decompressed. | ||
| 176 | Will decompress what it can and scream | ||
| 177 | about the rest. | ||
| 178 | ''' | ||
| 179 | self.cached_pdfs = {} | ||
| 180 | self.decompress = decompress | ||
| 181 | |||
| 182 | def load(self, sourcename): | ||
| 183 | ''' Load a Form XObject from a uri | ||
| 184 | ''' | ||
| 185 | info = ViewInfo(sourcename) | ||
| 186 | fname = info.docname | ||
| 187 | pcache = self.cached_pdfs | ||
| 188 | doc = pcache.get(fname) | ||
| 189 | if doc is None: | ||
| 190 | doc = pcache[fname] = PdfReader(fname, decompress=self.decompress) | ||
| 191 | return docxobj(info, doc, allow_compressed=not self.decompress) | ||
diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py index c193843..c193843 100755..100644 --- a/lib/pdfrw/pdfwriter.py +++ b/lib/pdfrw/pdfwriter.py | |||
diff --git a/lib/pdfrw/toreportlab.py b/lib/pdfrw/toreportlab.py deleted file mode 100644 index 00ad324..0000000 --- a/lib/pdfrw/toreportlab.py +++ /dev/null | |||
| @@ -1,139 +0,0 @@ | |||
| 1 | # A part of pdfrw (pdfrw.googlecode.com) | ||
| 2 | # Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas | ||
| 3 | # MIT license -- See LICENSE.txt for details | ||
| 4 | |||
| 5 | ''' | ||
| 6 | Converts pdfrw objects into reportlab objects. | ||
| 7 | |||
| 8 | Designed for and tested with rl 2.3. | ||
| 9 | |||
| 10 | Knows too much about reportlab internals. | ||
| 11 | What can you do? | ||
| 12 | |||
| 13 | The interface to this function is through the makerl() function. | ||
| 14 | |||
| 15 | Parameters: | ||
| 16 | canv - a reportlab "canvas" (also accepts a "document") | ||
| 17 | pdfobj - a pdfrw PDF object | ||
| 18 | |||
| 19 | Returns: | ||
| 20 | A corresponding reportlab object, or if the | ||
| 21 | object is a PDF Form XObject, the name to | ||
| 22 | use with reportlab for the object. | ||
| 23 | |||
| 24 | Will recursively convert all necessary objects. | ||
| 25 | Be careful when converting a page -- if /Parent is set, | ||
| 26 | will recursively convert all pages! | ||
| 27 | |||
| 28 | Notes: | ||
| 29 | 1) Original objects are annotated with a | ||
| 30 | derived_rl_obj attribute which points to the | ||
| 31 | reportlab object. This keeps multiple reportlab | ||
| 32 | objects from being generated for the same pdfobj | ||
| 33 | via repeated calls to makerl. This is great for | ||
| 34 | not putting too many objects into the | ||
| 35 | new PDF, but not so good if you are modifying | ||
| 36 | objects for different pages. Then you | ||
| 37 | need to do your own deep copying (of circular | ||
| 38 | structures). You're on your own. | ||
| 39 | |||
| 40 | 2) ReportLab seems weird about FormXObjects. | ||
| 41 | They pass around a partial name instead of the | ||
| 42 | object or a reference to it. So we have to | ||
| 43 | reach into reportlab and get a number for | ||
| 44 | a unique name. I guess this is to make it | ||
| 45 | where you can combine page streams with | ||
| 46 | impunity, but that's just a guess. | ||
| 47 | |||
| 48 | 3) Updated 1/23/2010 to handle multipass documents | ||
| 49 | (e.g. with a table of contents). These have | ||
| 50 | a different doc object on every pass. | ||
| 51 | |||
| 52 | ''' | ||
| 53 | |||
| 54 | from reportlab.pdfbase import pdfdoc as rldocmodule | ||
| 55 | from pdfobjects import PdfDict, PdfArray, PdfName | ||
| 56 | |||
| 57 | RLStream = rldocmodule.PDFStream | ||
| 58 | RLDict = rldocmodule.PDFDictionary | ||
| 59 | RLArray = rldocmodule.PDFArray | ||
| 60 | |||
| 61 | |||
| 62 | def _makedict(rldoc, pdfobj): | ||
| 63 | rlobj = rldict = RLDict() | ||
| 64 | if pdfobj.indirect: | ||
| 65 | rlobj.__RefOnly__ = 1 | ||
| 66 | rlobj = rldoc.Reference(rlobj) | ||
| 67 | pdfobj.derived_rl_obj[rldoc] = rlobj, None | ||
| 68 | |||
| 69 | for key, value in pdfobj.iteritems(): | ||
| 70 | rldict[key[1:]] = makerl_recurse(rldoc, value) | ||
| 71 | |||
| 72 | return rlobj | ||
| 73 | |||
| 74 | def _makestream(rldoc, pdfobj, xobjtype=PdfName.XObject): | ||
| 75 | rldict = RLDict() | ||
| 76 | rlobj = RLStream(rldict, pdfobj.stream) | ||
| 77 | |||
| 78 | if pdfobj.Type == xobjtype: | ||
| 79 | shortname = 'pdfrw_%s' % (rldoc.objectcounter+1) | ||
| 80 | fullname = rldoc.getXObjectName(shortname) | ||
| 81 | else: | ||
| 82 | shortname = fullname = None | ||
| 83 | result = rldoc.Reference(rlobj, fullname) | ||
| 84 | pdfobj.derived_rl_obj[rldoc] = result, shortname | ||
| 85 | |||
| 86 | for key, value in pdfobj.iteritems(): | ||
| 87 | rldict[key[1:]] = makerl_recurse(rldoc, value) | ||
| 88 | |||
| 89 | return result | ||
| 90 | |||
| 91 | def _makearray(rldoc, pdfobj): | ||
| 92 | rlobj = rlarray = RLArray([]) | ||
| 93 | if pdfobj.indirect: | ||
| 94 | rlobj.__RefOnly__ = 1 | ||
| 95 | rlobj = rldoc.Reference(rlobj) | ||
| 96 | pdfobj.derived_rl_obj[rldoc] = rlobj, None | ||
| 97 | |||
| 98 | mylist = rlarray.sequence | ||
| 99 | for value in pdfobj: | ||
| 100 | mylist.append(makerl_recurse(rldoc, value)) | ||
| 101 | |||
| 102 | return rlobj | ||
| 103 | |||
| 104 | def _makestr(rldoc, pdfobj): | ||
| 105 | assert isinstance(pdfobj, (float, int, str)), repr(pdfobj) | ||
| 106 | return pdfobj | ||
| 107 | |||
| 108 | def makerl_recurse(rldoc, pdfobj): | ||
| 109 | docdict = getattr(pdfobj, 'derived_rl_obj', None) | ||
| 110 | if docdict is not None: | ||
| 111 | value = docdict.get(rldoc) | ||
| 112 | if value is not None: | ||
| 113 | return value[0] | ||
| 114 | if isinstance(pdfobj, PdfDict): | ||
| 115 | if pdfobj.stream is not None: | ||
| 116 | func = _makestream | ||
| 117 | else: | ||
| 118 | func = _makedict | ||
| 119 | if docdict is None: | ||
| 120 | pdfobj.private.derived_rl_obj = {} | ||
| 121 | elif isinstance(pdfobj, PdfArray): | ||
| 122 | func = _makearray | ||
| 123 | if docdict is None: | ||
| 124 | pdfobj.derived_rl_obj = {} | ||
| 125 | else: | ||
| 126 | func = _makestr | ||
| 127 | return func(rldoc, pdfobj) | ||
| 128 | |||
| 129 | def makerl(canv, pdfobj): | ||
| 130 | try: | ||
| 131 | rldoc = canv._doc | ||
| 132 | except AttributeError: | ||
| 133 | rldoc = canv | ||
| 134 | rlobj = makerl_recurse(rldoc, pdfobj) | ||
| 135 | try: | ||
| 136 | name = pdfobj.derived_rl_obj[rldoc][1] | ||
| 137 | except AttributeError: | ||
| 138 | name = None | ||
| 139 | return name or rlobj | ||
diff --git a/lib/sounds.py b/lib/sounds.py deleted file mode 100644 index a4bf5b6..0000000 --- a/lib/sounds.py +++ /dev/null | |||
| @@ -1 +0,0 @@ | |||
| 1 | import parser | ||
