diff options
| author | jvoisin | 2011-06-21 20:41:18 +0200 |
|---|---|---|
| committer | jvoisin | 2011-06-21 20:41:18 +0200 |
| commit | 9e69adbe1b065707f8be4f146cc3c05660cef711 (patch) | |
| tree | d60509a4982d7699204059184c4343352fef52de /lib/pdfrw/toreportlab.py | |
| parent | f0c9c5b56e3909ba36cc84ff82b05fab9a180911 (diff) | |
Add pdfrw, and many files that I have forgotten, sorry!
Diffstat (limited to 'lib/pdfrw/toreportlab.py')
| -rw-r--r-- | lib/pdfrw/toreportlab.py | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/lib/pdfrw/toreportlab.py b/lib/pdfrw/toreportlab.py new file mode 100644 index 0000000..00ad324 --- /dev/null +++ b/lib/pdfrw/toreportlab.py | |||
| @@ -0,0 +1,139 @@ | |||
| 1 | # A part of pdfrw (pdfrw.googlecode.com) | ||
| 2 | # Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas | ||
| 3 | # MIT license -- See LICENSE.txt for details | ||
| 4 | |||
| 5 | ''' | ||
| 6 | Converts pdfrw objects into reportlab objects. | ||
| 7 | |||
| 8 | Designed for and tested with rl 2.3. | ||
| 9 | |||
| 10 | Knows too much about reportlab internals. | ||
| 11 | What can you do? | ||
| 12 | |||
| 13 | The interface to this module is through the makerl() function. | ||
| 14 | |||
| 15 | Parameters: | ||
| 16 | canv - a reportlab "canvas" (also accepts a "document") | ||
| 17 | pdfobj - a pdfrw PDF object | ||
| 18 | |||
| 19 | Returns: | ||
| 20 | A corresponding reportlab object, or if the | ||
| 21 | object is a PDF Form XObject, the name to | ||
| 22 | use with reportlab for the object. | ||
| 23 | |||
| 24 | Will recursively convert all necessary objects. | ||
| 25 | Be careful when converting a page -- if /Parent is set, | ||
| 26 | will recursively convert all pages! | ||
| 27 | |||
| 28 | Notes: | ||
| 29 | 1) Original objects are annotated with a | ||
| 30 | derived_rl_obj attribute which points to the | ||
| 31 | reportlab object. This keeps multiple reportlab | ||
| 32 | objects from being generated for the same pdfobj | ||
| 33 | via repeated calls to makerl. This is great for | ||
| 34 | not putting too many objects into the | ||
| 35 | new PDF, but not so good if you are modifying | ||
| 36 | objects for different pages. Then you | ||
| 37 | need to do your own deep copying (of circular | ||
| 38 | structures). You're on your own. | ||
| 39 | |||
| 40 | 2) ReportLab seems weird about FormXObjects. | ||
| 41 | They pass around a partial name instead of the | ||
| 42 | object or a reference to it. So we have to | ||
| 43 | reach into reportlab and get a number for | ||
| 44 | a unique name. I guess this is to make it | ||
| 45 | where you can combine page streams with | ||
| 46 | impunity, but that's just a guess. | ||
| 47 | |||
| 48 | 3) Updated 1/23/2010 to handle multipass documents | ||
| 49 | (e.g. with a table of contents). These have | ||
| 50 | a different doc object on every pass. | ||
| 51 | |||
| 52 | ''' | ||
| 53 | |||
| 54 | from reportlab.pdfbase import pdfdoc as rldocmodule | ||
| 55 | from pdfobjects import PdfDict, PdfArray, PdfName | ||
| 56 | |||
| 57 | RLStream = rldocmodule.PDFStream | ||
| 58 | RLDict = rldocmodule.PDFDictionary | ||
| 59 | RLArray = rldocmodule.PDFArray | ||
| 60 | |||
| 61 | |||
def _makedict(rldoc, pdfobj):
    ''' Build a reportlab dictionary from a pdfrw dictionary.

        Parameters:
            rldoc  - the reportlab document being written into
            pdfobj - the pdfrw dictionary to convert (no stream)

        Returns:
            A reference obtained from rldoc.Reference() when
            pdfobj.indirect is true, otherwise the new
            reportlab dictionary itself.
    '''
    target = RLDict()
    result = target

    if pdfobj.indirect:
        target.__RefOnly__ = 1
        result = rldoc.Reference(target)
        # Recorded before the entries are converted, so a recursive
        # visit to this same object through makerl_recurse() finds
        # the reference instead of converting it again.
        pdfobj.derived_rl_obj[rldoc] = (result, None)

    for name, entry in pdfobj.iteritems():
        # The first character of each key is dropped
        # (presumably the leading '/' of the PDF name -- confirm).
        target[name[1:]] = makerl_recurse(rldoc, entry)

    return result
| 73 | |||
def _makestream(rldoc, pdfobj, xobjtype=PdfName.XObject):
    ''' Build a reportlab stream from a pdfrw dictionary with a stream.

        Parameters:
            rldoc    - the reportlab document being written into
            pdfobj   - the pdfrw stream dictionary to convert
            xobjtype - the /Type value for which a name is generated

        Returns:
            The reference obtained from rldoc.Reference().  The
            short name (or None) is stored alongside it in
            pdfobj.derived_rl_obj[rldoc].
    '''
    stream_dict = RLDict()
    stream_obj = RLStream(stream_dict, pdfobj.stream)

    # Only objects whose /Type matches xobjtype get a name; the
    # short name is derived from the document's object counter.
    shortname = fullname = None
    if pdfobj.Type == xobjtype:
        shortname = 'pdfrw_%s' % (rldoc.objectcounter + 1)
        fullname = rldoc.getXObjectName(shortname)

    reference = rldoc.Reference(stream_obj, fullname)
    # Recorded before the entries are converted, so a recursive
    # visit to this same object through makerl_recurse() finds
    # the reference instead of converting it again.
    pdfobj.derived_rl_obj[rldoc] = (reference, shortname)

    for name, entry in pdfobj.iteritems():
        # The first character of each key is dropped
        # (presumably the leading '/' of the PDF name -- confirm).
        stream_dict[name[1:]] = makerl_recurse(rldoc, entry)

    return reference
| 90 | |||
def _makearray(rldoc, pdfobj):
    ''' Build a reportlab array from a pdfrw array.

        Parameters:
            rldoc  - the reportlab document being written into
            pdfobj - the pdfrw array to convert

        Returns:
            A reference obtained from rldoc.Reference() when
            pdfobj.indirect is true, otherwise the new
            reportlab array itself.
    '''
    rl_array = RLArray([])
    result = rl_array

    if pdfobj.indirect:
        rl_array.__RefOnly__ = 1
        result = rldoc.Reference(rl_array)
        # Recorded before the items are converted, so a recursive
        # visit to this same object through makerl_recurse() finds
        # the reference instead of converting it again.
        pdfobj.derived_rl_obj[rldoc] = (result, None)

    # Converted items are appended straight onto the array's
    # own sequence attribute.
    append = rl_array.sequence.append
    for item in pdfobj:
        append(makerl_recurse(rldoc, item))

    return result
| 103 | |||
| 104 | def _makestr(rldoc, pdfobj): | ||
| 105 | assert isinstance(pdfobj, (float, int, str)), repr(pdfobj) | ||
| 106 | return pdfobj | ||
| 107 | |||
def makerl_recurse(rldoc, pdfobj):
    ''' Convert one pdfrw object, reusing an earlier conversion
        for the same document when one has been recorded.

        Parameters:
            rldoc  - the reportlab document being written into
            pdfobj - a pdfrw dictionary, stream, array or plain value

        Returns:
            The reportlab object (or reference) for pdfobj.
    '''
    cache = getattr(pdfobj, 'derived_rl_obj', None)
    if cache is not None:
        hit = cache.get(rldoc)
        if hit is not None:
            # Entries are (reportlab object, name) pairs.
            return hit[0]

    if isinstance(pdfobj, PdfDict):
        # A dictionary carrying a stream needs the stream converter.
        converter = _makedict if pdfobj.stream is None else _makestream
        if cache is None:
            # Set through .private on dictionaries
            # (presumably so it is not treated as a PDF key -- confirm).
            pdfobj.private.derived_rl_obj = {}
    elif isinstance(pdfobj, PdfArray):
        converter = _makearray
        if cache is None:
            pdfobj.derived_rl_obj = {}
    else:
        converter = _makestr

    return converter(rldoc, pdfobj)
| 128 | |||
def makerl(canv, pdfobj):
    ''' Convert a pdfrw object into its reportlab counterpart.

        Parameters:
            canv   - a reportlab "canvas" (also accepts a "document")
            pdfobj - a pdfrw PDF object

        Returns:
            The name recorded for pdfobj for this document when
            there is one, otherwise the converted reportlab object.
    '''
    # A canvas exposes its document as _doc; anything without that
    # attribute is taken to be the document itself.
    try:
        rldoc = canv._doc
    except AttributeError:
        rldoc = canv

    rlobj = makerl_recurse(rldoc, pdfobj)

    # Plain values have no derived_rl_obj at all, and direct
    # (non-indirect) dictionaries and arrays get a cache with no
    # entry for this document.  Indexing the cache directly raised
    # an uncaught KeyError in the second case, so look the entry up
    # the same tolerant way makerl_recurse() does.
    docdict = getattr(pdfobj, 'derived_rl_obj', None)
    entry = docdict.get(rldoc) if docdict is not None else None
    name = entry[1] if entry is not None else None

    return name or rlobj
