From 158fbf02f5f349d2f9a7b1976306804224ad92da Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 30 Jul 2011 19:14:50 +0200 Subject: Cleanup of pdfrw --- lib/pdfrw/__init__.py | 1 + lib/pdfrw/buildxobj.py | 191 ----------------------------------------------- lib/pdfrw/pdfwriter.py | 0 lib/pdfrw/toreportlab.py | 139 ---------------------------------- lib/sounds.py | 1 - 5 files changed, 1 insertion(+), 331 deletions(-) delete mode 100644 lib/pdfrw/buildxobj.py mode change 100755 => 100644 lib/pdfrw/pdfwriter.py delete mode 100644 lib/pdfrw/toreportlab.py delete mode 100644 lib/sounds.py (limited to 'lib') diff --git a/lib/pdfrw/__init__.py b/lib/pdfrw/__init__.py index 964972f..26e8c73 100644 --- a/lib/pdfrw/__init__.py +++ b/lib/pdfrw/__init__.py @@ -11,3 +11,4 @@ from pdftokens import PdfTokens PdfFileReader = PdfReader PdfFileWriter = PdfWriter + diff --git a/lib/pdfrw/buildxobj.py b/lib/pdfrw/buildxobj.py deleted file mode 100644 index 203dd8c..0000000 --- a/lib/pdfrw/buildxobj.py +++ /dev/null @@ -1,191 +0,0 @@ -# A part of pdfrw (pdfrw.googlecode.com) -# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas -# MIT license -- See LICENSE.txt for details - -''' - -This module contains code to build PDF "Form XObjects". - -A Form XObject allows a fragment from one PDF file to be cleanly -included in another PDF file. - -Reference for syntax: "Parameters for opening PDF files" from SDK 8.1 - - http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf - - supported 'page=xxx', 'viewrect=,,,' - - Units are in points - -Reference for content: Adobe PDF reference, sixth edition, version 1.7 - - http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf - - Form xobjects discussed chapter 4.9, page 355 -''' - -from pdfobjects import PdfDict, PdfArray, PdfName -from pdfreader import PdfReader - -class ViewInfo(object): - ''' Instantiate ViewInfo with a uri, and it will parse out - the filename, page, and viewrect into object attributes. - ''' - doc = None - docname = None - page = None - viewrect = None - - def __init__(self, pageinfo='', **kw): - pageinfo=pageinfo.split('#',1) - if len(pageinfo) == 2: - pageinfo[1:] = pageinfo[1].replace('&', '#').split('#') - for key in 'page viewrect'.split(): - if pageinfo[0].startswith(key+'='): - break - else: - self.docname = pageinfo.pop(0) - for item in pageinfo: - key, value = item.split('=') - key = key.strip() - value = value.replace(',', ' ').split() - if key == 'page': - assert len(value) == 1 - setattr(self, key, int(value[0])) - elif key == 'viewrect': - assert len(value) == 4 - setattr(self, key, [float(x) for x in value]) - else: - log.error('Unknown option: %s', key) - for key, value in kw.iteritems(): - assert hasattr(self, key), key - setattr(self, key, value) - -def getrects(inheritable, pageinfo): - ''' Given the inheritable attributes of a page and - the desired pageinfo rectangle, return the page's - media box and the calculated boundary (clip) box. - ''' - mbox = tuple([float(x) for x in inheritable.MediaBox]) - vrect = pageinfo.viewrect - if vrect is None: - cbox = tuple([float(x) for x in (inheritable.CropBox or mbox)]) - else: - mleft, mbot, mright, mtop = mbox - x, y, w, h = vrect - cleft = mleft + x - ctop = mtop - y - cright = cleft + w - cbot = ctop - h - cbox = max(mleft, cleft), max(mbot, cbot), min(mright, cright), min(mtop, ctop) - return mbox, cbox - -def _cache_xobj(contents, resources, mbox, bbox): - ''' Return a cached Form XObject, or create a new one and cache it. - ''' - cachedict = contents.xobj_cachedict - if cachedict is None: - cachedict = contents.private.xobj_cachedict = {} - result = cachedict.get(bbox) - if result is None: - func = (_get_fullpage, _get_subpage)[mbox != bbox] - result = PdfDict( - func(contents, resources, mbox, bbox), - Type = PdfName.XObject, - Subtype = PdfName.Form, - FormType = 1, - BBox = PdfArray(bbox), - ) - cachedict[bbox] = result - return result - -def _get_fullpage(contents, resources, mbox, bbox): - ''' fullpage is easy. Just copy the contents, - set up the resources, and let _cache_xobj handle the - rest. - ''' - return PdfDict(contents, Resources=resources) - -def _get_subpage(contents, resources, mbox, bbox): - ''' subpages *could* be as easy as full pages, but we - choose to complicate life by creating a Form XObject - for the page, and then one that references it for - the subpage, on the off-chance that we want multiple - items from the page. - ''' - return PdfDict( - stream = '/FullPage Do\n', - Resources = PdfDict( - XObject = PdfDict( - FullPage = _cache_xobj(contents, resources, mbox, mbox) - ) - ) - ) - -def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True): - ''' pagexobj creates and returns a Form XObject for - a given view within a page (Defaults to entire page.) - ''' - inheritable = page.inheritable - resources = inheritable.Resources - mbox, bbox = getrects(inheritable, viewinfo) - contents = page.Contents - # Make sure the only attribute is length - # All the filters must have been executed - assert int(contents.Length) == len(contents.stream) - if not allow_compressed: - assert len([x for x in contents.iteritems()]) == 1 - - return _cache_xobj(contents, resources, mbox, bbox) - - -def docxobj(pageinfo, doc=None, allow_compressed=True): - ''' docxobj creates and returns an actual Form XObject. - Can work standalone, or in conjunction with - the CacheXObj class (below). - ''' - if not isinstance(pageinfo, ViewInfo): - pageinfo = ViewInfo(pageinfo) - - # If we're explicitly passed a document, - # make sure we don't have one implicitly as well. - # If no implicit or explicit doc, then read one in - # from the filename. - if doc is not None: - assert pageinfo.doc is None - pageinfo.doc = doc - elif pageinfo.doc is not None: - doc = pageinfo.doc - else: - doc = pageinfo.doc = PdfReader(pageinfo.docname, decompress = not allow_compressed) - assert isinstance(doc, PdfReader) - - sourcepage = doc.pages[(pageinfo.page or 1) - 1] - return pagexobj(sourcepage, pageinfo, allow_compressed) - - -class CacheXObj(object): - ''' Use to keep from reparsing files over and over, - and to keep from making the output too much - bigger than it ought to be by replicating - unnecessary object copies. - ''' - def __init__(self, decompress=False): - ''' Set decompress true if you need - the Form XObjects to be decompressed. - Will decompress what it can and scream - about the rest. - ''' - self.cached_pdfs = {} - self.decompress = decompress - - def load(self, sourcename): - ''' Load a Form XObject from a uri - ''' - info = ViewInfo(sourcename) - fname = info.docname - pcache = self.cached_pdfs - doc = pcache.get(fname) - if doc is None: - doc = pcache[fname] = PdfReader(fname, decompress=self.decompress) - return docxobj(info, doc, allow_compressed=not self.decompress) diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py old mode 100755 new mode 100644 diff --git a/lib/pdfrw/toreportlab.py b/lib/pdfrw/toreportlab.py deleted file mode 100644 index 00ad324..0000000 --- a/lib/pdfrw/toreportlab.py +++ /dev/null @@ -1,139 +0,0 @@ -# A part of pdfrw (pdfrw.googlecode.com) -# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas -# MIT license -- See LICENSE.txt for details - -''' -Converts pdfrw objects into reportlab objects. - -Designed for and tested with rl 2.3. - -Knows too much about reportlab internals. -What can you do? - -The interface to this function is through the makerl() function. - -Parameters: - canv - a reportlab "canvas" (also accepts a "document") - pdfobj - a pdfrw PDF object - -Returns: - A corresponding reportlab object, or if the - object is a PDF Form XObject, the name to - use with reportlab for the object. - - Will recursively convert all necessary objects. - Be careful when converting a page -- if /Parent is set, - will recursively convert all pages! - -Notes: - 1) Original objects are annotated with a - derived_rl_obj attribute which points to the - reportlab object. This keeps multiple reportlab - objects from being generated for the same pdfobj - via repeated calls to makerl. This is great for - not putting too many objects into the - new PDF, but not so good if you are modifying - objects for different pages. Then you - need to do your own deep copying (of circular - structures). You're on your own. - - 2) ReportLab seems weird about FormXObjects. - They pass around a partial name instead of the - object or a reference to it. So we have to - reach into reportlab and get a number for - a unique name. I guess this is to make it - where you can combine page streams with - impunity, but that's just a guess. - - 3) Updated 1/23/2010 to handle multipass documents - (e.g. with a table of contents). These have - a different doc object on every pass. - -''' - -from reportlab.pdfbase import pdfdoc as rldocmodule -from pdfobjects import PdfDict, PdfArray, PdfName - -RLStream = rldocmodule.PDFStream -RLDict = rldocmodule.PDFDictionary -RLArray = rldocmodule.PDFArray - - -def _makedict(rldoc, pdfobj): - rlobj = rldict = RLDict() - if pdfobj.indirect: - rlobj.__RefOnly__ = 1 - rlobj = rldoc.Reference(rlobj) - pdfobj.derived_rl_obj[rldoc] = rlobj, None - - for key, value in pdfobj.iteritems(): - rldict[key[1:]] = makerl_recurse(rldoc, value) - - return rlobj - -def _makestream(rldoc, pdfobj, xobjtype=PdfName.XObject): - rldict = RLDict() - rlobj = RLStream(rldict, pdfobj.stream) - - if pdfobj.Type == xobjtype: - shortname = 'pdfrw_%s' % (rldoc.objectcounter+1) - fullname = rldoc.getXObjectName(shortname) - else: - shortname = fullname = None - result = rldoc.Reference(rlobj, fullname) - pdfobj.derived_rl_obj[rldoc] = result, shortname - - for key, value in pdfobj.iteritems(): - rldict[key[1:]] = makerl_recurse(rldoc, value) - - return result - -def _makearray(rldoc, pdfobj): - rlobj = rlarray = RLArray([]) - if pdfobj.indirect: - rlobj.__RefOnly__ = 1 - rlobj = rldoc.Reference(rlobj) - pdfobj.derived_rl_obj[rldoc] = rlobj, None - - mylist = rlarray.sequence - for value in pdfobj: - mylist.append(makerl_recurse(rldoc, value)) - - return rlobj - -def _makestr(rldoc, pdfobj): - assert isinstance(pdfobj, (float, int, str)), repr(pdfobj) - return pdfobj - -def makerl_recurse(rldoc, pdfobj): - docdict = getattr(pdfobj, 'derived_rl_obj', None) - if docdict is not None: - value = docdict.get(rldoc) - if value is not None: - return value[0] - if isinstance(pdfobj, PdfDict): - if pdfobj.stream is not None: - func = _makestream - else: - func = _makedict - if docdict is None: - pdfobj.private.derived_rl_obj = {} - elif isinstance(pdfobj, PdfArray): - func = _makearray - if docdict is None: - pdfobj.derived_rl_obj = {} - else: - func = _makestr - return func(rldoc, pdfobj) - -def makerl(canv, pdfobj): - try: - rldoc = canv._doc - except AttributeError: - rldoc = canv - rlobj = makerl_recurse(rldoc, pdfobj) - try: - name = pdfobj.derived_rl_obj[rldoc][1] - except AttributeError: - name = None - return name or rlobj diff --git a/lib/sounds.py b/lib/sounds.py deleted file mode 100644 index a4bf5b6..0000000 --- a/lib/sounds.py +++ /dev/null @@ -1 +0,0 @@ -import parser -- cgit v1.3