From 9e69adbe1b065707f8be4f146cc3c05660cef711 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Tue, 21 Jun 2011 20:41:18 +0200
Subject: Add pdfrw, and many files that I have forgotten, sorry!

---
 lib/pdfrw/pdfwriter.py | 234 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100755 lib/pdfrw/pdfwriter.py

(limited to 'lib/pdfrw/pdfwriter.py')

diff --git a/lib/pdfrw/pdfwriter.py b/lib/pdfrw/pdfwriter.py
new file mode 100755
index 0000000..c193843
--- /dev/null
+++ b/lib/pdfrw/pdfwriter.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+
+# A part of pdfrw (pdfrw.googlecode.com)
+# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
+# MIT license -- See LICENSE.txt for details
+
+'''
+The PdfWriter class writes an entire PDF file out to disk.
+
+The writing process is not at all optimized or organized.
+
+An instance of the PdfWriter class has three main methods:
+    addpage(page)
+    addpages(pagelist)
+    write(fname)
+
+addpage() assumes that the pages are part of a valid
+tree/forest of PDF objects.
+'''
+
+try:
+    set
+except NameError:
+    from sets import Set as set  # fallback for interpreters without a builtin set
+
+from pdfobjects import PdfName, PdfArray, PdfDict, IndirectPdfDict, PdfObject, PdfString
+from pdfcompress import compress
+
+debug = False  # when true, FormatObjects.add() prints each new object number
+
+class FormatObjects(object):
+    ''' FormatObjects performs the actual formatting and disk write.
+    '''
+
+    def add(self, obj, visited):
+        ''' Return the text that refers to obj: 'N 0 R' for an indirect
+            object (formatted once into self.objlist), otherwise the
+            object formatted inline.  visited tracks inline recursion.
+        '''
+        # Can't hash dicts, so just hash the object ID
+        objid = id(obj)
+        # Automatically set stream objects to indirect
+        if isinstance(obj, PdfDict):
+            indirect = obj.indirect or (obj.stream is not None)
+        else:
+            indirect = getattr(obj, 'indirect', False)
+
+        if not indirect:
+            # NOTE(review): stripped by python -O, leaving cycles undetected.
+            assert objid not in visited, \
+                'Circular reference encountered in non-indirect object %s' % repr(obj)
+            visited.add(objid)
+            result = self.format_obj(obj, visited)
+            visited.remove(objid)
+            return result
+
+        objnum = self.indirect_dict.get(objid)
+        # First sighting: add it to the indirect object list.
+        if objnum is None:
+            objlist = self.objlist
+            objnum = len(objlist) + 1
+            if debug:
+                print '  Object', objnum, '\r',
+            objlist.append(None)  # reserve the slot before recursing
+            self.indirect_dict[objid] = objnum
+            objlist[objnum-1] = self.format_obj(obj)
+        return '%s 0 R' % objnum
+
+    def format_array(myarray, formatter):
+        ''' Format array data into semi-readable ASCII: join the items
+            with spaces, wrapping near 70 columns, inside formatter.
+        '''
+        if sum([len(x) for x in myarray]) <= 70:
+            return formatter % ' '.join(myarray)
+        bigarray = []
+        count = 1000000  # oversized so the first item opens a line
+        for x in myarray:
+            lenx = len(x)
+            if lenx + count > 70:
+                subarray = []
+                bigarray.append(subarray)
+                count = 0
+            count += lenx + 1
+            subarray.append(x)
+        return formatter % '\n  '.join([' '.join(x) for x in bigarray])
+    format_array = staticmethod(format_array)
+
+    def format_obj(self, obj, visited=None):
+        ''' Format PDF object data into semi-readable ASCII.
+            May mutually recurse with add() -- add() will
+            return references for indirect objects, and add
+            the indirect object to the list.
+        '''
+        if visited is None:
+            visited = set()  # top-level call: nothing in progress yet
+        if isinstance(obj, PdfArray):
+            myarray = [self.add(x, visited) for x in obj]
+            return self.format_array(myarray, '[%s]')
+        elif isinstance(obj, PdfDict):
+            if self.compress and obj.stream:
+                compress([obj])  # helper imported from pdfcompress (not shown here)
+            myarray = []
+            # Jython 2.2.1 has a bug which segfaults when
+            # sorting subclassed strings, so we un-subclass them.
+            dictkeys = [str(x) for x in obj.iterkeys()]
+            dictkeys.sort()
+            for key in dictkeys:
+                myarray.append(key)
+                myarray.append(self.add(obj[key], visited))
+            result = self.format_array(myarray, '<<%s>>')
+            stream = obj.stream
+            if stream is not None:
+                result = '%s\nstream\n%s\nendstream' % (result, stream)
+            return result
+        elif isinstance(obj, basestring) and not hasattr(obj, 'indirect'):
+            return PdfString.encode(obj)
+        else:
+            return str(obj)
+
+    def dump(cls, f, trailer, version='1.3', compress=True):
+        ''' Write trailer and the objects it references to file object
+            f as a PDF file.  trailer.Size is set on the caller's dict.
+        '''
+        self = cls()
+        self.compress = compress
+        self.indirect_dict = {}
+        self.objlist = []
+        # The first format of trailer gets all the information,
+        # but we throw away the actual trailer formatting.
+        self.format_obj(trailer)
+        # Now the size is known: update the trailer and format it again.
+        trailer.Size = PdfObject(len(self.objlist) + 1)
+        trailer = self.format_obj(trailer)
+
+        # Write everything out, tracking byte offsets as we go so
+        # the cross-reference table comes out right.
+        header = '%%PDF-%s\n%%\xe2\xe3\xcf\xd3\n' % version
+        f.write(header)
+        offset = len(header)
+        offsets = [(0, 65535, 'f')]
+
+        for i, x in enumerate(self.objlist):
+            objstr = '%s 0 obj\n%s\nendobj\n' % (i + 1, x)
+            offsets.append((offset, 0, 'n'))
+            offset += len(objstr)
+            f.write(objstr)
+
+        f.write('xref\n0 %s\n' % len(offsets))
+        for x in offsets:
+            f.write('%010d %05d %s\r\n' % x)
+        f.write('trailer\n\n%s\nstartxref\n%s\n%%%%EOF\n' % (trailer, offset))
+    dump = classmethod(dump)
+
+class PdfWriter(object):
+    ''' Collects pages and writes them out as a new PDF file. '''
+
+    _trailer = None  # cached trailer; addpage() resets it to None
+
+    def __init__(self, version='1.3', compress=True):
+        self.pagearray = PdfArray()
+        self.compress = compress
+        self.version = version
+
+    def addpage(self, page):
+        ''' Append page, wrapped in a new IndirectPdfDict that carries
+            the attributes found via page.inheritable.  Returns self.
+        '''
+        self._trailer = None
+        assert page.Type == PdfName.Page
+        inheritable = page.inheritable # searches for resources
+        self.pagearray.append(IndirectPdfDict(page,
+            Resources = inheritable.Resources,
+            MediaBox = inheritable.MediaBox,
+            CropBox = inheritable.CropBox,
+            Rotate = inheritable.Rotate,
+        ))
+        return self
+
+    addPage = addpage  # for compatibility with pyPdf
+
+    def addpages(self, pagelist):
+        ''' Call addpage() for every page in pagelist.  Returns self. '''
+        for page in pagelist:
+            self.addpage(page)
+        return self
+
+    def _get_trailer(self):
+        ''' Return the cached trailer, building it first if needed. '''
+        if self._trailer is not None:
+            return self._trailer
+        # Create the basic object structure of the PDF file
+        trailer = PdfDict(
+            Root = IndirectPdfDict(
+                Type = PdfName.Catalog,
+                Pages = IndirectPdfDict(
+                    Type = PdfName.Pages,
+                    Count = PdfObject(len(self.pagearray)),
+                    Kids = self.pagearray
+                )))
+        # Make all the pages point back to the page dictionary
+        pagedict = trailer.Root.Pages
+        for page in pagedict.Kids:
+            page.Parent = pagedict
+        self._trailer = trailer
+        return trailer
+
+    def _set_trailer(self, trailer):
+        self._trailer = trailer
+
+    trailer = property(_get_trailer, _set_trailer)
+
+    def write(self, fname, trailer=None):
+        ''' Write the PDF to fname: a filename, or an object with write(). '''
+        trailer = trailer or self.trailer
+        if hasattr(fname, 'write'):  # caller's file object: use as-is, leave open
+            FormatObjects.dump(fname, trailer, self.version, self.compress)
+            return
+        f = open(fname, 'wb')
+        try:  # close the file we opened even if dump() raises
+            FormatObjects.dump(f, trailer, self.version, self.compress)
+        finally:
+            f.close()
+
+if __name__ == '__main__':  # manual smoke test: source.pdf -> result.pdf
+    debug = True
+    import pdfreader
+    reader = pdfreader.PdfReader('source.pdf')
+    writer = PdfWriter()
+    for index, page in enumerate(reader.pages):
+        print '  Adding page', index + 1, '\r',
+        writer.addpage(page)
+    print
+    writer.write('result.pdf')
+    print
-- 
cgit v1.3