Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pdfrw / pagemerge.py
Size: Mime:
# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details

'''
This module contains code to edit pages.  Sort of a canvas, I
suppose, but I wouldn't want to call it that and get people all
excited or anything.

No, this is just for doing basic things like merging/splitting
apart pages, watermarking, etc.  All it does is allow converting
pages (or parts of pages) into Form XObject rectangles, and then
plopping those down on new or pre-existing pages.
'''

from .objects import PdfDict, PdfArray, PdfName
from .buildxobj import pagexobj, ViewInfo

# Shared default ViewInfo.  RectXObj uses it as the default for its
# viewinfo parameter and compares against it by identity to tell whether
# the caller supplied a viewinfo of their own.
NullInfo = ViewInfo()


class RectXObj(PdfDict):
    ''' This class facilitates doing positioning (moving and scaling)
        of Form XObjects within their containing page, by modifying
        the Form XObject's transformation matrix.

        By default, this class keeps the aspect ratio locked.  For
        example, if your object is foo, you can write 'foo.w = 200',
        and it will scale in both the x and y directions.

        To unlock the aspect ratio, you have to do a tiny bit of math
        and call the scale function with separate x and y factors.
    '''
    def __init__(self, page, viewinfo=NullInfo, **kw):
        ''' The page is a page returned by PdfReader.  It will be
            turned into a cached Form XObject (so that multiple
            rectangles can be extracted from it if desired), and then
            another Form XObject will be built using it and the viewinfo
            (which should be a ViewInfo class).  The viewinfo includes
            source coordinates (from the top/left) and rotation information.

            Instead of a viewinfo, keyword arguments may be given; they
            are passed straight to ViewInfo().  Supplying both a viewinfo
            and keyword arguments raises ValueError.

            Once the object has been built, its destination coordinates
            may be examined and manipulated by using x, y, w, h, and
            scale.  The destination coordinates are in the normal
            PDF programmatic system (starting at bottom left).
        '''
        if kw:
            if viewinfo is not NullInfo:
                raise ValueError("Cannot modify preexisting ViewInfo")
            viewinfo = ViewInfo(**kw)
        # NOTE: this writes to whatever object was passed in, including
        # the shared NullInfo default.
        viewinfo.cacheable = False
        base = pagexobj(page, viewinfo)
        self.update(base)
        self.indirect = True
        self.stream = base.stream
        private = self.private
        # Destination rectangle as [x, y, w, h]; kept in step with the
        # matrix by __setattr__ and scale.
        private._rect = [base.x, base.y, base.w, base.h]
        matrix = self.Matrix
        if matrix is None:
            # No matrix came from the base object: start from identity.
            matrix = self.Matrix = PdfArray((1, 0, 0, 1, 0, 0))
        private._matrix = matrix  # Lookup optimization
        # Default to lower-left corner
        self.x = 0
        self.y = 0

    @property
    def x(self):
        ''' X location (from left) of object in points
        '''
        return self._rect[0]

    @property
    def y(self):
        ''' Y location (from bottom) of object in points
        '''
        return self._rect[1]

    @property
    def w(self):
        ''' Width of object in points
        '''
        return self._rect[2]

    @property
    def h(self):
        ''' Height of object in points
        '''
        return self._rect[3]

    def __setattr__(self, name, value, next=PdfDict.__setattr__,
                    mine=set('x y w h'.split())):
        ''' Fake property setters for x, y, w and h (the original
            author notes that the underlying class won't let a real
            property setter be used here).

            Any other attribute name is handed to PdfDict.__setattr__.
            Setting x or y moves the object by adjusting the matching
            translation entry of the matrix.  Setting w or h rescales
            the object uniformly (aspect ratio locked) so that the
            named dimension takes the requested value.
        '''
        if name not in mine:
            return next(self, name, value)
        if name in ('x', 'y'):
            r_index, m_index = (0, 4) if name == 'x' else (1, 5)
            old = self._rect[r_index]
            self._rect[r_index] = value
            # Shift the matrix translation by the same delta.
            self._matrix[m_index] += value - old
        else:
            # Pick the rect slot by attribute *name*: 2 is width, 3 is
            # height.  (Testing the value here made 'h' use the width.)
            index = 2 + (name == 'h')
            # Force float division so integer operands do not truncate
            # on interpreters where '/' is integer division.
            self.scale(1.0 * value / self._rect[index])

    def scale(self, x_scale, y_scale=None):
        ''' Scale the object in place.  If y_scale is omitted, x_scale
            is used for both directions.

            Matrix entries a and c are multiplied by x_scale, b and d
            by y_scale, and the translation entries are rescaled
            relative to the rectangle's x, y origin.  The stored width
            and height are multiplied by x_scale and y_scale.

            Current scaling deals properly with things that
            have been rotated in 90 degree increments
            (via the viewinfo given when instantiating).
        '''
        if y_scale is None:
            y_scale = x_scale
        x, y, w, h = rect = self._rect
        ao, bo, co, do, eo, fo = matrix = self._matrix
        an = ao * x_scale
        bn = bo * y_scale
        cn = co * x_scale
        dn = do * y_scale
        en = x + (eo - x) * 1.0 * (an + cn) / (ao + co)
        fn = y + (fo - y) * 1.0 * (bn + dn) / (bo + do)
        # Update both containers in place so existing references
        # (self.Matrix and the private aliases) see the new values.
        matrix[:] = an, bn, cn, dn, en, fn
        rect[:] = x, y, w * x_scale, h * y_scale

    @property
    def box(self):
        ''' Return the bounding box for the object as a PdfArray
            of [left, bottom, right, top]
        '''
        x, y, w, h = self._rect
        return PdfArray([x, y, x + w, y + h])


class PageMerge(list):
    ''' A PageMerge object can have 0 or 1 underlying pages
        (that get edited with the results of the merge)
        and 0-n RectXObjs that can be applied before or
        after the underlying page.

        The list itself holds the RectXObjs in drawing order.  When
        an underlying page has been set, a None entry marks where the
        page's own contents go relative to the added objects.
    '''
    # Class-level defaults; setpage() overrides them per instance.
    page = None
    mbox = None
    cbox = None
    resources = None
    rotate = None
    contents = None

    def __init__(self, page=None):
        ''' Optionally start from an existing page (see setpage).
        '''
        if page is not None:
            self.setpage(page)

    def setpage(self, page):
        ''' Use page as the underlying page.  Raises TypeError if the
            object's Type is not /Page.  Appends the None placeholder
            and remembers the page's boxes, resources, rotation and
            contents for render().
        '''
        if page.Type != PdfName.Page:
            raise TypeError("Expected page")
        self.append(None)  # Placeholder
        self.page = page
        inheritable = page.inheritable
        self.mbox = inheritable.MediaBox
        self.cbox = inheritable.CropBox
        self.resources = inheritable.Resources
        self.rotate = inheritable.Rotate
        self.contents = page.Contents

    def __add__(self, other):
        ''' Add a single object (any dict instance) or an iterable of
            objects via add().  Note that this modifies and returns
            self rather than building a new PageMerge.
        '''
        if isinstance(other, dict):
            other = [other]
        for item in other:
            self.add(item)
        return self

    def add(self, obj, prepend=False, **kw):
        ''' Add obj to the merge and return self.

            If keyword arguments are given they are passed, with obj,
            to RectXObj.  Otherwise obj is wrapped in a RectXObj only
            if it is a page; anything else is stored as given.
            With prepend true the object goes to the front of the
            list, otherwise to the end.
        '''
        if kw:
            obj = RectXObj(obj, **kw)
        elif obj.Type == PdfName.Page:
            obj = RectXObj(obj)
        if prepend:
            self.insert(0, obj)
        else:
            self.append(obj)
        return self

    def render(self):
        ''' Build (or update) the page and return it.

            Every object in the list is registered in the page's
            XObject resources under a /pdfrw_N name and drawn with
            a 'Do' operator.  Objects before the placeholder are
            drawn before the original page contents, the rest after.
            If there is no underlying page, a new page dictionary
            is created; if there is no MediaBox, one is derived
            from the objects' bounding box.
        '''
        def do_xobjs(xobj_list, restore_first=False):
            ''' Register each object under a fresh /pdfrw_N key and
                return a stream dictionary that draws them in order.
            '''
            content = ['Q'] if restore_first else []
            for obj in xobj_list:
                if obj is None:
                    # Page placeholder, not a drawable object.  It
                    # reaches here when the page has no contents.
                    continue
                index = PdfName('pdfrw_%d' % (key_offset + len(xobjs)))
                if xobjs.setdefault(index, obj) is not obj:
                    raise KeyError("XObj key %s already in use" % index)
                content.append('%s Do' % index)
            return PdfDict(indirect=True, stream='\n'.join(content))

        mbox = self.mbox
        cbox = self.cbox
        page = self.page
        old_contents = self.contents
        resources = self.resources or PdfDict()

        key_offset = 0
        xobjs = resources.XObject
        if xobjs is None:
            xobjs = resources.XObject = PdfDict()
        else:
            allkeys = xobjs.keys()
            if allkeys:
                # Continue numbering after the highest existing
                # /pdfrw_N key.  do_xobjs adds len(xobjs) back in,
                # so the current key count is subtracted here.
                keys = (x for x in allkeys if x.startswith('/pdfrw_'))
                keys = (x for x in keys if x[7:].isdigit())
                keys = sorted(keys, key=lambda x: int(x[7:]))
                key_offset = (int(keys[-1][7:]) + 1) if keys else 0
                key_offset -= len(allkeys)

        if old_contents is None:
            new_contents = do_xobjs(self)
        else:
            isdict = isinstance(old_contents, PdfDict)
            old_contents = [old_contents] if isdict else old_contents
            new_contents = PdfArray()
            index = self.index(None)
            if index:
                new_contents.append(do_xobjs(self[:index]))

            index += 1
            if index < len(self):
                # There are elements to add after the original page contents,
                # so push the graphics state to the stack. Restored below.
                new_contents.append(PdfDict(indirect=True, stream='q'))

            new_contents.extend(old_contents)

            if index < len(self):
                # Restore graphics state and add other elements.
                new_contents.append(do_xobjs(self[index:], restore_first=True))

        if mbox is None:
            # No MediaBox available: enclose all objects, extended to
            # include the origin, and drop any CropBox.
            cbox = None
            mbox = self.xobj_box
            mbox[0] = min(0, mbox[0])
            mbox[1] = min(0, mbox[1])

        page = PdfDict(indirect=True) if page is None else page
        page.Type = PdfName.Page
        page.Resources = resources
        page.MediaBox = mbox
        page.CropBox = cbox
        page.Rotate = self.rotate
        page.Contents = new_contents
        return page

    @property
    def xobj_box(self):
        ''' Return the smallest box that encloses every object
            in the list.  The page placeholder is ignored.
            Raises ValueError if there are no objects.
        '''
        boxes = [xobj.box for xobj in self if xobj is not None]
        if not boxes:
            raise ValueError("No objects to compute a bounding box from")
        a, b, c, d = zip(*boxes)
        return PdfArray((min(a), min(b), max(c), max(d)))