Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

aroundthecode / SQLAlchemy   python

Repository URL to install this package:

Version: 1.2.10 

/ ext / horizontal_shard.py

# ext/horizontal_shard.py
# Copyright (C) 2005-2018 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

"""Horizontal sharding support.

Defines a rudimental 'horizontal sharding' system which allows a Session to
distribute queries and persistence operations across multiple databases.

For a usage example, see the :ref:`examples_sharding` example included in
the source distribution.

"""

from .. import inspect
from .. import util
from ..orm.session import Session
from ..orm.query import Query

__all__ = ['ShardedSession', 'ShardedQuery']


class ShardedQuery(Query):
    def __init__(self, *args, **kwargs):
        super(ShardedQuery, self).__init__(*args, **kwargs)
        self.id_chooser = self.session.id_chooser
        self.query_chooser = self.session.query_chooser
        self._shard_id = None

    def set_shard(self, shard_id):
        """return a new query, limited to a single shard ID.

        all subsequent operations with the returned query will
        be against the single shard regardless of other state.
        """

        q = self._clone()
        q._shard_id = shard_id
        return q

    def _execute_and_instances(self, context):
        def iter_for_shard(shard_id):
            context.attributes['shard_id'] = context.identity_token = shard_id
            result = self._connection_from_session(
                mapper=self._mapper_zero(),
                shard_id=shard_id).execute(
                context.statement,
                self._params)
            return self.instances(result, context)

        if context.identity_token is not None:
            return iter_for_shard(context.identity_token)
        elif self._shard_id is not None:
            return iter_for_shard(self._shard_id)
        else:
            partial = []
            for shard_id in self.query_chooser(self):
                partial.extend(iter_for_shard(shard_id))

            # if some kind of in memory 'sorting'
            # were done, this is where it would happen
            return iter(partial)

    def _identity_lookup(
            self, mapper, primary_key_identity, identity_token=None,
            lazy_loaded_from=None, **kw):
        """override the default Query._identity_lookup method so that we
        search for a given non-token primary key identity across all
        possible identity tokens (e.g. shard ids).

        """

        if identity_token is not None:
            return super(ShardedQuery, self)._identity_lookup(
                mapper, primary_key_identity,
                identity_token=identity_token, **kw
            )
        else:
            q = self.session.query(mapper)
            if lazy_loaded_from:
                q = q._set_lazyload_from(lazy_loaded_from)
            for shard_id in self.id_chooser(q, primary_key_identity):
                obj = super(ShardedQuery, self)._identity_lookup(
                    mapper, primary_key_identity, identity_token=shard_id, **kw
                )
                if obj is not None:
                    return obj

            return None

    def _get_impl(
            self, primary_key_identity, db_load_fn, identity_token=None):
        """Override the default Query._get_impl() method so that we emit
        a query to the DB for each possible identity token, if we don't
        have one already.

        """
        def _db_load_fn(query, primary_key_identity):
            # load from the database.  The original db_load_fn will
            # use the given Query object to load from the DB, so our
            # shard_id is what will indicate the DB that we query from.
            if self._shard_id is not None:
                return db_load_fn(self, primary_key_identity)
            else:
                ident = util.to_list(primary_key_identity)
                # build a ShardedQuery for each shard identifier and
                # try to load from the DB
                for shard_id in self.id_chooser(self, ident):
                    q = self.set_shard(shard_id)
                    o = db_load_fn(q, ident)
                    if o is not None:
                        return o
                else:
                    return None

        if identity_token is None and self._shard_id is not None:
            identity_token = self._shard_id

        return super(ShardedQuery, self)._get_impl(
            primary_key_identity, _db_load_fn, identity_token=identity_token)


class ShardedSession(Session):
    def __init__(self, shard_chooser, id_chooser, query_chooser, shards=None,
                 query_cls=ShardedQuery, **kwargs):
        """Construct a ShardedSession.

        :param shard_chooser: A callable which, passed a Mapper, a mapped
          instance, and possibly a SQL clause, returns a shard ID.  This id
          may be based off of the attributes present within the object, or on
          some round-robin scheme. If the scheme is based on a selection, it
          should set whatever state on the instance to mark it in the future as
          participating in that shard.

        :param id_chooser: A callable, passed a query and a tuple of identity
          values, which should return a list of shard ids where the ID might
          reside.  The databases will be queried in the order of this listing.

        :param query_chooser: For a given Query, returns the list of shard_ids
          where the query should be issued.  Results from all shards returned
          will be combined together into a single listing.

        :param shards: A dictionary of string shard names
          to :class:`~sqlalchemy.engine.Engine` objects.

        """
        super(ShardedSession, self).__init__(query_cls=query_cls, **kwargs)
        self.shard_chooser = shard_chooser
        self.id_chooser = id_chooser
        self.query_chooser = query_chooser
        self.__binds = {}
        self.connection_callable = self.connection
        if shards is not None:
            for k in shards:
                self.bind_shard(k, shards[k])

    def _choose_shard_and_assign(self, mapper, instance, **kw):
        if instance is not None:
            state = inspect(instance)
            if state.key:
                token = state.key[2]
                assert token is not None
                return token
            elif state.identity_token:
                return state.identity_token

        shard_id = self.shard_chooser(mapper, instance, **kw)
        if instance is not None:
            state.identity_token = shard_id
        return shard_id

    def connection(self, mapper=None, instance=None, shard_id=None, **kwargs):
        if shard_id is None:
            shard_id = self._choose_shard_and_assign(mapper, instance)

        if self.transaction is not None:
            return self.transaction.connection(mapper, shard_id=shard_id)
        else:
            return self.get_bind(
                mapper,
                shard_id=shard_id,
                instance=instance
            ).contextual_connect(**kwargs)

    def get_bind(self, mapper, shard_id=None,
                 instance=None, clause=None, **kw):
        if shard_id is None:
            shard_id = self._choose_shard_and_assign(
                mapper, instance, clause=clause)
        return self.__binds[shard_id]

    def bind_shard(self, shard_id, bind):
        self.__binds[shard_id] = bind