Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

edgify / persistent   python

Repository URL to install this package:

/ cPickleCache.c

/*****************************************************************************

  Copyright (c) 2001, 2002 Zope Foundation and Contributors.
  All Rights Reserved.

  This software is subject to the provisions of the Zope Public License,
  Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
  WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
  FOR A PARTICULAR PURPOSE

****************************************************************************/

/*

  Objects are stored under three different regimes:

  Regime 1: Persistent Classes

  Persistent Classes are part of ZClasses. They are stored in the
  self->data dictionary, and are never garbage collected.

  The klass_items() method returns a sequence of (oid,object) tuples for
  every Persistent Class, which should make it possible to implement
  garbage collection in Python if necessary.

  Regime 2: Ghost Objects

  There is no benefit to keeping a ghost object which has no external
  references, therefore a weak reference scheme is used to ensure that
  ghost objects are removed from memory as soon as possible, when the
  last external reference is lost.

  Ghost objects are stored in the self->data dictionary. Normally a
  dictionary keeps a strong reference on its values, however this
  reference count is 'stolen'.

  This weak reference scheme leaves a dangling reference, in the
  dictionary, when the last external reference is lost. To clean up this
  dangling reference the persistent object dealloc function calls
  self->cache->_oid_unreferenced(self->oid). The cache looks up the oid
  in the dictionary, ensures it points to an object whose reference
  count is zero, then removes it from the dictionary. Before removing
  the object from the dictionary it must temporarily resurrect the
  object in much the same way that class instances are resurrected
  before their __del__ is called.

  Since ghost objects are stored under a different regime to non-ghost
  objects, an extra ghostify function in cPersistenceAPI replaces
  self->state=GHOST_STATE assignments that were common in other
  persistent classes (such as BTrees).

  Regime 3: Non-Ghost Objects

  Non-ghost objects are stored in two data structures: the dictionary
  mapping oids to objects and a doubly-linked list that encodes the
  order in which the objects were accessed.  The dictionary reference is
  borrowed, as it is for ghosts.  The list reference is a new reference;
  the list stores recently used objects, even if they are otherwise
  unreferenced, to avoid loading the object from the database again.

  The doubly-linked-list nodes contain next and previous pointers linking
  together the cache and all non-ghost persistent objects.

  The node embedded in the cache is the home position. On every
  attribute access a non-ghost object will relink itself just behind the
  home position in the ring. Objects accessed least recently will
  eventually find themselves positioned after the home position.

  Occasionally other nodes are temporarily inserted in the ring as
  position markers. The cache contains a ring_lock flag which must be
  set and unset before and after doing so. Only if the flag is unset can
  the cache assume that all nodes are either its own home node, or nodes
  from persistent objects. This assumption is useful during the garbage
  collection process.

  The number of non-ghost objects is counted in self->non_ghost_count.
  The garbage collection process consists of traversing the ring, and
  deactivating (that is, turning into a ghost) every object until
  self->non_ghost_count is down to the target size, or until it
  reaches the home position again.

  Note that objects in the sticky or changed states are still kept in
  the ring, however they can not be deactivated. The garbage collection
  process must skip such objects, rather than deactivating them.

*/

/* Documentation text for this module.  NOTE(review): presumably handed to
   the module-initialisation call as the module docstring; that code is
   outside this view. */
static char cPickleCache_doc_string[] =
  "Defines the PickleCache used by ZODB Connection objects.\n"
  "\n"
  "$Id$\n";

#define DONT_USE_CPERSISTENCECAPI
#include "cPersistence.h"
#include "structmember.h"
#include <time.h>
#include <stddef.h>
#undef Py_FindMethod


/* Python string objects to speed lookups; set by module init.
   Only py__p_deactivate is used in the part of the file shown here
   (as the attribute name looked up during garbage collection). */
static PyObject *py__p_changed;
static PyObject *py__p_deactivate;
static PyObject *py__p_jar;
static PyObject *py__p_oid;

/* Pointer to the cPersistence C API structure.
   NOTE(review): presumably filled in at module init -- confirm there. */
static cPersistenceCAPIstruct *cPersistenceCAPI;

/* This object is the pickle cache.  The CACHE_HEAD macro guarantees
   that the layout of this struct is the same as the start of
   ccobject_head in cPersistence.c, so fields must not be reordered
   ahead of, or inserted into, the CACHE_HEAD part. */
typedef struct
{
    CACHE_HEAD
    int klass_count;                     /* count of persistent classes */
    PyObject *data;                      /* oid -> object dict */
    PyObject *jar;                       /* Connection object */
    int cache_size;                      /* target number of items in cache */
    Py_ssize_t cache_size_bytes;       /* target total estimated size of
                                            items in cache */

    /* Most of the time the ring contains only:
         - many nodes corresponding to persistent objects
         - one 'home' node from the cache.
       In some cases it is handy to temporarily add other types
       of node into the ring as placeholders.  'ring_lock' is a boolean
       indicating that someone has already done this.  Currently this
       is only used by the garbage collection code. */

    int ring_lock;

    /* 'cache_drain_resistance' controls how quickly the cache size will drop
        when it is smaller than the configured size.  A value of zero means it
        will not drop below the configured size (suitable for most caches).
        Otherwise, each incremental collection aims to remove roughly
        non_ghost_count/cache_drain_resistance items from the cache
        (suitable for rarely used caches, such as those associated with
        Zope versions). */

    int cache_drain_resistance;

} ccobject;

/* Forward declaration; the definition is not in the part of the file
   shown here. */
static int cc_ass_sub(ccobject *self, PyObject *key, PyObject *v);

/* ---------------------------------------------------------------- */

/* Recover the cPersistentObject that embeds the ring node HERE, by
   subtracting the offset of the object's 'ring' member from the node's
   address.  HERE must point at the 'ring' member of a cPersistentObject
   (never at a cache home node or a placeholder).  SELF is unused; it is
   kept so existing call sites continue to compile.

   The macro uses its HERE parameter rather than relying on the caller
   having a local variable that happens to be called 'here'. */
#define OBJECT_FROM_RING(SELF, HERE)                                    \
  ((cPersistentObject *)(((char *)(HERE)) - offsetof(cPersistentObject, ring)))

/* Splice the node 'self' into the ring immediately after the node
   'after'.  Both pointers must be non-NULL, and 'after' must already be
   linked into a ring. */
static void
insert_after(CPersistentRing *self, CPersistentRing *after)
{
    CPersistentRing *successor;

    assert(self != NULL);
    assert(after != NULL);

    /* The node that currently follows 'after' will follow 'self'. */
    successor = after->r_next;

    self->r_next = successor;
    self->r_prev = after;
    successor->r_prev = self;
    after->r_next = self;
}

/* Detach the node 'self' from its ring by pointing its two neighbours
   at each other.  The r_prev/r_next fields of 'self' are left as they
   were. */
static void
unlink_from_ring(CPersistentRing *self)
{
    CPersistentRing *before;
    CPersistentRing *behind;

    assert(self != NULL);

    before = self->r_prev;
    behind = self->r_next;

    before->r_next = behind;
    behind->r_prev = before;
}

/* Walk the ring from the least recently used end and deactivate
 * up-to-date objects until the cache is within its targets.
 *
 *   self          the cache whose ring is scanned
 *   target        object-count limit; 0 disables this limit
 *   target_bytes  estimated-size limit in bytes; 0 disables this limit
 *
 * If both limits are 0, every object up to the starting MRU position is
 * visited.  Deactivation is done by looking up the attribute named by
 * py__p_deactivate on the object and calling it with no arguments.
 *
 * Returns 0 on success, or -1 as soon as that attribute lookup or call
 * fails (presumably with a Python exception set by the failing call).
 * The caller must hold self->ring_lock.
 */
static int
scan_gc_items(ccobject *self, int target, Py_ssize_t target_bytes)
{
    /* This function must only be called with the ring lock held,
        because it places non-object placeholders in the ring.
    */
    cPersistentObject *object;
    CPersistentRing *here;
    CPersistentRing before_original_home;
    int result = -1;   /* guilty until proved innocent */

    /* Scan the ring, from least to most recently used, deactivating
    * up-to-date objects, until we either find the ring_home again
    * or we've ghosted enough objects to reach the target size.
    * Tricky:  __getattr__ and __del__ methods can do anything, and in
    * particular if we ghostify an object with a __del__ method, that method
    * can load the object again, putting it back into the MRU part of the
    * ring.  Waiting to find ring_home again can thus cause an infinite
    * loop (Collector #1208).  So before_original_home records the MRU
    * position we start with, and we stop the scan when we reach that.
    */
    insert_after(&before_original_home, self->ring_home.r_prev);
    here = self->ring_home.r_next;   /* least recently used object */
    /* All objects should be deactivated when the object count parameter
     * (target) is zero and the size limit parameter in bytes (target_bytes)
     * is also zero.
     *
     * Otherwise objects should be collected while at least one of the
     * following conditions is true:
     *  - the non-ghost count is bigger than the object count limit
     *    (target).
     *  - the estimated size in bytes is bigger than the size limit in
     *    bytes (target_bytes).
     */
    while (here != &before_original_home &&
            (
             (!target && !target_bytes) ||
             (
              (target && self->non_ghost_count > target) ||
              (target_bytes && self->total_estimated_size > target_bytes)
             )
            )
          )
    {
        assert(self->ring_lock);
        assert(here != &self->ring_home);

        /* At this point we know that the ring only contains nodes
            from persistent objects, plus our own home node.  We know
            this because the ring lock is held.  We can safely assume
            the current ring node is a persistent object now we know it
            is not the home */
        object = OBJECT_FROM_RING(self, here);

        if (object->state == cPersistent_UPTODATE_STATE)
        {
            CPersistentRing placeholder;
            PyObject *method;
            PyObject *temp;
            int error_occurred = 0;
            /* deactivate it. This is the main memory saver. */

            /* Add a placeholder, a dummy node in the ring.  We need
                to do this to mark our position in the ring.  It is
                possible that the PyObject_GetAttr() call below will
                invoke a __getattr__() hook in Python.  Also possible
                that deactivation will lead to a __del__ method call.
                So another thread might run, and mutate the ring as a side
                effect of object accesses.  There's no predicting then where
                in the ring here->r_next will point after that.  The
                placeholder won't move as a side effect of calling Python
                code.
            */
            insert_after(&placeholder, here);
            method = PyObject_GetAttr((PyObject *)object, py__p_deactivate);
            if (method == NULL)
                error_occurred = 1;
            else
            {
                temp = PyObject_CallObject(method, NULL);
                Py_DECREF(method);
                if (temp == NULL)
                    error_occurred = 1;
                else
                    Py_DECREF(temp);
            }

            /* Resume from whatever now follows the placeholder, and take
               the placeholder (a stack variable) out of the ring before
               it can go out of scope -- this must happen on the error
               path too, hence the deferred goto. */
            here = placeholder.r_next;
            unlink_from_ring(&placeholder);
            if (error_occurred)
                goto Done;
        }
        else
            /* Objects in any other state are skipped, not deactivated. */
            here = here->r_next;
    }
    result = 0;
Done:
    /* before_original_home is a stack variable too; always unlink it. */
    unlink_from_ring(&before_original_home);
    return result;
}

/* Run one garbage-collection scan towards the given targets, unless a
 * scan is already in progress on this cache.
 *
 * Returns a new reference to None when the scan succeeded or was skipped
 * because ring_lock was already set; returns NULL when scan_gc_items()
 * reports failure.  ring_lock is always cleared again before returning
 * from a scan that this call started.
 */
static PyObject *
lockgc(ccobject *self, int target_size, Py_ssize_t target_size_bytes)
{
    int scan_status;

    /* Testing and then taking ring_lock needs no extra protection: the
     * GIL is held, and nothing between the test and the assignment
     * calls back into Python.
     */
    if (!self->ring_lock)
    {
        self->ring_lock = 1;
        scan_status = scan_gc_items(self, target_size, target_size_bytes);
        self->ring_lock = 0;

        if (scan_status < 0)
            return NULL;
    }

    Py_INCREF(Py_None);
    return Py_None;
}

/* Incremental garbage collection: shrink the cache towards its
 * configured object-count and byte-size targets.
 *
 * args may carry one optional integer, which is ignored apart from
 * triggering a DeprecationWarning ("No argument expected").
 *
 * Returns whatever lockgc() returns, or NULL if argument parsing fails
 * or the warning is turned into an error.
 */
static PyObject *
cc_incrgc(ccobject *self, PyObject *args)
{
    int ignored_arg = -999;                 /* sentinel: "not supplied" */
    int count_at_entry = self->non_ghost_count;
    int goal = self->cache_size;
    Py_ssize_t goal_bytes = self->cache_size_bytes;

    /* The goal is computed from the counts as they are on entry, before
       argument parsing gets a chance to run any other code. */
    if (self->cache_drain_resistance >= 1)
    {
        /* A draining cache also aims a little below its current size,
           by an amount proportional to that size; use whichever goal is
           the smaller. */
        int drained_goal = (count_at_entry - 1
                            - count_at_entry / self->cache_drain_resistance);

        if (drained_goal < goal)
            goal = drained_goal;
    }

    if (!PyArg_ParseTuple(args, "|i:incrgc", &ignored_arg))
        return NULL;

    if (ignored_arg != -999)
    {
        if (PyErr_Warn(PyExc_DeprecationWarning,
                       "No argument expected") < 0)
            return NULL;
    }

    return lockgc(self, goal, goal_bytes);
}

/* Full sweep of the cache.
 *
 * With no argument, every deactivatable object is collected (both
 * targets passed to lockgc() are zero).  If an integer argument other
 * than -999 is supplied, the call is forwarded unchanged to cc_incrgc().
 *
 * Returns NULL if argument parsing fails, otherwise the result of
 * lockgc() or cc_incrgc().
 */
static PyObject *
cc_full_sweep(ccobject *self, PyObject *args)
{
    int dt = -999;   /* sentinel: stays -999 when no argument is given */

    /* TODO:  This should be deprecated;  */

    if (!PyArg_ParseTuple(args, "|i:full_sweep", &dt))
        return NULL;

    if (dt != -999)
        return cc_incrgc(self, args);

    return lockgc(self, 0, 0);
}
Loading ...