/*****************************************************************************
Copyright (c) 2001, 2002 Zope Foundation and Contributors.
All Rights Reserved.
This software is subject to the provisions of the Zope Public License,
Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
FOR A PARTICULAR PURPOSE
****************************************************************************/
/*
Objects are stored under three different regimes:
Regime 1: Persistent Classes
Persistent Classes are part of ZClasses. They are stored in the
self->data dictionary, and are never garbage collected.
The klass_items() method returns a sequence of (oid,object) tuples for
every Persistent Class, which should make it possible to implement
garbage collection in Python if necessary.
Regime 2: Ghost Objects
There is no benefit to keeping a ghost object which has no external
references, therefore a weak reference scheme is used to ensure that
ghost objects are removed from memory as soon as possible, when the
last external reference is lost.
Ghost objects are stored in the self->data dictionary. Normally a
dictionary keeps a strong reference on its values, however this
reference count is 'stolen'.
This weak reference scheme leaves a dangling reference, in the
dictionary, when the last external reference is lost. To clean up this
dangling reference the persistent object dealloc function calls
self->cache->_oid_unreferenced(self->oid). The cache looks up the oid
in the dictionary, ensures it points to an object whose reference
count is zero, then removes it from the dictionary. Before removing
the object from the dictionary it must temporarily resurrect the
object in much the same way that class instances are resurrected
before their __del__ is called.
Since ghost objects are stored under a different regime to non-ghost
objects, an extra ghostify function in cPersistenceAPI replaces
self->state=GHOST_STATE assignments that were common in other
persistent classes (such as BTrees).
Regime 3: Non-Ghost Objects
Non-ghost objects are stored in two data structures: the dictionary
mapping oids to objects and a doubly-linked list that encodes the
order in which the objects were accessed. The dictionary reference is
borrowed, as it is for ghosts. The list reference is a new reference;
the list stores recently used objects, even if they are otherwise
unreferenced, to avoid loading the object from the database again.
The doubly-link-list nodes contain next and previous pointers linking
together the cache and all non-ghost persistent objects.
The node embedded in the cache is the home position. On every
attribute access a non-ghost object will relink itself just behind the
home position in the ring. Objects accessed least recently will
eventually find themselves positioned after the home position.
Occasionally other nodes are temporarily inserted in the ring as
position markers. The cache contains a ring_lock flag which must be
set and unset before and after doing so. Only if the flag is unset can
the cache assume that all nodes are either his own home node, or nodes
from persistent objects. This assumption is useful during the garbage
collection process.
The number of non-ghost objects is counted in self->non_ghost_count.
The garbage collection process consists of traversing the ring, and
deactivating (that is, turning into a ghost) every object until
self->non_ghost_count is down to the target size, or until it
reaches the home position again.
Note that objects in the sticky or changed states are still kept in
the ring, however they can not be deactivated. The garbage collection
process must skip such objects, rather than deactivating them.
*/
static char cPickleCache_doc_string[] =
"Defines the PickleCache used by ZODB Connection objects.\n"
"\n"
"$Id$\n";
#define DONT_USE_CPERSISTENCECAPI
#include "cPersistence.h"
#include "structmember.h"
#include <time.h>
#include <stddef.h>
#undef Py_FindMethod
/* Python string objects to speed lookups; set by module init. */
static PyObject *py__p_changed;
static PyObject *py__p_deactivate;
static PyObject *py__p_jar;
static PyObject *py__p_oid;
static cPersistenceCAPIstruct *cPersistenceCAPI;
/* This object is the pickle cache. The CACHE_HEAD macro guarantees
that layout of this struct is the same as the start of
ccobject_head in cPersistence.c */
typedef struct
{
CACHE_HEAD
int klass_count; /* count of persistent classes */
PyObject *data; /* oid -> object dict */
PyObject *jar; /* Connection object */
int cache_size; /* target number of items in cache */
Py_ssize_t cache_size_bytes; /* target total estimated size of
items in cache */
/* Most of the time the ring contains only:
* many nodes corresponding to persistent objects
* one 'home' node from the cache.
In some cases it is handy to temporarily add other types
of node into the ring as placeholders. 'ring_lock' is a boolean
indicating that someone has already done this. Currently this
is only used by the garbage collection code. */
int ring_lock;
/* 'cache_drain_resistance' controls how quickly the cache size will drop
when it is smaller than the configured size. A value of zero means it
will not drop below the configured size (suitable for most caches).
Otherwise, it will remove cache_non_ghost_count/cache_drain_resistance
items from the cache every time (suitable for rarely used caches, such
as those associated with Zope versions. */
int cache_drain_resistance;
} ccobject;
static int cc_ass_sub(ccobject *self, PyObject *key, PyObject *v);
/* ---------------------------------------------------------------- */
#define OBJECT_FROM_RING(SELF, HERE) \
((cPersistentObject *)(((char *)here) - offsetof(cPersistentObject, ring)))
/* Insert self into the ring, following after. */
static void
insert_after(CPersistentRing *self, CPersistentRing *after)
{
assert(self != NULL);
assert(after != NULL);
self->r_prev = after;
self->r_next = after->r_next;
after->r_next->r_prev = self;
after->r_next = self;
}
/* Remove self from the ring. */
static void
unlink_from_ring(CPersistentRing *self)
{
assert(self != NULL);
self->r_prev->r_next = self->r_next;
self->r_next->r_prev = self->r_prev;
}
static int
scan_gc_items(ccobject *self, int target, Py_ssize_t target_bytes)
{
/* This function must only be called with the ring lock held,
because it places non-object placeholders in the ring.
*/
cPersistentObject *object;
CPersistentRing *here;
CPersistentRing before_original_home;
int result = -1; /* guilty until proved innocent */
/* Scan the ring, from least to most recently used, deactivating
* up-to-date objects, until we either find the ring_home again or
* or we've ghosted enough objects to reach the target size.
* Tricky: __getattr__ and __del__ methods can do anything, and in
* particular if we ghostify an object with a __del__ method, that method
* can load the object again, putting it back into the MRU part of the
* ring. Waiting to find ring_home again can thus cause an infinite
* loop (Collector #1208). So before_original_home records the MRU
* position we start with, and we stop the scan when we reach that.
*/
insert_after(&before_original_home, self->ring_home.r_prev);
here = self->ring_home.r_next; /* least recently used object */
/* All objects should be deactivated when the objects count parameter
* (target) is zero and the size limit parameter in bytes(target_bytes)
* is also zero.
*
* Otherwise the objects should be collect while one of the following
* conditions are True:
* - the ghost count is bigger than the number of objects limit(target).
* - the estimated size in bytes is bigger than the size limit in
* bytes(target_bytes).
*/
while (here != &before_original_home &&
(
(!target && !target_bytes) ||
(
(target && self->non_ghost_count > target) ||
(target_bytes && self->total_estimated_size > target_bytes)
)
)
)
{
assert(self->ring_lock);
assert(here != &self->ring_home);
/* At this point we know that the ring only contains nodes
from persistent objects, plus our own home node. We know
this because the ring lock is held. We can safely assume
the current ring node is a persistent object now we know it
is not the home */
object = OBJECT_FROM_RING(self, here);
if (object->state == cPersistent_UPTODATE_STATE)
{
CPersistentRing placeholder;
PyObject *method;
PyObject *temp;
int error_occurred = 0;
/* deactivate it. This is the main memory saver. */
/* Add a placeholder, a dummy node in the ring. We need
to do this to mark our position in the ring. It is
possible that the PyObject_GetAttr() call below will
invoke a __getattr__() hook in Python. Also possible
that deactivation will lead to a __del__ method call.
So another thread might run, and mutate the ring as a side
effect of object accesses. There's no predicting then where
in the ring here->next will point after that. The
placeholder won't move as a side effect of calling Python
code.
*/
insert_after(&placeholder, here);
method = PyObject_GetAttr((PyObject *)object, py__p_deactivate);
if (method == NULL)
error_occurred = 1;
else
{
temp = PyObject_CallObject(method, NULL);
Py_DECREF(method);
if (temp == NULL)
error_occurred = 1;
else
Py_DECREF(temp);
}
here = placeholder.r_next;
unlink_from_ring(&placeholder);
if (error_occurred)
goto Done;
}
else
here = here->r_next;
}
result = 0;
Done:
unlink_from_ring(&before_original_home);
return result;
}
static PyObject *
lockgc(ccobject *self, int target_size, Py_ssize_t target_size_bytes)
{
/* This is thread-safe because of the GIL, and there's nothing
* in between checking the ring_lock and acquiring it that calls back
* into Python.
*/
if (self->ring_lock)
{
Py_INCREF(Py_None);
return Py_None;
}
self->ring_lock = 1;
if (scan_gc_items(self, target_size, target_size_bytes) < 0)
{
self->ring_lock = 0;
return NULL;
}
self->ring_lock = 0;
Py_INCREF(Py_None);
return Py_None;
}
static PyObject *
cc_incrgc(ccobject *self, PyObject *args)
{
int obsolete_arg = -999;
int starting_size = self->non_ghost_count;
int target_size = self->cache_size;
Py_ssize_t target_size_bytes = self->cache_size_bytes;
if (self->cache_drain_resistance >= 1)
{
/* This cache will gradually drain down to a small size. Check
a (small) number of objects proportional to the current size */
int target_size_2 = (starting_size - 1
- starting_size / self->cache_drain_resistance);
if (target_size_2 < target_size)
target_size = target_size_2;
}
if (!PyArg_ParseTuple(args, "|i:incrgc", &obsolete_arg))
return NULL;
if (obsolete_arg != -999
&&
(PyErr_Warn(PyExc_DeprecationWarning,
"No argument expected")
< 0))
return NULL;
return lockgc(self, target_size, target_size_bytes);
}
static PyObject *
cc_full_sweep(ccobject *self, PyObject *args)
{
int dt = -999;
/* TODO: This should be deprecated; */
if (!PyArg_ParseTuple(args, "|i:full_sweep", &dt))
return NULL;
if (dt == -999)
return lockgc(self, 0, 0);
else
return cc_incrgc(self, args);
}
Loading ...