Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ sandbox / nonparametric / densityorthopoly.py

# -*- coding: utf-8 -*-
# some cut and paste characters are not ASCII
'''density estimation based on orthogonal polynomials


Author: Josef Perktold
Created: 2011-05017
License: BSD

2 versions work: based on Fourier, FPoly, and chebychev T, ChebyTPoly
also hermite polynomials, HPoly, works
other versions need normalization


TODO:

* check fourier case again:  base is orthonormal,
  but needs offsetfact = 0 and does not integrate to 1, rescaled looks good
* hermite: works but DensityOrthoPoly requires currently finite bounds
  I use it with offsettfactor 0.5 in example
* not implemented methods:
  - add bonafide density correction
  - add transformation to domain of polynomial base - DONE
    possible problem: what is the behavior at the boundary,
    offsetfact requires more work, check different cases, add as option
    moved to polynomial class by default, as attribute
* convert examples to test cases
* need examples with large density on boundary, beta ?
* organize poly classes in separate module, check new numpy.polynomials,
  polyvander
* MISE measures, order selection, ...

enhancements:
  * other polynomial bases: especially for open and half open support
  * wavelets
  * local or piecewise approximations


'''
from scipy import stats, integrate, special

import numpy as np


sqr2 = np.sqrt(2.)

class FPoly(object):
    '''Orthonormal (for weight=1) Fourier Polynomial on [0,1]

    orthonormal polynomial but density needs corfactor that I do not see what
    it is analytically

    parameterization on [0,1] from

    Sam Efromovich: Orthogonal series density estimation,
    2010 John Wiley & Sons, Inc. WIREs Comp Stat 2010 2 467-476


    '''

    def __init__(self, order):
        self.order = order
        self.domain = (0, 1)
        self.intdomain = self.domain

    def __call__(self, x):
        if self.order == 0:
            return np.ones_like(x)
        else:
            return sqr2 * np.cos(np.pi * self.order * x)

class F2Poly(object):
    '''Orthogonal (for weight=1) Fourier Polynomial on [0,pi]

    is orthogonal but first component does not square-integrate to 1
    final result seems to need a correction factor of sqrt(pi)
    _corfactor = sqrt(pi) from integrating the density

    Parameterization on [0, pi] from

    Peter Hall, Cross-Validation and the Smoothing of Orthogonal Series Density
    Estimators, JOURNAL OF MULTIVARIATE ANALYSIS 21, 189-206 (1987)

    '''

    def __init__(self, order):
        self.order = order
        self.domain = (0, np.pi)
        self.intdomain = self.domain
        self.offsetfactor = 0

    def __call__(self, x):
        if self.order == 0:
            return np.ones_like(x) / np.sqrt(np.pi)
        else:
            return sqr2 * np.cos(self.order * x) / np.sqrt(np.pi)

class ChebyTPoly(object):
    '''Orthonormal (for weight=1) Chebychev Polynomial on (-1,1)


    Notes
    -----
    integration requires to stay away from boundary, offsetfactor > 0
    maybe this implies that we cannot use it for densities that are > 0 at
    boundary ???

    or maybe there is a mistake close to the boundary, sometimes integration works.

    '''

    def __init__(self, order):
        self.order = order
        from scipy.special import chebyt
        self.poly = chebyt(order)
        self.domain = (-1, 1)
        self.intdomain = (-1+1e-6, 1-1e-6)
        #not sure if I need this, in integration nans are possible  on the boundary
        self.offsetfactor = 0.01  #required for integration


    def __call__(self, x):
        if self.order == 0:
            return np.ones_like(x) / (1-x**2)**(1/4.) /np.sqrt(np.pi)

        else:
            return self.poly(x) / (1-x**2)**(1/4.) /np.sqrt(np.pi) *np.sqrt(2)


logpi2 = np.log(np.pi)/2

class HPoly(object):
    '''Orthonormal (for weight=1) Hermite Polynomial, uses finite bounds

    for current use with DensityOrthoPoly domain is defined as [-6,6]

    '''
    def __init__(self, order):
        self.order = order
        from scipy.special import hermite
        self.poly = hermite(order)
        self.domain = (-6, +6)
        self.offsetfactor = 0.5  # note this is

    def __call__(self, x):
        k = self.order

        lnfact = -(1./2)*(k*np.log(2.) + special.gammaln(k+1) + logpi2) - x*x/2
        fact = np.exp(lnfact)

        return self.poly(x) * fact

def polyvander(x, polybase, order=5):
    polyarr = np.column_stack([polybase(i)(x) for i in range(order)])
    return polyarr

def inner_cont(polys, lower, upper, weight=None):
    '''inner product of continuous function (with weight=1)

    Parameters
    ----------
    polys : list of callables
        polynomial instances
    lower : float
        lower integration limit
    upper : float
        upper integration limit
    weight : callable or None
        weighting function

    Returns
    -------
    innp : ndarray
        symmetric 2d square array with innerproduct of all function pairs
    err : ndarray
        numerical error estimate from scipy.integrate.quad, same dimension as innp

    Examples
    --------
    >>> from scipy.special import chebyt
    >>> polys = [chebyt(i) for i in range(4)]
    >>> r, e = inner_cont(polys, -1, 1)
    >>> r
    array([[ 2.        ,  0.        , -0.66666667,  0.        ],
           [ 0.        ,  0.66666667,  0.        , -0.4       ],
           [-0.66666667,  0.        ,  0.93333333,  0.        ],
           [ 0.        , -0.4       ,  0.        ,  0.97142857]])

    '''
    n_polys = len(polys)
    innerprod = np.empty((n_polys, n_polys))
    innerprod.fill(np.nan)
    interr = np.zeros((n_polys, n_polys))

    for i in range(n_polys):
        for j in range(i+1):
            p1 = polys[i]
            p2 = polys[j]
            if weight is not None:
                innp, err = integrate.quad(lambda x: p1(x)*p2(x)*weight(x),
                                       lower, upper)
            else:
                innp, err = integrate.quad(lambda x: p1(x)*p2(x), lower, upper)
            innerprod[i,j] = innp
            interr[i,j] = err
            if not i == j:
                innerprod[j,i] = innp
                interr[j,i] = err

    return innerprod, interr


def is_orthonormal_cont(polys, lower, upper, rtol=0, atol=1e-08):
    '''check whether functions are orthonormal

    Parameters
    ----------
    polys : list of polynomials or function

    Returns
    -------
    is_orthonormal : bool
        is False if the innerproducts are not close to 0 or 1

    Notes
    -----
    this stops as soon as the first deviation from orthonormality is found.

    Examples
    --------
    >>> from scipy.special import chebyt
    >>> polys = [chebyt(i) for i in range(4)]
    >>> r, e = inner_cont(polys, -1, 1)
    >>> r
    array([[ 2.        ,  0.        , -0.66666667,  0.        ],
           [ 0.        ,  0.66666667,  0.        , -0.4       ],
           [-0.66666667,  0.        ,  0.93333333,  0.        ],
           [ 0.        , -0.4       ,  0.        ,  0.97142857]])
    >>> is_orthonormal_cont(polys, -1, 1, atol=1e-6)
    False

    >>> polys = [ChebyTPoly(i) for i in range(4)]
    >>> r, e = inner_cont(polys, -1, 1)
    >>> r
    array([[  1.00000000e+00,   0.00000000e+00,  -9.31270888e-14,
              0.00000000e+00],
           [  0.00000000e+00,   1.00000000e+00,   0.00000000e+00,
             -9.47850712e-15],
           [ -9.31270888e-14,   0.00000000e+00,   1.00000000e+00,
              0.00000000e+00],
           [  0.00000000e+00,  -9.47850712e-15,   0.00000000e+00,
              1.00000000e+00]])
    >>> is_orthonormal_cont(polys, -1, 1, atol=1e-6)
    True

    '''
    for i in range(len(polys)):
        for j in range(i+1):
            p1 = polys[i]
            p2 = polys[j]
            innerprod = integrate.quad(lambda x: p1(x)*p2(x), lower, upper)[0]
            #print i,j, innerprod
            if not np.allclose(innerprod, i==j, rtol=rtol, atol=atol):
                return False
    return True



#new versions


class DensityOrthoPoly(object):
    '''Univariate density estimation by orthonormal series expansion


    Uses an orthonormal polynomial basis to approximate a univariate density.


    currently all arguments can be given to fit, I might change it to requiring
    arguments in __init__ instead.
    '''

    def __init__(self, polybase=None, order=5):
        if polybase is not None:
            self.polybase = polybase
            self.polys = polys = [polybase(i) for i in range(order)]
        #try:
        #self.offsetfac = 0.05
        #self.offsetfac = polys[0].offsetfactor #polys maybe not defined yet
        self._corfactor = 1
        self._corshift = 0


    def fit(self, x, polybase=None, order=5, limits=None):
        '''estimate the orthogonal polynomial approximation to the density

        '''
        if polybase is None:
            polys = self.polys[:order]
        else:
            self.polybase = polybase
            self.polys = polys = [polybase(i) for i in range(order)]

        #move to init ?
        if not hasattr(self, 'offsetfac'):
            self.offsetfac = polys[0].offsetfactor


        xmin, xmax = x.min(), x.max()
        if limits is None:
            self.offset = offset = (xmax - xmin) * self.offsetfac
            limits = self.limits = (xmin - offset, xmax + offset)

        interval_length = limits[1] - limits[0]
        xinterval = xmax - xmin
        # need to cover (half-)open intervalls
        self.shrink = 1. / interval_length #xinterval/interval_length
        offset = (interval_length - xinterval ) / 2.
        self.shift = xmin - offset

        self.x = x = self._transform(x)

        coeffs = [(p(x)).mean() for p in polys]
        self.coeffs = coeffs
        self.polys = polys
        self._verify()  #verify that it is a proper density

        return self #coeffs, polys

    def evaluate(self, xeval, order=None):
        xeval = self._transform(xeval)
        if order is None:
            order = len(self.polys)
        res = sum(c*p(xeval) for c, p in list(zip(self.coeffs, self.polys))[:order])
        res = self._correction(res)
        return res

    def __call__(self, xeval):
        '''alias for evaluate, except no order argument'''
        return self.evaluate(xeval)

    def _verify(self):
        '''check for bona fide density correction

        currently only checks that density integrates to 1
Loading ...