Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ sandbox / nonparametric / kde2.py

# -*- coding: utf-8 -*-
from statsmodels.compat.python import lzip

import numpy as np

from statsmodels.tools.validation import array_like
from . import kernels


#TODO: should this be a function?
class KDE(object):
    """
    Kernel Density Estimator

    Parameters
    ----------
    x : array_like
        N-dimensional array from which the density is to be estimated
    kernel : Kernel Class
        Should be a class from *
    """
    #TODO: amend docs for Nd case?
    def __init__(self, x, kernel=None):
        x = array_like(x, "x", maxdim=2, contiguous=True)
        if x.ndim == 1:
            x = x[:,None]

        nobs, n_series = x.shape

        if kernel is None:
            kernel = kernels.Gaussian()  # no meaningful bandwidth yet

        if n_series > 1:
            if isinstance( kernel, kernels.CustomKernel ):
                kernel = kernels.NdKernel(n_series, kernels = kernel)

        self.kernel = kernel
        self.n = n_series  #TODO change attribute
        self.x = x

    def density(self, x):
        return self.kernel.density(self.x, x)

    def __call__(self, x, h="scott"):
        return np.array([self.density(xx) for xx in x])

    def evaluate(self, x, h="silverman"):
        density = self.kernel.density
        return np.array([density(xx) for xx in x])


if __name__ == "__main__":
    from numpy import random
    import matplotlib.pyplot as plt
    import statsmodels.nonparametric.bandwidths as bw
    from statsmodels.sandbox.nonparametric.testdata import kdetest

    # 1-D case
    random.seed(142)
    x = random.standard_t(4.2, size = 50)
    h = bw.bw_silverman(x)
    #NOTE: try to do it with convolution
    support = np.linspace(-10,10,512)


    kern = kernels.Gaussian(h = h)
    kde = KDE( x, kern)
    print(kde.density(1.015469))
    print(0.2034675)
    Xs = np.arange(-10,10,0.1)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(Xs, kde(Xs), "-")
    ax.set_ylim(-10, 10)
    ax.set_ylim(0,0.4)


    # 2-D case
    x = lzip(kdetest.faithfulData["eruptions"], kdetest.faithfulData["waiting"])
    x = np.array(x)
    x = (x - x.mean(0))/x.std(0)
    nobs = x.shape[0]
    H = kdetest.Hpi
    kern = kernels.NdKernel( 2 )
    kde = KDE( x, kern )
    print(kde.density( np.matrix( [1,2 ]))) #.T
    plt.figure()
    plt.plot(x[:,0], x[:,1], 'o')


    n_grid = 50
    xsp = np.linspace(x.min(0)[0], x.max(0)[0], n_grid)
    ysp = np.linspace(x.min(0)[1], x.max(0)[1], n_grid)
#    xsorted = np.sort(x)
#    xlow = xsorted[nobs/4]
#    xupp = xsorted[3*nobs/4]
#    xsp = np.linspace(xlow[0], xupp[0], n_grid)
#    ysp = np.linspace(xlow[1], xupp[1], n_grid)
    xr, yr = np.meshgrid(xsp, ysp)
    kde_vals = np.array([kde.density( np.matrix( [xi, yi ]) ) for xi, yi in
               zip(xr.ravel(), yr.ravel())])
    plt.contour(xsp, ysp, kde_vals.reshape(n_grid, n_grid))

    plt.show()


    # 5 D case
#    random.seed(142)
#    mu = [1.0, 4.0, 3.5, -2.4, 0.0]
#    sigma = np.matrix(
#        [[ 0.6 - 0.1*abs(i-j) if i != j else 1.0 for j in xrange(5)] for i in xrange(5)])
#    x = random.multivariate_normal(mu, sigma, size = 100)
#    kern = kernel.Gaussian()
#    kde = KernelEstimate( x, kern )