Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ sandbox / nonparametric / dgp_examples.py

# -*- coding: utf-8 -*-
"""Examples of non-linear functions for non-parametric regression

Created on Sat Jan 05 20:21:22 2013

Author: Josef Perktold
"""

import numpy as np

## Functions

def fg1(x):
    '''Fan and Gijbels example function 1

    '''
    return x + 2 * np.exp(-16 * x**2)

def fg1eu(x):
    '''Eubank similar to Fan and Gijbels example function 1

    '''
    return x + 0.5 * np.exp(-50 * (x - 0.5)**2)

def fg2(x):
    '''Fan and Gijbels example function 2

    '''
    return np.sin(2 * x) + 2 * np.exp(-16 * x**2)

def func1(x):
    '''made up example with sin, square

    '''
    return np.sin(x * 5) / x + 2. * x - 1. * x**2

## Classes with Data Generating Processes

doc = {'description':
'''Base Class for Univariate non-linear example

    Does not work on it's own.
    needs additional at least self.func
''',
'ref': ''}

class _UnivariateFunction(object):
    #Base Class for Univariate non-linear example.
    #Does not work on it's own. needs additionally at least self.func
    __doc__ = '''%(description)s

    Parameters
    ----------
    nobs : int
        number of observations to simulate
    x : None or 1d array
        If x is given then it is used for the exogenous variable instead of
        creating a random sample
    distr_x : None or distribution instance
        Only used if x is None. The rvs method is used to create a random
        sample of the exogenous (explanatory) variable.
    distr_noise : None or distribution instance
        The rvs method is used to create a random sample of the errors.

    Attributes
    ----------
    x : ndarray, 1-D
        exogenous or explanatory variable. x is sorted.
    y : ndarray, 1-D
        endogenous or response variable
    y_true : ndarray, 1-D
        expected values of endogenous or response variable, i.e. values of y
        without noise
    func : callable
        underlying function (defined by subclass)

    %(ref)s
    ''' #% doc

    def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):

        if x is None:
            if distr_x is None:
                x = np.random.normal(loc=0, scale=self.s_x, size=nobs)
            else:
                x = distr_x.rvs(size=nobs)
            x.sort()

        self.x = x

        if distr_noise is None:
            noise = np.random.normal(loc=0, scale=self.s_noise, size=nobs)
        else:
            noise = distr_noise.rvs(size=nobs)

        if hasattr(self, 'het_scale'):
            noise *= self.het_scale(self.x)

        #self.func = fg1
        self.y_true = y_true = self.func(x)
        self.y = y_true + noise


    def plot(self, scatter=True, ax=None):
        '''plot the mean function and optionally the scatter of the sample

        Parameters
        ----------
        scatter: bool
            If true, then add scatterpoints of sample to plot.
        ax : None or matplotlib axis instance
            If None, then a matplotlib.pyplot figure is created, otherwise
            the given axis, ax, is used.

        Returns
        -------
        Figure
            This is either the created figure instance or the one associated
            with ax if ax is given.

        '''
        if ax is None:
            import matplotlib.pyplot as plt
            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)

        if scatter:
            ax.plot(self.x, self.y, 'o', alpha=0.5)

        xx = np.linspace(self.x.min(), self.x.max(), 100)
        ax.plot(xx, self.func(xx), lw=2, color='b', label='dgp mean')
        return ax.figure

doc = {'description':
'''Fan and Gijbels example function 1

linear trend plus a hump
''',
'ref':
'''
References
----------
Fan, Jianqing, and Irene Gijbels. 1992. "Variable Bandwidth and Local
Linear Regression Smoothers."
The Annals of Statistics 20 (4) (December): 2008-2036. doi:10.2307/2242378.

'''}

class UnivariateFanGijbels1(_UnivariateFunction):
    __doc__ = _UnivariateFunction.__doc__ % doc


    def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
        self.s_x = 1.
        self.s_noise = 0.7
        self.func = fg1
        super(self.__class__, self).__init__(nobs=nobs, x=x,
                                             distr_x=distr_x,
                                             distr_noise=distr_noise)

doc['description'] =\
'''Fan and Gijbels example function 2

sin plus a hump
'''

class UnivariateFanGijbels2(_UnivariateFunction):
    __doc__ = _UnivariateFunction.__doc__ % doc

    def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
        self.s_x = 1.
        self.s_noise = 0.5
        self.func = fg2
        super(self.__class__, self).__init__(nobs=nobs, x=x,
                                             distr_x=distr_x,
                                             distr_noise=distr_noise)

class UnivariateFanGijbels1EU(_UnivariateFunction):
    '''

    Eubank p.179f
    '''

    def __init__(self, nobs=50, x=None, distr_x=None, distr_noise=None):
        if distr_x is None:
            from scipy import stats
            distr_x = stats.uniform
        self.s_noise = 0.15
        self.func = fg1eu
        super(self.__class__, self).__init__(nobs=nobs, x=x,
                                             distr_x=distr_x,
                                             distr_noise=distr_noise)

class UnivariateFunc1(_UnivariateFunction):
    '''

    made up, with sin and quadratic trend
    '''

    def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
        if x is None and distr_x is None:
            from scipy import stats
            distr_x = stats.uniform(-2, 4)
        else:
            nobs = x.shape[0]
        self.s_noise = 2.
        self.func = func1
        super(UnivariateFunc1, self).__init__(nobs=nobs, x=x,
                                             distr_x=distr_x,
                                             distr_noise=distr_noise)

    def het_scale(self, x):
        return np.sqrt(np.abs(3+x))