"""
Empirical likelihood inference on descriptive statistics
This module conducts hypothesis tests and constructs confidence
intervals for the mean, variance, skewness, kurtosis and correlation.
If matplotlib is installed, this module can also generate multivariate
confidence region plots as well as mean-variance contour plots.
See _OptFuncts docstring for technical details and optimization variable
definitions.
General References:
------------------
Owen, A. (2001). "Empirical Likelihood." Chapman and Hall
"""
import numpy as np
from scipy import optimize
from scipy.stats import chi2, skew, kurtosis
from statsmodels.base.optimizer import _fit_newton
import itertools
from statsmodels.graphics import utils
def DescStat(endog):
"""
Returns an instance to conduct inference on descriptive statistics
via empirical likelihood. See DescStatUV and DescStatMV for more
information.
Parameters
----------
endog : ndarray
Array of data
Returns : DescStat instance
If k=1, the function returns a univariate instance, DescStatUV.
If k>1, the function returns a multivariate instance, DescStatMV.
"""
if endog.ndim == 1:
endog = endog.reshape(len(endog), 1)
if endog.shape[1] == 1:
return DescStatUV(endog)
if endog.shape[1] > 1:
return DescStatMV(endog)
class _OptFuncts(object):
"""
A class that holds functions that are optimized/solved.
The general setup of the class is simple. Any method that starts with
_opt_ creates a vector of estimating equations named est_vect such that
np.dot(p, (est_vect))=0 where p is the weight on each
observation as a 1 x n array and est_vect is n x k. Then _modif_Newton is
called to determine the optimal p by solving for the Lagrange multiplier
(eta) in the profile likelihood maximization problem. In the presence
of nuisance parameters, _opt_ functions are optimized over to profile
out the nuisance parameters.
Any method starting with _ci_limits calculates the log likelihood
ratio for a specific value of a parameter and then subtracts a
pre-specified critical value. This is solved so that llr - crit = 0.
"""
def __init__(self, endog):
pass
def _log_star(self, eta, est_vect, weights, nobs):
"""
Transforms the log of observation probabilities in terms of the
Lagrange multiplier to the log 'star' of the probabilities.
Parameters
----------
eta : float
Lagrange multiplier
est_vect : ndarray (n,k)
Estimating equations vector
wts : nx1 array
Observation weights
Returns
------
data_star : ndarray
The weighted logstar of the estimting equations
Notes
-----
This function is only a placeholder for the _fit_Newton.
The function value is not used in optimization and the optimal value
is disregarded when computing the log likelihood ratio.
"""
data_star = np.log(weights) + (np.sum(weights) +\
np.dot(est_vect, eta))
idx = data_star < 1. / nobs
not_idx = ~idx
nx = nobs * data_star[idx]
data_star[idx] = np.log(1. / nobs) - 1.5 + nx * (2. - nx / 2)
data_star[not_idx] = np.log(data_star[not_idx])
return data_star
def _hess(self, eta, est_vect, weights, nobs):
"""
Calculates the hessian of a weighted empirical likelihood
problem.
Parameters
----------
eta : ndarray, (1,m)
Lagrange multiplier in the profile likelihood maximization
est_vect : ndarray (n,k)
Estimating equations vector
weights : 1darray
Observation weights
Returns
-------
hess : m x m array
Weighted hessian used in _wtd_modif_newton
"""
#eta = np.squeeze(eta)
data_star_doub_prime = np.sum(weights) + np.dot(est_vect, eta)
idx = data_star_doub_prime < 1. / nobs
not_idx = ~idx
data_star_doub_prime[idx] = - nobs ** 2
data_star_doub_prime[not_idx] = - (data_star_doub_prime[not_idx]) ** -2
wtd_dsdp = weights * data_star_doub_prime
return np.dot(est_vect.T, wtd_dsdp[:, None] * est_vect)
def _grad(self, eta, est_vect, weights, nobs):
"""
Calculates the gradient of a weighted empirical likelihood
problem
Parameters
----------
eta : ndarray, (1,m)
Lagrange multiplier in the profile likelihood maximization
est_vect : ndarray, (n,k)
Estimating equations vector
weights : 1darray
Observation weights
Returns
-------
gradient : ndarray (m,1)
The gradient used in _wtd_modif_newton
"""
#eta = np.squeeze(eta)
data_star_prime = np.sum(weights) + np.dot(est_vect, eta)
idx = data_star_prime < 1. / nobs
not_idx = ~idx
data_star_prime[idx] = nobs * (2 - nobs * data_star_prime[idx])
data_star_prime[not_idx] = 1. / data_star_prime[not_idx]
return np.dot(weights * data_star_prime, est_vect)
def _modif_newton(self, eta, est_vect, weights):
"""
Modified Newton's method for maximizing the log 'star' equation. This
function calls _fit_newton to find the optimal values of eta.
Parameters
----------
eta : ndarray, (1,m)
Lagrange multiplier in the profile likelihood maximization
est_vect : ndarray, (n,k)
Estimating equations vector
weights : 1darray
Observation weights
Returns
-------
params : 1xm array
Lagrange multiplier that maximizes the log-likelihood
"""
nobs = len(est_vect)
f = lambda x0: - np.sum(self._log_star(x0, est_vect, weights, nobs))
grad = lambda x0: - self._grad(x0, est_vect, weights, nobs)
hess = lambda x0: - self._hess(x0, est_vect, weights, nobs)
kwds = {'tol': 1e-8}
eta = eta.squeeze()
res = _fit_newton(f, grad, eta, (), kwds, hess=hess, maxiter=50, \
disp=0)
return res[0]
def _find_eta(self, eta):
"""
Finding the root of sum(xi-h0)/(1+eta(xi-mu)) solves for
eta when computing ELR for univariate mean.
Parameters
----------
eta : float
Lagrange multiplier in the empirical likelihood maximization
Returns
-------
llr : float
n times the log likelihood value for a given value of eta
"""
return np.sum((self.endog - self.mu0) / \
(1. + eta * (self.endog - self.mu0)))
def _ci_limits_mu(self, mu):
"""
Calculates the difference between the log likelihood of mu_test and a
specified critical value.
Parameters
----------
mu : float
Hypothesized value of the mean.
Returns
-------
diff : float
The difference between the log likelihood value of mu0 and
a specified value.
"""
return self.test_mean(mu)[0] - self.r0
def _find_gamma(self, gamma):
"""
Finds gamma that satisfies
sum(log(n * w(gamma))) - log(r0) = 0
Used for confidence intervals for the mean
Parameters
----------
gamma : float
Lagrange multiplier when computing confidence interval
Returns
-------
diff : float
The difference between the log-liklihood when the Lagrange
multiplier is gamma and a pre-specified value
"""
denom = np.sum((self.endog - gamma) ** -1)
new_weights = (self.endog - gamma) ** -1 / denom
return -2 * np.sum(np.log(self.nobs * new_weights)) - \
self.r0
def _opt_var(self, nuisance_mu, pval=False):
"""
This is the function to be optimized over a nuisance mean parameter
to determine the likelihood ratio for the variance
Parameters
----------
nuisance_mu : float
Value of a nuisance mean parameter
Returns
-------
llr : float
Log likelihood of a pre-specified variance holding the nuisance
parameter constant
"""
endog = self.endog
nobs = self.nobs
sig_data = ((endog - nuisance_mu) ** 2 \
- self.sig2_0)
mu_data = (endog - nuisance_mu)
est_vect = np.column_stack((mu_data, sig_data))
eta_star = self._modif_newton(np.array([1. / nobs,
1. / nobs]), est_vect,
np.ones(nobs) * (1. / nobs))
denom = 1 + np.dot(eta_star, est_vect.T)
self.new_weights = 1. / nobs * 1. / denom
llr = np.sum(np.log(nobs * self.new_weights))
if pval: # Used for contour plotting
return chi2.sf(-2 * llr, 1)
return -2 * llr
def _ci_limits_var(self, var):
"""
Used to determine the confidence intervals for the variance.
It calls test_var and when called by an optimizer,
finds the value of sig2_0 that is chi2.ppf(significance-level)
Parameters
----------
var_test : float
Hypothesized value of the variance
Returns
-------
diff : float
The difference between the log likelihood ratio at var_test and a
pre-specified value.
"""
return self.test_var(var)[0] - self.r0
def _opt_skew(self, nuis_params):
"""
Called by test_skew. This function is optimized over
nuisance parameters mu and sigma
Parameters
----------
nuis_params : 1darray
An array with a nuisance mean and variance parameter
Returns
-------
llr : float
The log likelihood ratio of a pre-specified skewness holding
the nuisance parameters constant.
"""
endog = self.endog
nobs = self.nobs
mu_data = endog - nuis_params[0]
sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
skew_data = ((((endog - nuis_params[0]) ** 3) /
(nuis_params[1] ** 1.5))) - self.skew0
est_vect = np.column_stack((mu_data, sig_data, skew_data))
eta_star = self._modif_newton(np.array([1. / nobs,
1. / nobs,
1. / nobs]), est_vect,
np.ones(nobs) * (1. / nobs))
denom = 1. + np.dot(eta_star, est_vect.T)
self.new_weights = 1. / nobs * 1. / denom
llr = np.sum(np.log(nobs * self.new_weights))
return -2 * llr
def _opt_kurt(self, nuis_params):
"""
Called by test_kurt. This function is optimized over
nuisance parameters mu and sigma
Parameters
----------
Loading ...