# -*- coding: utf-8 -*-
#pylint: disable-msg=W0142
"""Statistical power, solving for nobs, ... - trial version
Created on Sat Jan 12 21:48:06 2013
Author: Josef Perktold
Example
roundtrip - root with respect to all variables
calculated, desired
nobs 33.367204205 33.367204205
effect 0.5 0.5
alpha 0.05 0.05
power 0.8 0.8
TODO:
refactoring
- rename beta -> power, beta (type 2 error is beta = 1-power) DONE
- I think the current implementation can handle any kinds of extra keywords
(except for maybe raising meaningful exceptions
- streamline code, I think internally classes can be merged
how to extend to k-sample tests?
user interface for different tests that map to the same (internal) test class
- sequence of arguments might be inconsistent,
arg and/or kwds so python checks what's required and what can be None.
- templating for docstrings ?
"""
from statsmodels.compat.python import iteritems
import numpy as np
from scipy import stats, optimize
from statsmodels.tools.rootfinding import brentq_expanding
def ttest_power(effect_size, nobs, alpha, df=None, alternative='two-sided'):
'''Calculate power of a ttest
'''
d = effect_size
if df is None:
df = nobs - 1
if alternative in ['two-sided', '2s']:
alpha_ = alpha / 2. #no inplace changes, does not work
elif alternative in ['smaller', 'larger']:
alpha_ = alpha
else:
raise ValueError("alternative has to be 'two-sided', 'larger' " +
"or 'smaller'")
pow_ = 0
if alternative in ['two-sided', '2s', 'larger']:
crit_upp = stats.t.isf(alpha_, df)
#print crit_upp, df, d*np.sqrt(nobs)
# use private methods, generic methods return nan with negative d
if np.any(np.isnan(crit_upp)):
# avoid endless loop, https://github.com/scipy/scipy/issues/2667
pow_ = np.nan
else:
pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs))
if alternative in ['two-sided', '2s', 'smaller']:
crit_low = stats.t.ppf(alpha_, df)
#print crit_low, df, d*np.sqrt(nobs)
if np.any(np.isnan(crit_low)):
pow_ = np.nan
else:
pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs))
return pow_
def normal_power(effect_size, nobs, alpha, alternative='two-sided', sigma=1.):
'''Calculate power of a normal distributed test statistic
'''
d = effect_size
if alternative in ['two-sided', '2s']:
alpha_ = alpha / 2. #no inplace changes, does not work
elif alternative in ['smaller', 'larger']:
alpha_ = alpha
else:
raise ValueError("alternative has to be 'two-sided', 'larger' " +
"or 'smaller'")
pow_ = 0
if alternative in ['two-sided', '2s', 'larger']:
crit = stats.norm.isf(alpha_)
pow_ = stats.norm.sf(crit - d*np.sqrt(nobs)/sigma)
if alternative in ['two-sided', '2s', 'smaller']:
crit = stats.norm.ppf(alpha_)
pow_ += stats.norm.cdf(crit - d*np.sqrt(nobs)/sigma)
return pow_
def ftest_anova_power(effect_size, nobs, alpha, k_groups=2, df=None):
'''power for ftest for one way anova with k equal sized groups
nobs total sample size, sum over all groups
should be general nobs observations, k_groups restrictions ???
'''
df_num = nobs - k_groups
df_denom = k_groups - 1
crit = stats.f.isf(alpha, df_denom, df_num)
pow_ = stats.ncf.sf(crit, df_denom, df_num, effect_size**2 * nobs)
return pow_#, crit
def ftest_power(effect_size, df_num, df_denom, alpha, ncc=1):
'''Calculate the power of a F-test.
Parameters
----------
effect_size : float
standardized effect size, mean divided by the standard deviation.
effect size has to be positive.
df_num : int or float
numerator degrees of freedom.
df_denom : int or float
denominator degrees of freedom.
alpha : float in interval (0,1)
significance level, e.g. 0.05, is the probability of a type I
error, that is wrong rejections if the Null Hypothesis is true.
ncc : int
degrees of freedom correction for non-centrality parameter.
see Notes
Returns
-------
power : float
Power of the test, e.g. 0.8, is one minus the probability of a
type II error. Power is the probability that the test correctly
rejects the Null Hypothesis if the Alternative Hypothesis is true.
Notes
-----
sample size is given implicitly by df_num
set ncc=0 to match t-test, or f-test in LikelihoodModelResults.
ncc=1 matches the non-centrality parameter in R::pwr::pwr.f2.test
ftest_power with ncc=0 should also be correct for f_test in regression
models, with df_num and d_denom as defined there. (not verified yet)
'''
nc = effect_size**2 * (df_denom + df_num + ncc)
crit = stats.f.isf(alpha, df_denom, df_num)
pow_ = stats.ncf.sf(crit, df_denom, df_num, nc)
return pow_ #, crit, nc
#class based implementation
#--------------------------
class Power(object):
'''Statistical Power calculations, Base Class
so far this could all be class methods
'''
def __init__(self, **kwds):
self.__dict__.update(kwds)
# used only for instance level start values
self.start_ttp = dict(effect_size=0.01, nobs=10., alpha=0.15,
power=0.6, nobs1=10., ratio=1,
df_num=10, df_denom=3 # for FTestPower
)
# TODO: nobs1 and ratio are for ttest_ind,
# need start_ttp for each test/class separately,
# possible rootfinding problem for effect_size, starting small seems to
# work
from collections import defaultdict
self.start_bqexp = defaultdict(dict)
for key in ['nobs', 'nobs1', 'df_num', 'df_denom']:
self.start_bqexp[key] = dict(low=2., start_upp=50.)
for key in ['df_denom']:
self.start_bqexp[key] = dict(low=1., start_upp=50.)
for key in ['ratio']:
self.start_bqexp[key] = dict(low=1e-8, start_upp=2)
for key in ['alpha']:
self.start_bqexp[key] = dict(low=1e-12, upp=1 - 1e-12)
def power(self, *args, **kwds):
raise NotImplementedError
def _power_identity(self, *args, **kwds):
power_ = kwds.pop('power')
return self.power(*args, **kwds) - power_
def solve_power(self, **kwds):
'''solve for any one of the parameters of a t-test
for t-test the keywords are:
effect_size, nobs, alpha, power
exactly one needs to be ``None``, all others need numeric values
*attaches*
cache_fit_res : list
Cache of the result of the root finding procedure for the latest
call to ``solve_power``, mainly for debugging purposes.
The first element is the success indicator, one if successful.
The remaining elements contain the return information of the up to
three solvers that have been tried.
'''
#TODO: maybe use explicit kwds,
# nicer but requires inspect? and not generic across tests
# I'm duplicating this in the subclass to get informative docstring
key = [k for k,v in iteritems(kwds) if v is None]
#print kwds, key
if len(key) != 1:
raise ValueError('need exactly one keyword that is None')
key = key[0]
if key == 'power':
del kwds['power']
return self.power(**kwds)
if kwds['effect_size'] == 0:
import warnings
from statsmodels.tools.sm_exceptions import HypothesisTestWarning
warnings.warn('Warning: Effect size of 0 detected', HypothesisTestWarning)
if key == 'power':
return kwds['alpha']
if key == 'alpha':
return kwds['power']
else:
raise ValueError('Cannot detect an effect-size of 0. Try changing your effect-size.')
self._counter = 0
def func(x):
kwds[key] = x
fval = self._power_identity(**kwds)
self._counter += 1
#print self._counter,
if self._counter > 500:
raise RuntimeError('possible endless loop (500 NaNs)')
if np.isnan(fval):
return np.inf
else:
return fval
#TODO: I'm using the following so I get a warning when start_ttp is not defined
try:
start_value = self.start_ttp[key]
except KeyError:
start_value = 0.9
import warnings
from statsmodels.tools.sm_exceptions import ValueWarning
warnings.warn('Warning: using default start_value for {0}'.format(key), ValueWarning)
fit_kwds = self.start_bqexp[key]
fit_res = []
#print vars()
try:
val, res = brentq_expanding(func, full_output=True, **fit_kwds)
failed = False
fit_res.append(res)
except ValueError:
failed = True
fit_res.append(None)
success = None
if (not failed) and res.converged:
success = 1
else:
# try backup
# TODO: check more cases to make this robust
if not np.isnan(start_value):
val, infodict, ier, msg = optimize.fsolve(func, start_value,
full_output=True) #scalar
#val = optimize.newton(func, start_value) #scalar
fval = infodict['fvec']
fit_res.append(infodict)
else:
ier = -1
fval = 1
fit_res.append([None])
if ier == 1 and np.abs(fval) < 1e-4 :
success = 1
else:
#print infodict
if key in ['alpha', 'power', 'effect_size']:
val, r = optimize.brentq(func, 1e-8, 1-1e-8,
full_output=True) #scalar
success = 1 if r.converged else 0
fit_res.append(r)
else:
success = 0
if not success == 1:
import warnings
from statsmodels.tools.sm_exceptions import (ConvergenceWarning,
convergence_doc)
warnings.warn(convergence_doc, ConvergenceWarning)
#attach fit_res, for reading only, should be needed only for debugging
fit_res.insert(0, success)
self.cache_fit_res = fit_res
return val
def plot_power(self, dep_var='nobs', nobs=None, effect_size=None,
alpha=0.05, ax=None, title=None, plt_kwds=None, **kwds):
"""
Plot power with number of observations or effect size on x-axis
Parameters
----------
dep_var : {'nobs', 'effect_size', 'alpha'}
This specifies which variable is used for the horizontal axis.
If dep_var='nobs' (default), then one curve is created for each
value of ``effect_size``. If dep_var='effect_size' or alpha, then
one curve is created for each value of ``nobs``.
nobs : {scalar, array_like}
specifies the values of the number of observations in the plot
effect_size : {scalar, array_like}
specifies the values of the effect_size in the plot
alpha : {float, array_like}
The significance level (type I error) used in the power
calculation. Can only be more than a scalar, if ``dep_var='alpha'``
ax : None or axis instance
If ax is None, than a matplotlib figure is created. If ax is a
matplotlib axis instance, then it is reused, and the plot elements
are created with it.
title : str
title for the axis. Use an empty string, ``''``, to avoid a title.
plt_kwds : {None, dict}
not used yet
kwds : dict
These remaining keyword arguments are used as arguments to the
power function. Many power function support ``alternative`` as a
keyword argument, two-sample test support ``ratio``.
Returns
-------
Figure
If `ax` is None, the created figure. Otherwise the figure to which
`ax` is connected.
Notes
Loading ...