Repository URL to install this package:
|
Version:
0.3.1 ▾
|
'''Multivariate Distribution
Probability of a multivariate t distribution
Now also mvstnormcdf has tests against R mvtnorm
Still need non-central t, extra options, and convenience function for
location, scale version.
Author: Josef Perktold
License: BSD (3-clause)
Reference:
Genz and Bretz for formula
'''
import numpy as np
from scipy import integrate, stats, special
from scipy.stats import chi,chi2
from extras import mvnormcdf, mvstdnormcdf
from numpy import exp as np_exp
from numpy import log as np_log
from scipy.special import gamma as sps_gamma
from scipy.special import gammaln as sps_gammaln
def chi2_pdf(self, x, df):
'''pdf of chi-square distribution'''
#from scipy.stats.distributions
Px = x**(df/2.0-1)*np.exp(-x/2.0)
Px /= special.gamma(df/2.0)* 2**(df/2.0)
return Px
def chi_pdf(x, df):
tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \
- sps_gammaln(df*0.5)
return np_exp(tmp)
#return x**(df-1.)*np_exp(-x*x*0.5)/(2.0)**(df*0.5-1)/sps_gamma(df*0.5)
def chi_logpdf(x, df):
tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \
- sps_gammaln(df*0.5)
return tmp
def funbgh(s, a, b, R, df):
sqrt_df = np.sqrt(df+0.5)
ret = chi_logpdf(s,df)
ret += np_log(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R,
maxpts=1000000, abseps=1e-6))
ret = np_exp(ret)
return ret
def funbgh2(s, a, b, R, df):
n = len(a)
sqrt_df = np.sqrt(df)
#np.power(s, df-1) * np_exp(-s*s*0.5)
return np_exp((df-1)*np_log(s)-s*s*0.5) \
* mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R[np.tril_indices(n, -1)],
maxpts=1000000, abseps=1e-4)
def bghfactor(df):
return np.power(2.0, 1-df*0.5) / sps_gamma(df*0.5)
def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None):
'''probability of rectangular area of standard t distribution
assumes mean is zero and R is correlation matrix
Notes
-----
This function does not calculate the estimate of the combined error
between the underlying multivariate normal probability calculations
and the integration.
'''
kwds = dict(args=(a,b,R,df), epsabs=1e-4, epsrel=1e-2, limit=150)
if not quadkwds is None:
kwds.update(quadkwds)
#print kwds
res, err = integrate.quad(funbgh2, *chi.ppf([ieps,1-ieps], df),
**kwds)
prob = res * bghfactor(df)
return prob
#written by Enzo Michelangeli, style changes by josef-pktd
# Student's T random variable
def multivariate_t_rvs(m, S, df=np.inf, n=1):
'''generate random variables of multivariate t distribution
Parameters
----------
m : array_like
mean of random variable, length determines dimension of random variable
S : array_like
square array of covariance matrix
df : int or float
degrees of freedom
n : int
number of observations, return random array will be (n, len(m))
Returns
-------
rvs : ndarray, (n, len(m))
each row is an independent draw of a multivariate t distributed
random variable
'''
m = np.asarray(m)
d = len(m)
if df == np.inf:
x = 1.
else:
x = np.random.chisquare(df, n)/df
z = np.random.multivariate_normal(np.zeros(d),S,(n,))
return m + z/np.sqrt(x)[:,None] # same output format as random.multivariate_normal
if __name__ == '__main__':
corr = np.asarray([[1.0, 0, 0.5],[0,1,0],[0.5,0,1]])
corr_indep = np.asarray([[1.0, 0, 0],[0,1,0],[0,0,1]])
corr_equal = np.asarray([[1.0, 0.5, 0.5],[0.5,1,0.5],[0.5,0.5,1]])
R = corr_equal
a = np.array([-np.inf,-np.inf,-100.0])
a = np.array([-0.96,-0.96,-0.96])
b = np.array([0.0,0.0,0.0])
b = np.array([0.96,0.96, 0.96])
a[:] = -1
b[:] = 3
df = 10.
sqrt_df = np.sqrt(df)
print mvstdnormcdf(a, b, corr, abseps=1e-6)
#print integrate.quad(funbgh, 0, np.inf, args=(a,b,R,df))
print (stats.t.cdf(b[0], df) - stats.t.cdf(a[0], df))**3
s = 1
print mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R)
df=4
print mvstdtprob(a, b, R, df)
S = np.array([[1.,.5],[.5,1.]])
print multivariate_t_rvs([10.,20.], S, 2, 5)
nobs = 10000
rvst = multivariate_t_rvs([10.,20.], S, 2, nobs)
print np.sum((rvst<[10.,20.]).all(1),0) * 1. / nobs
print mvstdtprob(-np.inf*np.ones(2), np.zeros(2), R[:2,:2], 2)
'''
> lower <- -1
> upper <- 3
> df <- 4
> corr <- diag(3)
> delta <- rep(0, 3)
> pmvt(lower=lower, upper=upper, delta=delta, df=df, corr=corr)
[1] 0.5300413
attr(,"error")
[1] 4.321136e-05
attr(,"msg")
[1] "Normal Completion"
> (pt(upper, df) - pt(lower, df))**3
[1] 0.4988254
'''