'''Gradient/Jacobian of the normal and t log-likelihood.

Uses the chain rule.
Normal: derivative wrt mu, sigma and beta.

New version: loc-scale distributions, derivative wrt loc and scale.
Also includes the "standardized" t distribution (for use in GARCH).

TODO:
* use sympy for the derivative of loglike wrt the shape parameters;
  it works for df of the t distribution,
  dlog(gamma(a))/da = polygamma(0, a) (check);
  polygamma is available in scipy.special
* get the loc-scale example to work with mean = X*b
* write some full unit test examples

Author: josef-pktd
'''
import numpy as np
from scipy import special
from scipy.special import gammaln
def norm_lls(y, params):
    '''Per-observation log-likelihood of the normal distribution.

    Parameters
    ----------
    y : ndarray, 1d
        normally distributed random variable
    params : ndarray, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows

    Returns
    -------
    lls : ndarray
        contribution to the log-likelihood of each observation
    '''
    mean, variance = params.T
    # squared standardized residual (y - mu)^2 / sigma2
    squared_z = (y - mean)**2 / variance
    return -0.5 * (np.log(2*np.pi) + np.log(variance) + squared_z)
def norm_lls_grad(y, params):
    '''Per-observation Jacobian of the normal log-likelihood.

    Parameters
    ----------
    y : ndarray, 1d
        normally distributed random variable
    params : ndarray, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows

    Returns
    -------
    grad : array (nobs, 2)
        derivative of the log-likelihood of each observation wrt the mean
        in the first column, and wrt the scale in the second column

    Notes
    -----
    The second column is the derivative wrt sigma, not wrt sigma**2; it is
    only evaluated at the parameter value sigma2 = sigma**2.
    '''
    mean, variance = params.T
    resid = y - mean
    score_mean = resid / variance
    # d ll / d sigma = (z**2 - 1) / sigma, with sigma = sqrt(sigma2)
    score_scale = (resid**2 / variance - 1) / np.sqrt(variance)
    return np.column_stack((score_mean, score_scale))
def mean_grad(x, beta):
    '''Gradient/Jacobian of the linear mean, d (x*beta) / d beta.

    The mean is linear in beta, so the Jacobian is the design matrix `x`
    itself; `beta` is accepted for a uniform signature but is not used.
    '''
    jacobian = x
    return jacobian
def normgrad(y, x, params):
    '''Jacobian of the normal log-likelihood wrt regression coefficients and scale.

    Parameters
    ----------
    y : ndarray, 1d
        normally distributed random variable with mean x*beta, and
        variance sigma2
    x : ndarray, 2d
        explanatory variables, observations in rows, variables in columns
    params : array_like, (nvars + 1)
        array of coefficients and variance (beta, sigma2)

    Returns
    -------
    grad : ndarray (nobs, nvars + 1)
        derivative of the log-likelihood of each observation; the first
        nvars columns are the derivatives wrt beta (chain rule through the
        mean), the last column is the derivative wrt the scale (sigma) as
        returned by `norm_lls_grad`

    Notes
    -----
    assume params = (beta, sigma2)

    TODO: for heteroscedasticity need sigma to be a 1d array
    '''
    beta = params[:-1]
    # homoscedastic: the same variance for every observation
    sigma2 = params[-1]*np.ones((len(y), 1))
    dmudbeta = mean_grad(x, beta)
    mu = np.dot(x, beta)
    params2 = np.column_stack((mu, sigma2))
    dllsdms = norm_lls_grad(y, params2)
    # chain rule for beta: (d ll / d mu) * (d mu / d beta).
    # The last column must be the scale derivative (column 1 of dllsdms);
    # the previous code stacked the mean derivative (column 0) a second time.
    grad = np.column_stack((dllsdms[:, :1]*dmudbeta, dllsdms[:, 1:]))
    return grad
def tstd_lls(y, params, df):
    '''Per-observation log-likelihood of the standardized t distribution.

    Parameters
    ----------
    y : ndarray, 1d
        observations of the random variable
    params : ndarray, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows
    df : int
        degrees of freedom of the t distribution

    Returns
    -------
    lls : ndarray
        contribution to the log-likelihood of each observation

    Notes
    -----
    Parametrized for garch: the density is normalized with (df - 2), so
    that sigma2 is the variance of the distribution.
    '''
    mu, sigma2 = params.T
    nu = df * 1.0  # force float arithmetic
    log_const = gammaln((nu+1)/2.) - gammaln(nu/2.) - 0.5*np.log((nu-2)*np.pi)
    log_kernel = (nu+1)/2. * np.log(1. + (y-mu)**2/(nu-2)/sigma2)
    return log_const - (log_kernel + 0.5 * np.log(sigma2))
def norm_dlldy(y):
    '''Derivative of the log pdf of the standard normal with respect to y.

    The log pdf is -y**2/2 up to a constant, so the derivative is -y.
    '''
    score = -y
    return score
def tstd_pdf(x, df):
    '''Pdf of the standardized (not standard) t distribution, variance is one.

    Parameters
    ----------
    x : array_like
        points at which the density is evaluated
    df : array_like
        degrees of freedom

    Returns
    -------
    Px : ndarray or float
        density evaluated at x
    '''
    nu = np.array(df * 1.0)
    # normalizing constant, with (nu - 2) in place of nu for unit variance
    log_gamma_ratio = special.gammaln((nu + 1) / 2.) - special.gammaln(nu / 2.)
    density = np.exp(log_gamma_ratio) / np.sqrt((nu - 2) * np.pi)
    density /= (1 + (x**2) / (nu - 2)) ** ((nu + 1) / 2.)
    return density
def ts_lls(y, params, df):
    '''Per-observation log-likelihood of the t distribution with loc and scale.

    Parameters
    ----------
    y : ndarray, 1d
        observations of the random variable
    params : ndarray, (nobs, 2)
        array of location and squared scale (mu, sigma2) with observations
        in rows
    df : int
        degrees of freedom of the t distribution

    Returns
    -------
    lls : ndarray
        contribution to the log-likelihood of each observation

    Notes
    -----
    The density is normalized with df (not df - 2), so sigma2 is the squared
    scale of a standard loc-scale t distribution and the variance is
    sigma2 * df / (df - 2). Use `tstd_lls` for the version that is rescaled
    so that sigma2 is the variance.

    A t distribution with variance sigma**2 is obtained by shrinking the
    scale:

    >>> df = 10; sigma = 1.
    >>> stats.t.stats(df, loc=0., scale=sigma*np.sqrt((df-2.)/df))
    (array(0.0), array(1.0))

    >>> sigma = np.sqrt(2.)
    >>> stats.t.stats(df, loc=0., scale=sigma*np.sqrt((df-2.)/df))
    (array(0.0), array(2.0))
    '''
    # NOTE: a leftover debug print of (y, params, df) was removed here; it
    # wrote to stdout on every likelihood evaluation.
    mu, sigma2 = params.T
    df = df*1.0  # force float arithmetic
    lls = gammaln((df+1)/2.) - gammaln(df/2.) - 0.5*np.log(df*np.pi)
    lls -= (df+1.)/2. * np.log(1. + (y-mu)**2/df/sigma2) + 0.5 * np.log(sigma2)
    return lls
def ts_dlldy(y, df):
    '''Derivative of the log pdf of the standard t with respect to y.

    Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    df : array_like
        degrees of freedom, shape parameter of the log-likelihood function
        of the t distribution

    Returns
    -------
    dlldy : ndarray
        derivative of the log-likelihood wrt the random variable y,
        evaluated at the points given in y

    Notes
    -----
    with mean 0 and scale 1, but variance is df/(df-2)
    '''
    nu = df * 1.  # force float arithmetic
    # d/dy of -(nu+1)/2 * log(1 + y**2/nu)
    return -(nu + 1) / nu / (1 + y**2 / nu) * y
def tstd_dlldy(y, df):
    '''Derivative of the log pdf of the standardized t with respect to y.

    Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    df : array_like
        degrees of freedom, shape parameter of the log-likelihood function
        of the t distribution

    Returns
    -------
    dlldy : ndarray
        derivative of the log-likelihood wrt the random variable y,
        evaluated at the points given in y

    Notes
    -----
    parametrized for garch, standardized to variance=1
    '''
    # d/dy of -(df+1)/2 * log(1 + y**2/(df-2))
    dfm2 = df - 2.
    return -(df + 1) / dfm2 / (1 + y**2 / dfm2) * y
def locscale_grad(y, loc, scale, dlldy, *args):
    '''Derivative of the log-likelihood with respect to location and scale.

    Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    loc : float
        location parameter of distribution
    scale : float
        scale parameter of distribution
    dlldy : function
        derivative of the log-likelihood function wrt the random variable,
        called as ``dlldy(y_standardized, *args)``
    args : array_like
        shape parameters of the log-likelihood function

    Returns
    -------
    dlldloc : ndarray
        derivative of the log-likelihood wrt location evaluated at the
        points given in y
    dlldscale : ndarray
        derivative of the log-likelihood wrt scale evaluated at the
        points given in y
    '''
    centered = y - loc
    standardized = centered / scale
    # chain rule through the standardized variable (y - loc) / scale
    grad_loc = -dlldy(standardized, *args) / scale
    # -1/scale comes from the log(scale) term of the loc-scale density
    grad_scale = -1./scale - dlldy(standardized, *args) * centered/scale**2
    return grad_loc, grad_scale
if __name__ == '__main__':
    # Demo / self-check script: compares the analytical gradients against
    # finite-difference derivatives of the scipy.stats log densities.
    from scipy import stats
    from numpy.testing import assert_almost_equal

    verbose = 0

    def _central_diff(func, x0, dx, args=()):
        '''three-point central difference of func wrt its first argument

        Stands in for ``scipy.misc.derivative(func, x0, dx=dx, n=1,
        args=args, order=3)``, which has been removed from SciPy.
        '''
        return (func(x0 + dx, *args) - func(x0 - dx, *args)) / (2. * dx)

    if verbose:
        sig = 0.1
        beta = np.ones(2)
        rvs = np.random.randn(10, 3)
        x = rvs[:, 1:]
        y = np.dot(x, beta) + sig*rvs[:, 0]

        params = [1, 1, 1]
        print(normgrad(y, x, params))

        print(locscale_grad(y, np.dot(x, beta), 1, norm_dlldy))
        print(y-np.dot(x, beta))

    # The log densities are defined once per argument order, so that the
    # variable to differentiate is always the first positional argument.
    def llt(y, loc, scale, df):
        return np.log(stats.t.pdf(y, df, loc=loc, scale=scale))

    def lltloc(loc, y, scale, df):
        return np.log(stats.t.pdf(y, df, loc=loc, scale=scale))

    def lltscale(scale, y, loc, df):
        return np.log(stats.t.pdf(y, df, loc=loc, scale=scale))

    def llnorm(y, loc, scale):
        return np.log(stats.norm.pdf(y, loc=loc, scale=scale))

    def llnormloc(loc, y, scale):
        return np.log(stats.norm.pdf(y, loc=loc, scale=scale))

    def llnormscale(scale, y, loc):
        return np.log(stats.norm.pdf(y, loc=loc, scale=scale))

    if verbose:
        print('\ngradient of t')
        print(_central_diff(llt, 1, dx=1e-6, args=(0, 1, 10)))
        print('t ', locscale_grad(1, 0, 1, tstd_dlldy, 10))
        print('ts', locscale_grad(1, 0, 1, ts_dlldy, 10))
        print(_central_diff(llt, 1.5, dx=1e-10, args=(0, 1, 20)))
        print('ts', locscale_grad(1.5, 0, 1, ts_dlldy, 20))
        print(_central_diff(llt, 1.5, dx=1e-10, args=(0, 2, 20)))
        print('ts', locscale_grad(1.5, 0, 2, ts_dlldy, 20))
        print(_central_diff(llt, 1.5, dx=1e-10, args=(1, 2, 20)))
        print('ts', locscale_grad(1.5, 1, 2, ts_dlldy, 20))
        print(_central_diff(lltloc, 1, dx=1e-10, args=(1.5, 2, 20)))
        print(_central_diff(lltscale, 2, dx=1e-10, args=(1.5, 1, 20)))
        y, loc, scale, df = 1.5, 1, 2, 20
        print('ts', locscale_grad(y, loc, scale, ts_dlldy, 20))
        print(_central_diff(lltloc, loc, dx=1e-10, args=(y, scale, df)))
        print(_central_diff(lltscale, scale, dx=1e-10, args=(y, loc, df)))

        print('\ngradient of norm')
        print(_central_diff(llnorm, 1, dx=1e-6, args=(0, 1)))
        print(locscale_grad(1, 0, 1, norm_dlldy))
        y, loc, scale = 1.5, 1, 2
        print('ts', locscale_grad(y, loc, scale, norm_dlldy))
        print(_central_diff(llnormloc, loc, dx=1e-10, args=(y, scale)))
        print(_central_diff(llnormscale, scale, dx=1e-10, args=(y, loc)))
        y, loc, scale = 1.5, 0, 1
        print('ts', locscale_grad(y, loc, scale, norm_dlldy))
        print(_central_diff(llnormloc, loc, dx=1e-10, args=(y, scale)))
        print(_central_diff(llnormscale, scale, dx=1e-10, args=(y, loc)))

        #print('still something wrong with handling of scale and variance'
        #looks ok now
        print('\nloglike of t')
        print(tstd_lls(1, np.array([0, 1]), 100), llt(1, 0, 1, 100), 'differently standardized')
        print(tstd_lls(1, np.array([0, 1]), 10), llt(1, 0, 1, 10), 'differently standardized')
        print(ts_lls(1, np.array([0, 1]), 10), llt(1, 0, 1, 10))
        print(tstd_lls(1, np.array([0, 1.*10./8.]), 10), llt(1., 0, 1., 10))
        print(ts_lls(1, np.array([0, 1]), 100), llt(1, 0, 1, 100))
        print(tstd_lls(1, np.array([0, 1]), 10), llt(1, 0, 1.*np.sqrt(8/10.), 10))

    params = [(0, 1), (1., 1.), (0., 2.), (1., 2.)]
    yt = np.linspace(-2., 2., 11)

    # normal distribution: analytical vs. numerical loc/scale gradient
    for loc, scale in params:
        dlldlo = _central_diff(llnormloc, loc, dx=1e-10, args=(yt, scale))
        dlldsc = _central_diff(llnormscale, scale, dx=1e-10, args=(yt, loc))
        gr = locscale_grad(yt, loc, scale, norm_dlldy)
        assert_almost_equal(dlldlo, gr[0], 5, err_msg='deriv loc')
        assert_almost_equal(dlldsc, gr[1], 5, err_msg='deriv scale')

    # t distribution: gradients and both log-likelihood parametrizations
    for df in [3, 10, 100]:
        for loc, scale in params:
            dlldlo = _central_diff(lltloc, loc, dx=1e-10, args=(yt, scale, df))
            dlldsc = _central_diff(lltscale, scale, dx=1e-10, args=(yt, loc, df))
            gr = locscale_grad(yt, loc, scale, ts_dlldy, df)
            assert_almost_equal(dlldlo, gr[0], 4, err_msg='deriv loc')
            assert_almost_equal(dlldsc, gr[1], 4, err_msg='deriv scale')
            # ts_lls takes the squared scale of the standard loc-scale t
            assert_almost_equal(ts_lls(yt, np.array([loc, scale**2]), df),
                                llt(yt, loc, scale, df), 5)
            # tstd_lls takes the variance, i.e. scale shrunk by sqrt((df-2)/df)
            assert_almost_equal(tstd_lls(yt, np.array([loc, scale**2]), df),
                                llt(yt, loc, scale*np.sqrt((df-2.)/df), df), 5)