# -*- coding: utf-8 -*-
"""some measures for evaluation of prediction, tests and model selection
Created on Tue Nov 08 15:23:20 2011
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from statsmodels.tools.validation import array_like
def mse(x1, x2, axis=0):
    """Mean squared error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the mean of the squared differences is taken.

    Returns
    -------
    mse : ndarray or float
        Mean squared error along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass; numpy matrices, for example, will silently produce an
    incorrect result.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    # ``**`` is kept on purpose so subclass operator semantics are untouched.
    squared_error = error ** 2
    return np.mean(squared_error, axis=axis)
def rmse(x1, x2, axis=0):
    """Root mean squared error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the mean squared error is computed.

    Returns
    -------
    rmse : ndarray or float
        Square root of the mean squared error along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass; numpy matrices, for example, will silently produce an
    incorrect result.
    """
    # The mean squared error itself is delegated to ``mse``.
    mean_sq_error = mse(np.asanyarray(x1), np.asanyarray(x2), axis=axis)
    return np.sqrt(mean_sq_error)
def maxabs(x1, x2, axis=0):
    """Maximum absolute error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the maximum is taken.

    Returns
    -------
    maxabs : ndarray or float
        Largest absolute difference along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    abs_error = np.abs(error)
    return np.max(abs_error, axis=axis)
def meanabs(x1, x2, axis=0):
    """Mean absolute error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the mean is taken.

    Returns
    -------
    meanabs : ndarray or float
        Average absolute difference along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    abs_error = np.abs(error)
    return np.mean(abs_error, axis=axis)
def medianabs(x1, x2, axis=0):
    """Median absolute error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the median is taken.

    Returns
    -------
    medianabs : ndarray or float
        Median of the absolute differences along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    abs_error = np.abs(error)
    return np.median(abs_error, axis=axis)
def bias(x1, x2, axis=0):
    """Bias, i.e. the mean error, between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the mean is taken.

    Returns
    -------
    bias : ndarray or float
        Mean of the signed differences along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    return np.mean(error, axis=axis)
def medianbias(x1, x2, axis=0):
    """Median bias, i.e. the median error, between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    axis : int
        Axis along which the median is taken.

    Returns
    -------
    medianbias : ndarray or float
        Median of the signed differences along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    return np.median(error, axis=axis)
def vare(x1, x2, ddof=0, axis=0):
    """Variance of the error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    ddof : int
        Delta degrees of freedom, forwarded unchanged to ``numpy.var``.
    axis : int
        Axis along which the variance is computed.

    Returns
    -------
    vare : ndarray or float
        Variance of the differences along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    return np.var(error, ddof=ddof, axis=axis)
def stde(x1, x2, ddof=0, axis=0):
    """Standard deviation of the error between two arrays.

    Parameters
    ----------
    x1, x2 : array_like
        The two inputs; the measure is computed from their difference
        ``x1 - x2``.
    ddof : int
        Delta degrees of freedom, forwarded unchanged to ``numpy.std``.
    axis : int
        Axis along which the standard deviation is computed.

    Returns
    -------
    stde : ndarray or float
        Standard deviation of the differences along the given axis.

    Notes
    -----
    Inputs of different shapes must be broadcastable against each other.

    Conversion is done with ``numpy.asanyarray``, so array subclasses are
    passed through unchanged. Whether that is desirable depends on the
    subclass.
    """
    error = np.asanyarray(x1) - np.asanyarray(x2)
    return np.std(error, ddof=ddof, axis=axis)
def iqr(x1, x2, axis=0):
    """
    Interquartile range of error

    Parameters
    ----------
    x1 : array_like
        One of the inputs into the IQR calculation.
    x2 : array_like
        The other input into the IQR calculation.
    axis : {None, int}
        axis along which the summary statistic is calculated. If None, both
        inputs are flattened with ``ravel`` before the difference is taken
        and the statistic is computed over the flattened difference.

    Returns
    -------
    iqr : {float, ndarray}
        Interquartile range along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they must broadcast.

    The quartiles are order statistics of the sorted differences, taken at
    positions ``round((nobs - 1) * q)`` for ``q`` in ``(0.25, 0.75)``, where
    ``nobs`` is the length of the difference along ``axis``. No interpolation
    between neighbouring observations is performed.
    """
    x1 = array_like(x1, 'x1', dtype=None, ndim=None)
    # Pass the matching label for the second input (it was 'x1' before).
    x2 = array_like(x2, 'x2', dtype=None, ndim=None)
    if axis is None:
        x1 = x1.ravel()
        x2 = x2.ravel()
        axis = 0
    xdiff = np.sort(x1 - x2, axis=axis)
    # Take the observation count from the broadcast difference rather than
    # from ``x1``: when ``x1`` is the smaller operand, ``x1.shape[axis]`` is
    # either the wrong length or does not exist at all.
    nobs = xdiff.shape[axis]
    idx = np.round((nobs - 1) * np.array([0.25, 0.75])).astype(int)
    # Select the two quartile positions along ``axis``, keep all other axes.
    sl = [slice(None)] * xdiff.ndim
    sl[axis] = idx
    quartile_gap = np.diff(xdiff[tuple(sl)], axis=axis)
    # ``np.squeeze`` without an axis removes every length-one dimension,
    # which includes the reduced one.
    return np.squeeze(quartile_gap)
# Information Criteria
# ---------------------
def aic(llf, nobs, df_modelwc):
    """Akaike information criterion

    Parameters
    ----------
    llf : float
        value of the loglikelihood
    nobs : int
        number of observations. Not used in this calculation.
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    aic : float
        information criterion, ``-2 * llf + 2 * df_modelwc``

    References
    ----------
    https://en.wikipedia.org/wiki/Akaike_information_criterion
    """
    goodness_of_fit = -2. * llf
    penalty = 2. * df_modelwc
    return goodness_of_fit + penalty
def aicc(llf, nobs, df_modelwc):
"""Akaike information criterion (AIC) with small sample correction
Parameters
----------
llf : float
value of the loglikelihood
nobs : int
number of observations
df_modelwc : int
number of parameters including constant
Returns
-------
aicc : float
information criterion
References
----------
https://en.wikipedia.org/wiki/Akaike_information_criterion#AICc
"""
Loading ...