import numpy as np
from numpy.testing import assert_almost_equal
from pytest import raises as assert_raises, warns as assert_warns
import statsmodels.stats.dist_dependence_measures as ddm
from statsmodels.datasets import get_rdataset
class TestDistDependenceMeasures(object):
@classmethod
def setup_class(cls):
"""
Values were obtained via the R `energy` package.
R code:
------
> dcov.test(x, y, R=200)
dCov independence test (permutation test)
data: index 1, replicates 200
nV^2 = 45829, p-value = 0.004975
sample estimates:
dCov
47.86925
> DCOR(x, y)
$dCov
[1] 47.86925
$dCor
[1] 0.9999704
$dVarX
[1] 47.28702
$dVarY
[1] 48.46151
"""
np.random.seed(3)
cls.x = np.array(range(1, 101)).reshape((20, 5))
cls.y = cls.x + np.log(cls.x)
cls.dcor_exp = 0.9999704
cls.dcov_exp = 47.86925
cls.dvar_x_exp = 47.28702
cls.dvar_y_exp = 48.46151
cls.pval_emp_exp = 0.004975
cls.test_stat_emp_exp = 45829
# The values above are functions of the following values, and
# therefore when the above group of variables is computed correctly
# it means this group of variables was also correctly calculated.
cls.S_exp = 5686.03162
cls.test_stat_asym_exp = 2.8390102
cls.pval_asym_exp = 0.00452
def test_input_validation_nobs(self):
with assert_raises(ValueError, match="same number of observations"):
ddm.distance_covariance_test(self.x[:2, :], self.y)
def test_input_validation_unknown_method(self):
with assert_raises(ValueError, match="Unknown 'method' parameter"):
ddm.distance_covariance_test(self.x, self.y, method="wrong_name")
def test_statistic_value_asym_method(self):
statistic, pval, method = ddm.distance_covariance_test(
self.x, self.y, method="asym")
assert method == "asym"
assert_almost_equal(statistic, self.test_stat_asym_exp, 4)
assert_almost_equal(pval, self.pval_asym_exp, 3)
def test_statistic_value_emp_method(self):
statistic, pval, method = ddm.distance_covariance_test(
self.x, self.y, method="emp")
assert method == "emp"
assert_almost_equal(statistic, self.test_stat_emp_exp, 0)
assert_almost_equal(pval, self.pval_emp_exp, 3)
def test_fallback_to_asym_method(self):
match_text = "The asymptotic approximation will be used"
with assert_warns(UserWarning, match=match_text):
statistic, pval, _ = ddm.distance_covariance_test(
self.x, self.y, method="emp", B=200
)
assert_almost_equal(statistic, self.test_stat_emp_exp, 0)
assert_almost_equal(pval, self.pval_asym_exp, 3)
def test_statistics_for_2d_input(self):
stats = ddm.distance_statistics(self.x, self.y)
assert_almost_equal(stats.test_statistic, self.test_stat_emp_exp, 0)
assert_almost_equal(stats.distance_correlation, self.dcor_exp, 4)
assert_almost_equal(stats.distance_covariance, self.dcov_exp, 4)
assert_almost_equal(stats.dvar_x, self.dvar_x_exp, 4)
assert_almost_equal(stats.dvar_y, self.dvar_y_exp, 4)
assert_almost_equal(stats.S, self.S_exp, 4)
def test_statistics_for_1d_input(self):
x = np.array(range(1, 21))
y = x + np.log(x)
stats = ddm.distance_statistics(x, y)
# Values were obtained using the R `energy` package
assert_almost_equal(stats.test_statistic, 398.94623, 5)
assert_almost_equal(stats.distance_correlation, 0.9996107, 4)
assert_almost_equal(stats.distance_covariance, 4.4662414, 4)
assert_almost_equal(stats.dvar_x, 4.2294799, 4)
assert_almost_equal(stats.dvar_y, 4.7199304, 4)
assert_almost_equal(stats.S, 49.880200, 4)
def test_results_on_the_iris_dataset(self):
"""
R code example from the `energy` package documentation for
`energy::distance_covariance.test`:
> x <- iris[1:50, 1:4]
> y <- iris[51:100, 1:4]
> set.seed(1)
> dcov.test(x, y, R=200)
dCov independence test (permutation test)
data: index 1, replicates 200
nV^2 = 0.5254, p-value = 0.9552
sample estimates:
dCov
0.1025087
"""
iris = get_rdataset("iris").data.values[:, :4]
x = iris[:50]
y = iris[50:100]
stats = ddm.distance_statistics(x, y)
assert_almost_equal(stats.test_statistic, 0.5254, 4)
assert_almost_equal(stats.distance_correlation, 0.3060479, 4)
assert_almost_equal(stats.distance_covariance, 0.1025087, 4)
assert_almost_equal(stats.dvar_x, 0.2712927, 4)
assert_almost_equal(stats.dvar_y, 0.4135274, 4)
assert_almost_equal(stats.S, 0.667456, 4)
test_statistic, _, method = ddm.distance_covariance_test(x, y, B=199)
assert_almost_equal(test_statistic, 0.5254, 4)
assert method == "emp"
def test_results_on_the_quakes_dataset(self):
"""
R code:
------
> data("quakes")
> x = quakes[1:50, 1:3]
> y = quakes[51:100, 1:3]
> dcov.test(x, y, R=200)
dCov independence test (permutation test)
data: index 1, replicates 200
nV^2 = 45046, p-value = 0.4577
sample estimates:
dCov
30.01526
"""
quakes = get_rdataset("quakes").data.values[:, :3]
x = quakes[:50]
y = quakes[50:100]
stats = ddm.distance_statistics(x, y)
assert_almost_equal(np.round(stats.test_statistic), 45046, 0)
assert_almost_equal(stats.distance_correlation, 0.1894193, 4)
assert_almost_equal(stats.distance_covariance, 30.01526, 4)
assert_almost_equal(stats.dvar_x, 170.1702, 4)
assert_almost_equal(stats.dvar_y, 147.5545, 4)
assert_almost_equal(stats.S, 52265, 0)
test_statistic, _, method = ddm.distance_covariance_test(x, y, B=199)
assert_almost_equal(np.round(test_statistic), 45046, 0)
assert method == "emp"
def test_dcor(self):
assert_almost_equal(ddm.distance_correlation(self.x, self.y),
self.dcor_exp, 4)
def test_dcov(self):
assert_almost_equal(ddm.distance_covariance(self.x, self.y),
self.dcov_exp, 4)
def test_dvar(self):
assert_almost_equal(ddm.distance_variance(self.x),
self.dvar_x_exp, 4)