import numpy as np
from numpy.testing import (assert_almost_equal, assert_equal, assert_raises)
from statsmodels.base.transform import (BoxCox)
from statsmodels.datasets import macrodata
class TestTransform:
    """Unit tests for the BoxCox transformation mixin."""

    @classmethod
    def setup_class(cls):
        # Load the real GDP series once for the whole class; it is
        # strictly positive, which Box-Cox requires.
        macro = macrodata.load_pandas()
        cls.x = macro.data['realgdp'].values
        cls.bc = BoxCox()

    def test_nonpositive(self):
        # A negative observation must be rejected ...
        series = [1, -1, 1]
        assert_raises(ValueError, self.bc.transform_boxcox, series)
        # ... and so must an exact zero.
        series = [1, 0, 1]
        assert_raises(ValueError, self.bc.transform_boxcox, series)

    def test_invalid_bounds(self):
        # A bounds tuple with three entries is malformed.
        assert_raises(ValueError, self.bc._est_lambda, self.x, (-3, 2, 3))
        # The upper bound has to exceed the lower bound.
        assert_raises(ValueError, self.bc._est_lambda, self.x, (2, -1))

    def test_unclear_methods(self):
        # Unknown method names must raise, both when estimating lambda
        # and when back-transforming.
        assert_raises(ValueError, self.bc._est_lambda,
                      self.x, (-1, 2), 'test')
        assert_raises(ValueError, self.bc.untransform_boxcox,
                      self.x, 1, 'test')

    def test_unclear_scale_parameter(self):
        # Only 'mad' and 'sd' (MAD / standard deviation) are valid
        # scale choices for bc.guerrero; anything else raises.
        assert_raises(ValueError, self.bc._est_lambda,
                      self.x, scale='test')
        # Both case variants of each accepted option should run cleanly.
        for scale in ('mad', 'MAD', 'sd', 'SD'):
            self.bc._est_lambda(self.x, scale=scale)

    def test_valid_guerrero(self):
        # Reference value from R: `BoxCox.lambda(x, method="guerrero")`
        # on a ts with frequency 4 (BoxCox.lambda defaults to 2, but we
        # follow Guerrero and Perera (2004) as a guideline).
        est = self.bc._est_lambda(self.x, method='guerrero',
                                  window_length=4)
        assert_almost_equal(est, 0.507624, 4)
        # The same R call with the default grouping parameter
        # (namely, window_length=2).
        est = self.bc._est_lambda(self.x, method='guerrero',
                                  window_length=2)
        assert_almost_equal(est, 0.513893, 4)

    def test_guerrero_robust_scale(self):
        # Expected lambda verified manually from the MAD values; the
        # standard-deviation counterpart on R=4 gives 0.5076, i.e.
        # almost the same figure.
        est = self.bc._est_lambda(self.x, scale='mad')
        assert_almost_equal(est, 0.488621, 4)

    def test_loglik_lambda_estimation(self):
        # 0.2 is the value returned by `BoxCox.lambda(x, method="loglik")`.
        est = self.bc._est_lambda(self.x, method='loglik')
        assert_almost_equal(est, 0.2, 1)

    def test_boxcox_transformation_methods(self):
        # Transforming with an estimated lambda should nearly match
        # transforming with the explicit value taken from R.
        auto = self.bc.transform_boxcox(self.x)
        fixed = self.bc.transform_boxcox(self.x, 0.507624)
        assert_almost_equal(auto[0], fixed[0], 3)
        # A perfectly increasing series has constant variance over the
        # whole range, so stabilising keeps the scale: lambda = 1.
        _, est = self.bc.transform_boxcox(np.arange(1, 100))
        assert_almost_equal(est, 1., 5)

    def test_zero_lambda(self):
        # lambda == 0 degenerates to the natural logarithm.
        transformed, est = self.bc.transform_boxcox(self.x, 0.)
        assert_equal(est, 0.)
        assert_almost_equal(transformed, np.log(self.x), 5)

    def test_naive_back_transformation(self):
        # Round-trip through the naive back-transformation for both a
        # log case (lambda = 0) and lambda = 0.5.
        for lmbda in (0., .5):
            forward = self.bc.transform_boxcox(self.x, lmbda)
            recovered = self.bc.untransform_boxcox(*forward,
                                                   method='naive')
            assert_almost_equal(self.x, recovered, 5)