import numpy as np
import pytest
from sklearn.base import clone
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.dummy import DummyRegressor
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_warns_message
from sklearn.utils._testing import assert_no_warnings
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Lasso
from sklearn import datasets
from sklearn.compose import TransformedTargetRegressor
friedman = datasets.make_friedman1(random_state=0)
def test_transform_target_regressor_error():
X, y = friedman
# provide a transformer and functions at the same time
regr = TransformedTargetRegressor(regressor=LinearRegression(),
transformer=StandardScaler(),
func=np.exp, inverse_func=np.log)
with pytest.raises(ValueError,
match="'transformer' and functions"
" 'func'/'inverse_func' cannot both be set."):
regr.fit(X, y)
# fit with sample_weight with a regressor which does not support it
sample_weight = np.ones((y.shape[0],))
regr = TransformedTargetRegressor(regressor=Lasso(),
transformer=StandardScaler())
with pytest.raises(TypeError, match=r"fit\(\) got an unexpected "
"keyword argument 'sample_weight'"):
regr.fit(X, y, sample_weight=sample_weight)
# func is given but inverse_func is not
regr = TransformedTargetRegressor(func=np.exp)
with pytest.raises(ValueError, match="When 'func' is provided, "
"'inverse_func' must also be provided"):
regr.fit(X, y)
def test_transform_target_regressor_invertible():
X, y = friedman
regr = TransformedTargetRegressor(regressor=LinearRegression(),
func=np.sqrt, inverse_func=np.log,
check_inverse=True)
assert_warns_message(UserWarning, "The provided functions or transformer"
" are not strictly inverse of each other.",
regr.fit, X, y)
regr = TransformedTargetRegressor(regressor=LinearRegression(),
func=np.sqrt, inverse_func=np.log)
regr.set_params(check_inverse=False)
assert_no_warnings(regr.fit, X, y)
def _check_standard_scaled(y, y_pred):
y_mean = np.mean(y, axis=0)
y_std = np.std(y, axis=0)
assert_allclose((y - y_mean) / y_std, y_pred)
def _check_shifted_by_one(y, y_pred):
assert_allclose(y + 1, y_pred)
def test_transform_target_regressor_functions():
X, y = friedman
regr = TransformedTargetRegressor(regressor=LinearRegression(),
func=np.log, inverse_func=np.exp)
y_pred = regr.fit(X, y).predict(X)
# check the transformer output
y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
assert_allclose(np.log(y), y_tran)
assert_allclose(y, regr.transformer_.inverse_transform(
y_tran.reshape(-1, 1)).squeeze())
assert y.shape == y_pred.shape
assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
# check the regressor output
lr = LinearRegression().fit(X, regr.func(y))
assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
def test_transform_target_regressor_functions_multioutput():
X = friedman[0]
y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
regr = TransformedTargetRegressor(regressor=LinearRegression(),
func=np.log, inverse_func=np.exp)
y_pred = regr.fit(X, y).predict(X)
# check the transformer output
y_tran = regr.transformer_.transform(y)
assert_allclose(np.log(y), y_tran)
assert_allclose(y, regr.transformer_.inverse_transform(y_tran))
assert y.shape == y_pred.shape
assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
# check the regressor output
lr = LinearRegression().fit(X, regr.func(y))
assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
@pytest.mark.parametrize("X,y", [friedman,
(friedman[0],
np.vstack((friedman[1],
friedman[1] ** 2 + 1)).T)])
def test_transform_target_regressor_1d_transformer(X, y):
# All transformer in scikit-learn expect 2D data. FunctionTransformer with
# validate=False lift this constraint without checking that the input is a
# 2D vector. We check the consistency of the data shape using a 1D and 2D y
# array.
transformer = FunctionTransformer(func=lambda x: x + 1,
inverse_func=lambda x: x - 1)
regr = TransformedTargetRegressor(regressor=LinearRegression(),
transformer=transformer)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
# consistency forward transform
y_tran = regr.transformer_.transform(y)
_check_shifted_by_one(y, y_tran)
assert y.shape == y_pred.shape
# consistency inverse transform
assert_allclose(y, regr.transformer_.inverse_transform(
y_tran).squeeze())
# consistency of the regressor
lr = LinearRegression()
transformer2 = clone(transformer)
lr.fit(X, transformer2.fit_transform(y))
y_lr_pred = lr.predict(X)
assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
assert_allclose(regr.regressor_.coef_, lr.coef_)
@pytest.mark.parametrize("X,y", [friedman,
(friedman[0],
np.vstack((friedman[1],
friedman[1] ** 2 + 1)).T)])
def test_transform_target_regressor_2d_transformer(X, y):
# Check consistency with transformer accepting only 2D array and a 1D/2D y
# array.
transformer = StandardScaler()
regr = TransformedTargetRegressor(regressor=LinearRegression(),
transformer=transformer)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
# consistency forward transform
if y.ndim == 1: # create a 2D array and squeeze results
y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
else:
y_tran = regr.transformer_.transform(y)
_check_standard_scaled(y, y_tran)
assert y.shape == y_pred.shape
# consistency inverse transform
assert_allclose(y, regr.transformer_.inverse_transform(
y_tran).squeeze())
# consistency of the regressor
lr = LinearRegression()
transformer2 = clone(transformer)
if y.ndim == 1: # create a 2D array and squeeze results
lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
else:
lr.fit(X, transformer2.fit_transform(y))
y_lr_pred = lr.predict(X)
assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_2d_transformer_multioutput():
# Check consistency with transformer accepting only 2D array and a 2D y
# array.
X = friedman[0]
y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
transformer = StandardScaler()
regr = TransformedTargetRegressor(regressor=LinearRegression(),
transformer=transformer)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
# consistency forward transform
y_tran = regr.transformer_.transform(y)
_check_standard_scaled(y, y_tran)
assert y.shape == y_pred.shape
# consistency inverse transform
assert_allclose(y, regr.transformer_.inverse_transform(
y_tran).squeeze())
# consistency of the regressor
lr = LinearRegression()
transformer2 = clone(transformer)
lr.fit(X, transformer2.fit_transform(y))
y_lr_pred = lr.predict(X)
assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_multi_to_single():
X = friedman[0]
y = np.transpose([friedman[1], (friedman[1] ** 2 + 1)])
def func(y):
out = np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
return out[:, np.newaxis]
def inverse_func(y):
return y
tt = TransformedTargetRegressor(func=func, inverse_func=inverse_func,
check_inverse=False)
tt.fit(X, y)
y_pred_2d_func = tt.predict(X)
assert y_pred_2d_func.shape == (100, 1)
# force that the function only return a 1D array
def func(y):
return np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
tt = TransformedTargetRegressor(func=func, inverse_func=inverse_func,
check_inverse=False)
tt.fit(X, y)
y_pred_1d_func = tt.predict(X)
assert y_pred_1d_func.shape == (100, 1)
assert_allclose(y_pred_1d_func, y_pred_2d_func)
class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
def fit(self, X, y=None):
assert isinstance(X, np.ndarray)
return self
def transform(self, X):
assert isinstance(X, np.ndarray)
return X
def inverse_transform(self, X):
assert isinstance(X, np.ndarray)
return X
class DummyCheckerListRegressor(DummyRegressor):
def fit(self, X, y, sample_weight=None):
assert isinstance(X, list)
return super().fit(X, y, sample_weight)
def predict(self, X):
assert isinstance(X, list)
return super().predict(X)
def test_transform_target_regressor_ensure_y_array():
# check that the target ``y`` passed to the transformer will always be a
# numpy array. Similarly, if ``X`` is passed as a list, we check that the
# predictor receive as it is.
X, y = friedman
tt = TransformedTargetRegressor(transformer=DummyCheckerArrayTransformer(),
regressor=DummyCheckerListRegressor(),
check_inverse=False)
tt.fit(X.tolist(), y.tolist())
tt.predict(X.tolist())
with pytest.raises(AssertionError):
tt.fit(X, y.tolist())
with pytest.raises(AssertionError):
tt.predict(X)
class DummyTransformer(TransformerMixin, BaseEstimator):
"""Dummy transformer which count how many time fit was called."""
def __init__(self, fit_counter=0):
self.fit_counter = fit_counter
def fit(self, X, y=None):
self.fit_counter += 1
return self
def transform(self, X):
return X
def inverse_transform(self, X):
return X
@pytest.mark.parametrize("check_inverse", [False, True])
def test_transform_target_regressor_count_fit(check_inverse):
# regression test for gh-issue #11618
# check that we only call a single time fit for the transformer
X, y = friedman
ttr = TransformedTargetRegressor(
transformer=DummyTransformer(), check_inverse=check_inverse
)
ttr.fit(X, y)
assert ttr.transformer_.fit_counter == 1
class DummyRegressorWithExtraFitParams(DummyRegressor):
def fit(self, X, y, sample_weight=None, check_input=True):
# on the test below we force this to false, we make sure this is
# actually passed to the regressor
assert not check_input
return super().fit(X, y, sample_weight)
def test_transform_target_regressor_pass_fit_parameters():
X, y = friedman
regr = TransformedTargetRegressor(
regressor=DummyRegressorWithExtraFitParams(),
transformer=DummyTransformer()
)
regr.fit(X, y, check_input=False)
assert regr.transformer_.fit_counter == 1
def test_transform_target_regressor_route_pipeline():
X, y = friedman
regr = TransformedTargetRegressor(
regressor=DummyRegressorWithExtraFitParams(),
transformer=DummyTransformer()
)
estimators = [
('normalize', StandardScaler()), ('est', regr)
]
pip = Pipeline(estimators)
pip.fit(X, y, **{'est__check_input': False})
assert regr.transformer_.fit_counter == 1