Repository URL to install this package:
|
Version:
0.1.10 ▾
|
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from typing import Optional, Union, List
class SklearnTransformerWrapper(BaseEstimator, TransformerMixin):
    """
    Wrapper for Scikit-learn pre-processing transformers,
    to apply the transformer to a specified set of variables.

    The wrapped transformer is fitted on, and applied to, only the selected
    columns; every other column of the DataFrame is passed through unchanged.

    Parameters:
    ----------
    variables : list or str
        List of variables to transform. If a single variable, pass it as a string.
    transformer : sklearn Transformer
        A scikit-learn transformer instance (e.g., SimpleImputer, OrdinalEncoder).

    Raises:
    ------
    ValueError
        If 'variables' is empty/None or 'transformer' is None.
    """

    def __init__(
        self,
        variables: Optional[Union[List[str], str]] = None,
        transformer: Optional[BaseEstimator] = None
    ):
        # Test the transformer against None explicitly instead of relying on
        # truthiness: an estimator object may define __len__, in which case
        # bool(transformer) can be False or even raise before fitting.
        if not variables or transformer is None:
            raise ValueError("Both 'variables' and 'transformer' must be provided.")
        # A single column name is normalised to a one-element list.
        self.variables = variables if isinstance(variables, list) else [variables]
        self.transformer = transformer

    def fit(
        self,
        X: pd.DataFrame,
        y: Optional[pd.Series] = None
    ) -> "SklearnTransformerWrapper":
        """
        Fits the transformer to the selected variables.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame.
        y : pd.Series, optional
            The target variable, by default None. It is forwarded to the
            wrapped transformer so that supervised transformers can use it.

        Returns:
        -------
        self

        Raises:
        ------
        TypeError
            If X is not a pandas DataFrame.
        """
        self._validate_dataframe(X)
        # Forward y: scikit-learn transformers take fit(X, y=None), and the
        # supervised ones need the target to learn their mapping.
        self.transformer.fit(X[self.variables], y)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms the selected variables.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame.

        Returns:
        -------
        pd.DataFrame
            A copy of X in which the selected variables are replaced by the
            transformer's output; the input DataFrame is not modified.

        Raises:
        ------
        TypeError
            If X is not a pandas DataFrame.
        """
        self._validate_dataframe(X)
        X = X.copy()
        # NOTE: the result is assigned back onto the same columns, so the
        # transformer is expected to return one output column per selected
        # variable.
        X[self.variables] = self.transformer.transform(X[self.variables])
        return X

    @staticmethod
    def _validate_dataframe(X: pd.DataFrame):
        """Raise TypeError unless X is a pandas DataFrame."""
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input must be a pandas DataFrame.")
class TemporalVariableEstimator(BaseEstimator, TransformerMixin):
    """
    Calculates the time difference between temporal variables and a reference variable.

    Each selected variable is replaced by ``reference_variable - variable``.

    Parameters:
    ----------
    variables : list or str
        List of temporal variables for which to calculate the time difference.
    reference_variable : str
        The reference temporal variable.

    Raises:
    ------
    ValueError
        If 'variables' or 'reference_variable' is empty/None.
    """

    def __init__(
        self,
        variables: Optional[Union[List[str], str]] = None,
        reference_variable: Optional[str] = None
    ):
        if not variables or not reference_variable:
            raise ValueError(
                "Both 'variables' and 'reference_variable' must be provided."
            )
        # A single column name is normalised to a one-element list.
        self.variables = variables if isinstance(variables, list) else [variables]
        self.reference_variable = reference_variable

    def fit(
        self,
        X: pd.DataFrame,
        y: Optional[pd.Series] = None
    ) -> "TemporalVariableEstimator":
        """
        No fitting needed; returns self for pipeline compatibility.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame.
        y : pd.Series, optional
            The target variable, by default None.

        Returns:
        -------
        self

        Raises:
        ------
        TypeError
            If X is not a pandas DataFrame.
        """
        self._validate_dataframe(X)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Calculates the time difference and updates the DataFrame.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame.

        Returns:
        -------
        pd.DataFrame
            A copy of X with each selected variable replaced by its
            difference from the reference variable; the input DataFrame is
            not modified.

        Raises:
        ------
        TypeError
            If X is not a pandas DataFrame.
        """
        self._validate_dataframe(X)
        # Read the reference (and every operand) from the untouched input
        # frame. Re-reading it from the frame being written would make the
        # result depend on the order of `variables` whenever the reference
        # column is itself listed there, or a variable is listed twice.
        reference = X[self.reference_variable]
        result = X.copy()
        for feature in self.variables:
            result[feature] = reference - X[feature]
        return result

    @staticmethod
    def _validate_dataframe(X: pd.DataFrame):
        """Raise TypeError unless X is a pandas DataFrame."""
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input must be a pandas DataFrame.")
class DropUnnecessaryFeatures(BaseEstimator, TransformerMixin):
    """
    Drops unnecessary features from a DataFrame.

    Parameters:
    ----------
    variables_to_drop : list or str
        List of variables to drop. If a single variable, pass it as a string.

    Raises:
    ------
    ValueError
        If 'variables_to_drop' is empty/None.
    """

    def __init__(self, variables_to_drop: Optional[Union[List[str], str]] = None):
        if not variables_to_drop:
            raise ValueError("'variables_to_drop' must be provided.")
        # Store the parameter under its own name, unmodified: BaseEstimator's
        # get_params() looks up an attribute named after every __init__
        # argument, so repr(), clone() and set_params() all depend on it.
        self.variables_to_drop = variables_to_drop

    @property
    def variables(self) -> List[str]:
        """Columns to drop, always as a list (kept for backward compatibility)."""
        to_drop = self.variables_to_drop
        return to_drop if isinstance(to_drop, list) else [to_drop]

    @variables.setter
    def variables(self, value: Union[List[str], str]) -> None:
        # Keep `variables_to_drop` the single source of truth so that code
        # assigning to `variables` and set_params() stay in sync.
        self.variables_to_drop = value

    def fit(
        self,
        X: pd.DataFrame,
        y: Optional[pd.Series] = None
    ) -> "DropUnnecessaryFeatures":
        """
        No fitting needed; returns self for pipeline compatibility.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame.
        y : pd.Series, optional
            The target variable, by default None.

        Returns:
        -------
        self

        Raises:
        ------
        TypeError
            If X is not a pandas DataFrame.
        """
        self._validate_dataframe(X)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Drops the specified variables from the DataFrame.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame.

        Returns:
        -------
        pd.DataFrame
            DataFrame with specified variables dropped. Listed variables
            that are not present in X are silently ignored.

        Raises:
        ------
        TypeError
            If X is not a pandas DataFrame.
        """
        self._validate_dataframe(X)
        X = X.copy()
        # errors="ignore" tolerates listed columns that are absent from X.
        return X.drop(columns=self.variables, errors="ignore")

    @staticmethod
    def _validate_dataframe(X: pd.DataFrame):
        """Raise TypeError unless X is a pandas DataFrame."""
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input must be a pandas DataFrame.")