# Source code for feature_engine.transformation.reciprocal

# Authors: Soledad Galli <solegalli@protonmail.com>
# License: BSD 3 clause

from typing import List, Optional, Union

import numpy as np
import pandas as pd

from feature_engine.base_transformers import BaseNumericalTransformer
from feature_engine.validation import _return_tags
from feature_engine.variable_manipulation import _check_input_parameter_variables


class ReciprocalTransformer(BaseNumericalTransformer):
    """
    The ReciprocalTransformer() applies the reciprocal transformation 1 / x to
    numerical variables.

    The ReciprocalTransformer() only works with numerical variables with non-zero
    values. If a variable contains the value 0, the transformer will raise an error.

    A list of variables can be passed as an argument. Alternatively, the transformer
    will automatically select and transform all numerical variables.

    Parameters
    ----------
    variables: list, default=None
        The list of numerical variables to transform. If None, the transformer
        will automatically find and select all numerical variables.

    Attributes
    ----------
    variables_:
        The group of variables that will be transformed.

    n_features_in_:
        The number of features in the train set used in fit.

    Methods
    -------
    fit:
        This transformer does not learn parameters.
    transform:
        Apply the reciprocal 1 / x transformation.
    fit_transform:
        Fit to data, then transform it.
    inverse_transform:
        Convert the data back to the original representation.
    """

    def __init__(
        self, variables: Union[None, int, str, List[Union[str, int]]] = None
    ) -> None:
        self.variables = _check_input_parameter_variables(variables)

    def _check_contains_zero(self, X: pd.DataFrame) -> None:
        """Raise a ValueError if any of the selected variables contains a zero."""
        if (X[self.variables_] == 0).any().any():
            raise ValueError(
                "Some variables contain the value zero, can't apply reciprocal "
                "transformation."
            )

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
        """
        This transformer does not learn parameters.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features].
            The training input samples. Can be the entire dataframe, not just the
            variables to transform.

        y: pandas Series, default=None
            It is not needed in this transformer. You can pass y or None.

        Raises
        ------
        TypeError
            - If the input is not a Pandas DataFrame
            - If any of the user provided variables are not numerical
        ValueError
            - If there are no numerical variables in the df or the df is empty
            - If the variable(s) contain null values
            - If some variables contain zero as values

        Returns
        -------
        self
        """

        # check input dataframe (delegated to the parent class)
        X = super().fit(X)

        # the reciprocal is undefined at 0, so refuse such data up front
        self._check_contains_zero(X)

        self.n_features_in_ = X.shape[1]

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Apply the reciprocal 1 / x transformation.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features]
            The data to be transformed.

        Raises
        ------
        TypeError
            If the input is not a Pandas DataFrame
        ValueError
            - If the variable(s) contain null values
            - If the df has different number of features than the df used in fit()
            - If some variables contain zero values

        Returns
        -------
        X: pandas dataframe
            The dataframe with the transformed variables. The transformed
            variables are returned as floats.
        """

        # check input dataframe and if class was fitted
        X = super().transform(X)

        # the reciprocal is undefined at 0
        self._check_contains_zero(X)

        # np.reciprocal performs integer division on integer input (1 / 2 -> 0),
        # so the values must be cast to float first. The cast is done on a
        # temporary frame and the columns are then replaced with X[cols] = ...:
        # assigning through X.loc[:, cols] writes into the existing columns and
        # can keep their integer dtype, which would silently truncate results.
        X[self.variables_] = np.reciprocal(X[self.variables_].astype("float"))

        return X

    def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Convert the data back to the original representation.

        The reciprocal is its own inverse (1 / (1 / x) = x), so this method
        simply applies transform() again.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features]
            The data to be transformed.

        Raises
        ------
        TypeError
            If the input is not a Pandas DataFrame
        ValueError
            - If the variable(s) contain null values
            - If the df has different number of features than the df used in fit()
            - If some variables contain zero values

        Returns
        -------
        X: pandas dataframe
            The dataframe with the transformed variables.
        """
        # inverse_transform
        return self.transform(X)

    def _more_tags(self):
        """Return the estimator tags, marking checks that are expected to fail."""
        tags_dict = _return_tags()
        # ======= this tests fail because the transformers throw an error
        # when the values are 0. Nothing to do with the test itself but
        # mostly with the data created and used in the test
        msg = (
            "transformers raise errors when data contains zeroes, thus this check fails"
        )
        for check_name in (
            "check_estimators_dtypes",
            "check_estimators_fit_returns_self",
            "check_pipeline_consistency",
            "check_estimators_overwrite_params",
            "check_estimators_pickle",
            "check_transformer_general",
        ):
            tags_dict["_xfail_checks"][check_name] = msg
        return tags_dict