# Source code for feature_engine.transformation.reciprocal
# Authors: Soledad Galli <solegalli@protonmail.com>
# License: BSD 3 clause
from typing import List, Optional, Union
import numpy as np
import pandas as pd
from feature_engine.base_transformers import BaseNumericalTransformer
from feature_engine.validation import _return_tags
from feature_engine.variable_manipulation import _check_input_parameter_variables
class ReciprocalTransformer(BaseNumericalTransformer):
    """
    The ReciprocalTransformer() applies the reciprocal transformation 1 / x
    to numerical variables.

    The ReciprocalTransformer() only works with numerical variables with non-zero
    values. If a variable contains the value 0, the transformer will raise an error.

    A list of variables can be passed as an argument. Alternatively, the
    transformer will automatically select and transform all numerical
    variables.

    Parameters
    ----------
    variables: list, default=None
        The list of numerical variables to transform. If None, the transformer will
        automatically find and select all numerical variables.

    Attributes
    ----------
    variables_:
        The group of variables that will be transformed.

    n_features_in_:
        The number of features in the train set used in fit.

    Methods
    -------
    fit:
        This transformer does not learn parameters.
    transform:
        Apply the reciprocal 1 / x transformation.
    fit_transform:
        Fit to data, then transform it.
    inverse_transform:
        Convert the data back to the original representation.
    """

    def __init__(
        self, variables: Union[None, int, str, List[Union[str, int]]] = None
    ) -> None:
        self.variables = _check_input_parameter_variables(variables)

    def _check_no_zeros(self, X: pd.DataFrame) -> None:
        """
        Raise a ValueError if any of the selected variables contains a zero.

        Shared by fit() and transform() so both apply exactly the same check
        and emit exactly the same message.
        """
        if (X[self.variables_] == 0).any().any():
            raise ValueError(
                "Some variables contain the value zero, can't apply reciprocal "
                "transformation."
            )

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
        """
        This transformer does not learn parameters.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features].
            The training input samples. Can be the entire dataframe, not just the
            variables to transform.

        y: pandas Series, default=None
            It is not needed in this transformer. You can pass y or None.

        Raises
        ------
        TypeError
            - If the input is not a Pandas DataFrame
            - If any of the user provided variables are not numerical
        ValueError
            - If there are no numerical variables in the df or the df is empty
            - If the variable(s) contain null values
            - If some variables contain zero as values

        Returns
        -------
        self
        """
        # Input validation is delegated to the parent class; `variables_` is
        # read right after this call, so the parent is expected to set it.
        X = super().fit(X)

        # 1 / 0 is undefined: refuse to fit on data containing zeros.
        self._check_no_zeros(X)

        self.n_features_in_ = X.shape[1]

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Apply the reciprocal 1 / x transformation.

        The transformed variables are always returned as floats, including
        those that were integers in the input.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features]
            The data to be transformed.

        Raises
        ------
        TypeError
            If the input is not a Pandas DataFrame
        ValueError
            - If the variable(s) contain null values
            - If the df has different number of features than the df used in fit()
            - If some variables contain zero values

        Returns
        -------
        X: pandas dataframe
            The dataframe with the transformed variables.
        """
        # check input dataframe and if class was fitted
        X = super().transform(X)

        # The data seen here may differ from the data used in fit(), so the
        # zero check has to be repeated.
        self._check_no_zeros(X)

        # np.reciprocal performs integer division on integer input (the result
        # is 0 whenever |x| > 1), so the values are cast to float first.
        #
        # The columns are replaced with `X[cols] = ...` rather than written
        # through `X.loc[:, cols] = ...`: the latter tries to set the values in
        # place and can keep the original integer dtype, which would silently
        # undo the float cast and truncate the reciprocals.
        X[self.variables_] = np.reciprocal(X[self.variables_].astype("float"))

        return X

    def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Convert the data back to the original representation.

        The reciprocal is its own inverse (1 / (1 / x) = x), so this method
        simply applies transform() again. The restored variables are returned
        as floats.

        Parameters
        ----------
        X: Pandas DataFrame of shape = [n_samples, n_features]
            The data to be transformed.

        Raises
        ------
        TypeError
            If the input is not a Pandas DataFrame
        ValueError
            - If the variable(s) contain null values
            - If the df has different number of features than the df used in fit()
            - If some variables contain zero values

        Returns
        -------
        X: pandas dataframe
            The dataframe with the variables converted back to their original
            representation.
        """
        # inverse_transform
        return self.transform(X)

    def _more_tags(self):
        """
        Return the estimator tags, marking as expected failures the checks
        that cannot pass because this transformer rejects zeros.
        """
        tags_dict = _return_tags()
        # =======  these tests fail because the transformers throw an error
        # when the values are 0. Nothing to do with the test itself but
        # mostly with the data created and used in the test
        msg = (
            "transformers raise errors when data contains zeroes, thus this check fails"
        )
        for check_name in (
            "check_estimators_dtypes",
            "check_estimators_fit_returns_self",
            "check_pipeline_consistency",
            "check_estimators_overwrite_params",
            "check_estimators_pickle",
            "check_transformer_general",
        ):
            tags_dict["_xfail_checks"][check_name] = msg
        return tags_dict