ballet primitives and pipelines

In lieu of better documentation of the ML primitives and ML pipelines available in Ballet, we simply show their JSON annotations below. There is no comprehensive reference for the primitive and pipeline formats, but see the primitives reference and the pipelines reference.

primitives

ballet.engineer_features

{
    "name": "ballet.engineer_features",
    "contributors": ["Micah Smith <micahs@mit.edu>"],
    "documentation": "https://ballet.github.io/ballet/mlp_reference.html#ballet-engineer-features",
    "description": "Applies the feature engineering pipeline from the given Ballet project",
    "classifiers": {"type": "preprocessor", "subtype": "transformer"},
    "modalities": [],
    "primitive": "ballet.mlprimitives.make_engineer_features",
    "fit": {
        "method": "fit",
        "args": [
            {"name": "X", "type": "pandas.DataFrame"},
            {"name": "y", "type": "pandas.DataFrame"}
        ]
    },
    "produce": {
        "method": "transform",
        "args": [
            {"name": "X", "type": "pandas.DataFrame"}
        ],
        "output": [
            {"name": "X", "type": "pandas.DataFrame"}
        ]
    },
    "hyperparameters": {}
}

ballet.encode_target

{
    "name": "ballet.encode_target",
    "contributors": ["Micah Smith <micahs@mit.edu>"],
    "documentation": "https://ballet.github.io/ballet/mlp_reference.html#ballet-encode-target",
    "description": "Applies the target encoder from the given Ballet project",
    "classifiers": {"type": "preprocessor", "subtype": "data_cleanup"},
    "modalities": [],
    "primitive": "ballet.mlprimitives.make_encode_target",
    "fit": {
        "method": "fit",
        "args": [
            {"name": "y", "type": "pandas.DataFrame"}
        ]
    },
    "produce": {
        "method": "transform",
        "args": [
            {"name": "y", "default": null, "type": "pandas.DataFrame"}
        ],
        "output": [
            {"name": "y", "type": "ndarray"}
        ]
    },
    "hyperparameters": {}
}

ballet.drop_missing_targets

{
    "name": "ballet.drop_missing_targets",
    "contributors": [
        "Micah Smith <micahs@mit.edu>"
    ],
    "documentation": "https://ballet.github.io/ballet/mlp_reference.html#ballet-drop-missing-targets",
    "description": "Drops rows from X and y that have missing values in y",
    "classifiers": {
        "type": "preprocessor",
        "subtype": "data_cleanup"
    },
    "modalities": [],
    "primitive": "ballet.mlprimitives.DropMissingTargets",
    "fit": {
        "method": "fit",
        "args": [
            {
                "name": "X",
                "type": "array"
            },
            {
                "name": "y",
                "type": "array"
            }
        ]
    },
    "produce": {
        "method": "transform",
        "args": [
            {
                "name": "X",
                "type": "array"
            },
            {
                "name": "y",
                "type": "array",
                "default": null
            }
        ],
        "output": [
            {
                "name": "X",
                "type": "array"
            },
            {
                "name": "y",
                "type": "array"
            }
        ]
    },
    "hyperparameters": {}
}

pipelines

ballet_rf_classifier

{
    "metadata": {"name": "ballet_rf_classifier", "data_type": "single_table", "task_type": "classification"},
    "primitives": [
        "ballet.engineer_features",
        "ballet.encode_target",
        "sklearn.ensemble.RandomForestClassifier"
    ]
}

ballet_rf_regressor

{
    "metadata": {"name": "ballet_rf_regressor", "data_type": "single_table", "task_type": "regression"},
    "primitives": [
        "ballet.engineer_features",
        "ballet.encode_target",
        "sklearn.ensemble.RandomForestRegressor"
    ]
}