sklearn/examples/miscellaneous/plot_pipeline_display.py

"""
=================================================================
Displaying Pipelines
=================================================================

By default, a pipeline is displayed in a Jupyter Notebook as an HTML diagram,
which corresponds to `set_config(display='diagram')`. To deactivate the HTML
representation, use `set_config(display='text')`.

To see more detailed steps in the visualization of the pipeline, click on the
steps in the pipeline.
"""

# %%
# Displaying a Pipeline with a Preprocessing Step and Classifier
################################################################################
# This section constructs a :class:`~sklearn.pipeline.Pipeline` with a preprocessing
# step, :class:`~sklearn.preprocessing.StandardScaler`, and a classifier,
# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
# representation.

from sklearn import set_config
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Two named steps: "preprocessing" (a StandardScaler) feeds "classifier"
# (a LogisticRegression).
steps = [("preprocessing", StandardScaler()), ("classifier", LogisticRegression())]
pipe = Pipeline(steps=steps)

# %%
# To visualize the pipeline as a diagram, set `display='diagram'` (the default).
set_config(display="diagram")
pipe  # click on the diagram below to see the details of each step

# %%
# To view the plain-text representation of the pipeline instead, switch to
# `display='text'`.
set_config(display="text")
pipe

# %%
# Restore the default display so that the following sections render diagrams.
set_config(display="diagram")

# %%
# Displaying a Pipeline Chaining Multiple Preprocessing Steps & Classifier
################################################################################
# This section constructs a :class:`~sklearn.pipeline.Pipeline` with multiple
# preprocessing steps, :class:`~sklearn.preprocessing.PolynomialFeatures` and
# :class:`~sklearn.preprocessing.StandardScaler`, and a classifier step,
# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
# representation.

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Steps run in list order: "standard_scaler", then "polynomial" (degree-3
# PolynomialFeatures), then "classifier" (LogisticRegression with C=2.0).
steps = [
    ("standard_scaler", StandardScaler()),
    ("polynomial", PolynomialFeatures(degree=3)),
    ("classifier", LogisticRegression(C=2.0)),
]
pipe = Pipeline(steps=steps)
pipe  # click on the diagram below to see the details of each step

# %%
# Displaying a Pipeline with Dimensionality Reduction and Classifier
################################################################################
# This section constructs a :class:`~sklearn.pipeline.Pipeline` with a
# dimensionality reduction step, :class:`~sklearn.decomposition.PCA`, and
# a classifier, :class:`~sklearn.svm.SVC`, and displays its visual
# representation.

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# "reduce_dim" keeps four PCA components; "classifier" is a linear-kernel SVC.
steps = [
    ("reduce_dim", PCA(n_components=4)),
    ("classifier", SVC(kernel="linear")),
]
pipe = Pipeline(steps=steps)
pipe  # click on the diagram below to see the details of each step

# %%
# Displaying a Complex Pipeline Chaining a Column Transformer
################################################################################
# This section constructs a complex :class:`~sklearn.pipeline.Pipeline` with a
# :class:`~sklearn.compose.ColumnTransformer` and a classifier,
# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
# representation.

import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Categorical branch: impute with the constant "missing", then one-hot encode.
categorical_preprocessor = Pipeline(
    [
        (
            "imputation_constant",
            SimpleImputer(strategy="constant", fill_value="missing"),
        ),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Numerical branch: impute NaN entries with the mean strategy, then standardize.
numeric_preprocessor = Pipeline(
    [
        ("imputation_mean", SimpleImputer(strategy="mean", missing_values=np.nan)),
        ("scaler", StandardScaler()),
    ]
)

# Send each branch to its own set of named columns.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_preprocessor, ["state", "gender"]),
        ("numerical", numeric_preprocessor, ["age", "weight"]),
    ]
)

pipe = make_pipeline(preprocessor, LogisticRegression(max_iter=500))
pipe  # click on the diagram below to see the details of each step

# %%
# Displaying a Grid Search over a Pipeline with a Classifier
################################################################################
# This section constructs a :class:`~sklearn.model_selection.GridSearchCV`
# over a :class:`~sklearn.pipeline.Pipeline` with
# :class:`~sklearn.ensemble.RandomForestClassifier` and displays its visual
# representation.

import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Categorical branch: impute with the constant "missing", then one-hot encode.
categorical_preprocessor = Pipeline(
    [
        (
            "imputation_constant",
            SimpleImputer(strategy="constant", fill_value="missing"),
        ),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Numerical branch: impute NaN entries with the mean strategy, then standardize.
numeric_preprocessor = Pipeline(
    [
        ("imputation_mean", SimpleImputer(strategy="mean", missing_values=np.nan)),
        ("scaler", StandardScaler()),
    ]
)

# Send each branch to its own set of named columns.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_preprocessor, ["state", "gender"]),
        ("numerical", numeric_preprocessor, ["age", "weight"]),
    ]
)

# Explicit step names are used here so that the steps can be addressed by name.
pipe = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", RandomForestClassifier()),
    ]
)

# Hyper-parameter grid for the search. Each key is "<step name>__<parameter>",
# so every entry below targets the "classifier" step of the pipeline.
param_grid = {
    "classifier__n_estimators": [200, 500],
    # "auto" is intentionally not listed: it was deprecated in scikit-learn 1.1
    # and removed in 1.3, so fitting the grid with it would raise an error.
    "classifier__max_features": ["sqrt", "log2"],
    "classifier__max_depth": [4, 5, 6, 7, 8],
    "classifier__criterion": ["gini", "entropy"],
}

# Wrap the pipeline in a grid search; it is only displayed here, never fitted.
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, n_jobs=1)
grid_search  # click on the diagram below to see the details of each step
first commit 2024-08-05 09:32:03 +02:00			`"""`
			`=================================================================`
			`Displaying Pipelines`
			`=================================================================`

			`The default configuration for displaying a pipeline in a Jupyter Notebook is`
			`'diagram'` where `set_config(display='diagram')`. To deactivate HTML representation,
			use `set_config(display='text')`.

			`To see more detailed steps in the visualization of the pipeline, click on the`
			`steps in the pipeline.`
			`"""`

			`# %%`
			`# Displaying a Pipeline with a Preprocessing Step and Classifier`
			`################################################################################`
			# This section constructs a :class:`~sklearn.pipeline.Pipeline` with a preprocessing
			# step, :class:`~sklearn.preprocessing.StandardScaler`, and classifier,
			# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
			`# representation.`

			`from sklearn import set_config`
			`from sklearn.linear_model import LogisticRegression`
			`from sklearn.pipeline import Pipeline`
			`from sklearn.preprocessing import StandardScaler`

			`steps = [`
			`("preprocessing", StandardScaler()),`
			`("classifier", LogisticRegression()),`
			`]`
			`pipe = Pipeline(steps)`

			`# %%`
			# To visualize the diagram, the default is `display='diagram'`.
			`set_config(display="diagram")`
			`pipe # click on the diagram below to see the details of each step`

			`# %%`
			# To view the text pipeline, change to `display='text'`.
			`set_config(display="text")`
			`pipe`

			`# %%`
			`# Put back the default display`
			`set_config(display="diagram")`

			`# %%`
			`# Displaying a Pipeline Chaining Multiple Preprocessing Steps & Classifier`
			`################################################################################`
			# This section constructs a :class:`~sklearn.pipeline.Pipeline` with multiple
			# preprocessing steps, :class:`~sklearn.preprocessing.PolynomialFeatures` and
			# :class:`~sklearn.preprocessing.StandardScaler`, and a classifier step,
			# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
			`# representation.`

			`from sklearn.linear_model import LogisticRegression`
			`from sklearn.pipeline import Pipeline`
			`from sklearn.preprocessing import PolynomialFeatures, StandardScaler`

			`steps = [`
			`("standard_scaler", StandardScaler()),`
			`("polynomial", PolynomialFeatures(degree=3)),`
			`("classifier", LogisticRegression(C=2.0)),`
			`]`
			`pipe = Pipeline(steps)`
			`pipe # click on the diagram below to see the details of each step`

			`# %%`
			`# Displaying a Pipeline and Dimensionality Reduction and Classifier`
			`################################################################################`
			# This section constructs a :class:`~sklearn.pipeline.Pipeline` with a
			# dimensionality reduction step, :class:`~sklearn.decomposition.PCA`,
			# a classifier, :class:`~sklearn.svm.SVC`, and displays its visual
			`# representation.`

			`from sklearn.decomposition import PCA`
			`from sklearn.pipeline import Pipeline`
			`from sklearn.svm import SVC`

			`steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))]`
			`pipe = Pipeline(steps)`
			`pipe # click on the diagram below to see the details of each step`

			`# %%`
			`# Displaying a Complex Pipeline Chaining a Column Transformer`
			`################################################################################`
			# This section constructs a complex :class:`~sklearn.pipeline.Pipeline` with a
			# :class:`~sklearn.compose.ColumnTransformer` and a classifier,
			# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
			`# representation.`

			`import numpy as np`

			`from sklearn.compose import ColumnTransformer`
			`from sklearn.impute import SimpleImputer`
			`from sklearn.linear_model import LogisticRegression`
			`from sklearn.pipeline import Pipeline, make_pipeline`
			`from sklearn.preprocessing import OneHotEncoder, StandardScaler`

			`numeric_preprocessor = Pipeline(`
			`steps=[`
			`("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),`
			`("scaler", StandardScaler()),`
			`]`
			`)`

			`categorical_preprocessor = Pipeline(`
			`steps=[`
			`(`
			`"imputation_constant",`
			`SimpleImputer(fill_value="missing", strategy="constant"),`
			`),`
			`("onehot", OneHotEncoder(handle_unknown="ignore")),`
			`]`
			`)`

			`preprocessor = ColumnTransformer(`
			`[`
			`("categorical", categorical_preprocessor, ["state", "gender"]),`
			`("numerical", numeric_preprocessor, ["age", "weight"]),`
			`]`
			`)`

			`pipe = make_pipeline(preprocessor, LogisticRegression(max_iter=500))`
			`pipe # click on the diagram below to see the details of each step`

			`# %%`
			`# Displaying a Grid Search over a Pipeline with a Classifier`
			`################################################################################`
			# This section constructs a :class:`~sklearn.model_selection.GridSearchCV`
			# over a :class:`~sklearn.pipeline.Pipeline` with
			# :class:`~sklearn.ensemble.RandomForestClassifier` and displays its visual
			`# representation.`

			`import numpy as np`

			`from sklearn.compose import ColumnTransformer`
			`from sklearn.ensemble import RandomForestClassifier`
			`from sklearn.impute import SimpleImputer`
			`from sklearn.model_selection import GridSearchCV`
			`from sklearn.pipeline import Pipeline, make_pipeline`
			`from sklearn.preprocessing import OneHotEncoder, StandardScaler`

			`numeric_preprocessor = Pipeline(`
			`steps=[`
			`("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),`
			`("scaler", StandardScaler()),`
			`]`
			`)`

			`categorical_preprocessor = Pipeline(`
			`steps=[`
			`(`
			`"imputation_constant",`
			`SimpleImputer(fill_value="missing", strategy="constant"),`
			`),`
			`("onehot", OneHotEncoder(handle_unknown="ignore")),`
			`]`
			`)`

			`preprocessor = ColumnTransformer(`
			`[`
			`("categorical", categorical_preprocessor, ["state", "gender"]),`
			`("numerical", numeric_preprocessor, ["age", "weight"]),`
			`]`
			`)`

			`pipe = Pipeline(`
			`steps=[("preprocessor", preprocessor), ("classifier", RandomForestClassifier())]`
			`)`

			`param_grid = {`
			`"classifier__n_estimators": [200, 500],`
			`"classifier__max_features": ["auto", "sqrt", "log2"],`
			`"classifier__max_depth": [4, 5, 6, 7, 8],`
			`"classifier__criterion": ["gini", "entropy"],`
			`}`

			`grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=1)`
			`grid_search # click on the diagram below to see the details of each step`