Now you'll compare the models we've covered using a single dataset — the **breast cancer dataset**. The target variable is the `'diagnosis'` column, where `1` represents malignant and `0` represents benign cases.

You will apply `GridSearchCV` to each model to find the best parameters. In this task, you'll use **recall** as the scoring metric because **minimizing false negatives** — malignant cases predicted as benign — is crucial. To have `GridSearchCV` select the best parameters based on recall, set `scoring='recall'`.

import unittest
import importlib


def _dynamic_test(test_case, condition, success_message, failure_message):
    """Report a check's outcome on `test_case` using a human-readable message.

    The chosen message is written to `test_case._testMethodName` before the
    outcome is signalled (presumably so the runner shows the message in place
    of the method name -- confirm against the reporting tool in use).

    Args:
        test_case: Object exposing `assertTrue` and `fail`, e.g. a
            `unittest.TestCase` instance.
        condition: Truthy when the check passed.
        success_message: Message used when `condition` is truthy.
        failure_message: Message used when `condition` is falsy.
    """
    passed = bool(condition)
    test_case._testMethodName = success_message if passed else failure_message

    if not passed:
        test_case.fail(failure_message)
    else:
        # Trivially true assertion; it only carries the success message.
        test_case.assertTrue(True, success_message)


class TestUserCode(unittest.TestCase):
    """Validate the parameter grids and `GridSearchCV` objects in `user_code`.

    For each model prefix (`knn`, `lr`, `dt`, `rf`) the `user_code` module is
    expected to declare:

    * `<prefix>_params` -- a `dict` of hyperparameter candidates, and
    * `<prefix>_grid` -- a `GridSearchCV` wrapping the matching estimator with
      that same parameter grid.

    Outcomes are reported through the module-level `_dynamic_test` helper.
    """

    # Expected hyperparameter grids, keyed by the model prefix used in the
    # `<prefix>_params` / `<prefix>_grid` variable names. Shared by the
    # params checks and the grid checks so the two cannot drift apart.
    _EXPECTED_PARAMS = {
        'knn': {'n_neighbors': [3, 5, 7, 12]},
        'lr': {'C': [0.1, 1, 10]},
        'dt': {'max_depth': [2, 4, 6, 10], 'min_samples_leaf': [1, 2, 4, 7]},
        'rf': {'max_depth': [2, 4, 6], 'n_estimators': [20, 50, 100]},
    }

    def _check_params(self, prefix):
        """Check that `user_code.<prefix>_params` is the expected `dict`.

        Args:
            prefix: Model prefix; must be a key of `_EXPECTED_PARAMS`.
        """
        # Imported lazily so that a failing import surfaces inside the
        # individual test instead of when this test module is loaded.
        import user_code

        variable = f'{prefix}_params'
        expected_value = self._EXPECTED_PARAMS[prefix]
        actual_value = getattr(user_code, variable, None)

        if actual_value is None:
            condition = False
            failure_message = f"The `{variable}` variable is not declared."
        elif isinstance(actual_value, dict):
            condition = actual_value == expected_value
            failure_message = f"Expected `{variable}` to contain `{expected_value}`, but got `{actual_value}`."
        else:
            condition = False
            failure_message = f"`{variable}` is not a `dict`."

        _dynamic_test(
            self,
            condition,
            f"`{variable}` contains the correct values.",
            failure_message,
        )

    def _check_grid(self, prefix, estimator_cls):
        """Check that `user_code.<prefix>_grid` is a correctly built `GridSearchCV`.

        The object passes when its `estimator` is an instance of
        `estimator_cls` and its `param_grid` equals the expected grid for
        `prefix`.

        Args:
            prefix: Model prefix; must be a key of `_EXPECTED_PARAMS`.
            estimator_cls: Estimator class the search is expected to wrap.
        """
        import user_code
        from sklearn.model_selection import GridSearchCV

        variable = f'{prefix}_grid'
        param_grid = self._EXPECTED_PARAMS[prefix]
        actual_value = getattr(user_code, variable, None)

        # NOTE(review): `scoring` is not verified here although the task text
        # asks for `scoring='recall'` -- consider adding that check.
        if actual_value is None:
            condition = False
            failure_message = f"The `{variable}` variable is not declared."
        elif isinstance(actual_value, GridSearchCV):
            condition = (
                isinstance(actual_value.estimator, estimator_cls)
                and actual_value.param_grid == param_grid
            )
            failure_message = (
                f"Expected `{variable}` to be a `GridSearchCV` with "
                f"`estimator={estimator_cls.__name__}()`, `param_grid={param_grid}`, "
                f"but got `estimator={actual_value.estimator}`, "
                f"`param_grid={actual_value.param_grid}`."
            )
        else:
            condition = False
            failure_message = f"`{variable}` is not a `GridSearchCV`."

        _dynamic_test(
            self,
            condition,
            # The parameter grid lives in `<prefix>_params`; `<prefix>_grid`
            # is the GridSearchCV object being checked.
            f"`{variable}` is a `GridSearchCV` with `estimator={prefix}` and `param_grid={prefix}_params`.",
            failure_message,
        )

    def test_knn_params_is_correct(self):
        self._check_params('knn')

    def test_lr_params_is_correct(self):
        self._check_params('lr')

    def test_dt_params_is_correct(self):
        self._check_params('dt')

    def test_rf_params_is_correct(self):
        self._check_params('rf')

    def test_knn_grid_is_correct(self):
        from sklearn.neighbors import KNeighborsClassifier

        self._check_grid('knn', KNeighborsClassifier)

    def test_lr_grid_is_correct(self):
        from sklearn.linear_model import LogisticRegression

        self._check_grid('lr', LogisticRegression)

    def test_dt_grid_is_correct(self):
        from sklearn.tree import DecisionTreeClassifier

        self._check_grid('dt', DecisionTreeClassifier)

    def test_rf_grid_is_correct(self):
        from sklearn.ensemble import RandomForestClassifier

        self._check_grid('rf', RandomForestClassifier)


# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

test_main.py

Master the core classification algorithms that power modern machine learning. Explore how models like k-NN, logistic regression, decision trees, and random forests make predictions, evaluate their accuracy, and understand when to use each. Build the skills to compare models and choose the best one for your data.

Discover how the k-nearest neighbors algorithm makes predictions based on similarity. Learn to handle multiple features, tune parameters, and apply cross-validation to improve accuracy.

Understand how logistic regression models probabilities and classifies outcomes. Practice implementing it, interpreting decision boundaries, and applying regularization to prevent overfitting.

Learn how decision trees split data into meaningful groups based on feature values. Explore how parameters like tree depth and minimum samples per leaf affect model performance and generalization.

Explore how random forests combine multiple decision trees to improve accuracy and robustness. Understand the role of randomness and apply this ensemble method to real-world data.

Evaluate models using metrics such as accuracy, precision, recall, and F1-score. Learn to interpret confusion matrices and compare multiple classifiers to identify the best-performing model.

Challenge: Comparing Models

Solution

Challenge: Comparing Models

Solution