`RandomizedSearchCV` funciona como o `GridSearchCV`, mas em vez de verificar **todas** as combinações de hiperparâmetros, avalia um **subconjunto aleatório**.
No exemplo abaixo, a grade contém 100 combinações. O `GridSearchCV` testa todas elas, enquanto o `RandomizedSearchCV` amostra apenas uma parte — por exemplo, 20 —, quantidade controlada pelo parâmetro `n_iter`. Isso torna o ajuste **mais rápido** e, geralmente, ainda encontra uma pontuação próxima da melhor.

import unittest
import pandas as pd
import numpy as np

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record the outcome of a check under a human-readable name.

    The test case's ``_testMethodName`` is overwritten with the message that
    matches the outcome (presumably so the runner's report shows the message
    instead of the method name -- confirm against the grading harness).

    Args:
        test_case: The ``unittest.TestCase`` instance running the check.
        condition: Truthy when the check passed.
        success_message: Name/message used when ``condition`` is truthy.
        failure_message: Name/message used when ``condition`` is falsy; also
            passed to ``test_case.fail``.
    """
    # Guard clause: handle the failing outcome first.
    if not condition:
        test_case._testMethodName = failure_message
        test_case.fail(failure_message)
        return

    test_case._testMethodName = success_message
    # Trivially-true assertion kept so the passing path still goes through
    # the test case's assertion machinery with the success message.
    test_case.assertTrue(True, success_message)

class TestGridAndRandomizedSearch(unittest.TestCase):
    """Grade a learner's ``user_code`` module for the search-comparison task.

    The module is expected to expose a ``RandomizedSearchCV`` named
    ``randomized`` and a ``GridSearchCV`` named ``grid``. Each test reports
    its outcome through ``_dynamic_test`` so that a missing or wrong object
    yields the descriptive failure message rather than a raw exception.
    """

    @classmethod
    def setUpClass(cls):
        """Load the task dataset and import the learner's module once."""
        # Data from the task statement
        cls.df = pd.read_csv(
            'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a65bbc96-309e-4df9-a790-a1eb8c815a1c/penguins_pipelined.csv'
        )
        cls.X, cls.y = cls.df.drop('species', axis=1), cls.df['species']
        # Imported here (not at module top) so the learner's code runs as
        # part of class setup.
        import user_code
        cls.uc = user_code  # expected to define: randomized, grid, model, param_grid, X, y

    def test_randomizedcv_initialized_with_niter_20(self):
        """``randomized`` must be a RandomizedSearchCV with ``n_iter == 20``."""
        from sklearn.model_selection import RandomizedSearchCV
        uc = self.uc
        rnd = getattr(uc, 'randomized', None)
        cond = isinstance(rnd, RandomizedSearchCV) and getattr(rnd, 'n_iter', None) == 20
        _dynamic_test(
            self,
            cond,
            "RandomizedSearchCV initialized with n_iter=20",
            "RandomizedSearchCV must be initialized with n_iter=20"
        )

    def test_gridsearchcv_initialized(self):
        """``grid`` must be a GridSearchCV instance."""
        from sklearn.model_selection import GridSearchCV
        uc = self.uc
        grd = getattr(uc, 'grid', None)
        cond = isinstance(grd, GridSearchCV)
        _dynamic_test(
            self,
            cond,
            "GridSearchCV initialized with provided param_grid",
            "GridSearchCV must be initialized with provided param_grid"
        )

    def test_param_space_keys(self):
        """Both searches must tune exactly ``n_neighbors``, ``weights`` and ``p``."""
        uc = self.uc
        expected_keys = {'n_neighbors', 'weights', 'p'}
        # Look the search objects up defensively: if the learner did not
        # define them, the test should fail with the message below instead
        # of erroring out with AttributeError.
        rnd = getattr(uc, 'randomized', None)
        grd = getattr(uc, 'grid', None)
        # In RandomizedSearchCV the parameters may be stored as param_distributions
        rnd_space = getattr(rnd, 'param_distributions', None)
        grd_space = getattr(grd, 'param_grid', None)
        rnd_ok = isinstance(rnd_space, dict) and set(rnd_space.keys()) == expected_keys
        grd_ok = isinstance(grd_space, dict) and set(grd_space.keys()) == expected_keys
        cond = rnd_ok and grd_ok
        _dynamic_test(
            self,
            cond,
            "Hyperparameter space includes n_neighbors, weights, p for both searches",
            "Hyperparameter space must include n_neighbors, weights, p for both searches"
        )

    def test_estimator_is_knn(self):
        """Both searches must wrap a ``KNeighborsClassifier`` estimator."""
        from sklearn.neighbors import KNeighborsClassifier
        uc = self.uc
        # Same defensive lookup as in the other tests: a missing object is a
        # graded failure, not an AttributeError.
        rnd = getattr(uc, 'randomized', None)
        grd = getattr(uc, 'grid', None)
        is_knn_rnd = isinstance(getattr(rnd, 'estimator', None), KNeighborsClassifier)
        is_knn_grd = isinstance(getattr(grd, 'estimator', None), KNeighborsClassifier)
        cond = is_knn_rnd and is_knn_grd
        _dynamic_test(
            self,
            cond,
            "Both searches use KNeighborsClassifier as estimator",
            "Both searches must use KNeighborsClassifier as estimator"
        )

    def test_both_searches_fitted_and_have_best_attributes(self):
        """Both searches must expose the attributes that exist only after ``fit``."""
        uc = self.uc
        try:
            attrs_ok = all(
                hasattr(obj, 'best_estimator_') and hasattr(obj, 'best_score_') and hasattr(obj, 'cv_results_')
                for obj in (uc.randomized, uc.grid)
            )
        except Exception:
            # Deliberately broad: any problem reaching the objects (e.g. they
            # are not defined) counts as "not fitted" and is reported below.
            attrs_ok = False
        _dynamic_test(
            self,
            attrs_ok,
            "Both searches are fitted (best_estimator_, best_score_, cv_results_ are available)",
            "Both searches must be fitted (best_estimator_, best_score_, cv_results_ must be available)"
        )

    def test_grid_and_randomized_scores_are_finite(self):
        """``best_score_`` of both searches must convert to a finite float."""
        uc = self.uc
        try:
            grid_score = float(uc.grid.best_score_)
            rnd_score = float(uc.randomized.best_score_)
            cond = np.isfinite(grid_score) and np.isfinite(rnd_score)
        except Exception:
            # Deliberately broad: missing objects/attributes or values that
            # cannot be converted to float are all graded as a failure.
            cond = False
        _dynamic_test(
            self,
            cond,
            "Best scores of GridSearchCV and RandomizedSearchCV are finite numbers",
            "Best scores of GridSearchCV and RandomizedSearchCV must be finite numbers"
        )

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_code.py

Tente executar o código várias vezes. O `RandomizedSearchCV` pode igualar a pontuação do grid search quando a amostra aleatória, por acaso, inclui a **melhor** combinação de hiperparâmetros.


Nota

O aprendizado de máquina está presente em todos os lugares atualmente. Quer aprender por conta própria? Este curso é uma introdução ao mundo do aprendizado de máquina para que você compreenda os conceitos básicos, trabalhe com o Scikit-learn – a biblioteca mais popular para ML – e desenvolva seu primeiro projeto de aprendizado de máquina.
Este curso é destinado a estudantes com conhecimentos básicos em Python, Pandas e Numpy.

Aprenda os conceitos de Machine Learning e o fluxo de trabalho de projetos de ML.

O pré-processamento é provavelmente a etapa mais importante de um projeto de ML. Este capítulo aborda as etapas de pré-processamento necessárias para praticamente qualquer conjunto de dados.

Um pipeline é uma maneira organizada de combinar todas as etapas de pré-processamento, bem como um modelo. Pipelines facilitam muito o treinamento e a utilização de um modelo.

A modelagem é a etapa mais divertida de um projeto de ML. Vamos aprender a construir, ajustar e avaliar o modelo!

Desafio: Ajuste de Hiperparâmetros com RandomizedSearchCV

Solução