`RandomizedSearchCV` funciona como `GridSearchCV`, pero en lugar de comprobar **todas** las combinaciones de hiperparámetros, evalúa un **subconjunto aleatorio**.
En el ejemplo siguiente, la cuadrícula contiene 100 combinaciones. `GridSearchCV` prueba todas ellas, mientras que `RandomizedSearchCV` puede muestrear, por ejemplo, 20 — controlado por `n_iter`. Esto hace que el ajuste sea **más rápido** y, normalmente, se obtiene una puntuación cercana a la mejor.

import unittest
import pandas as pd
import numpy as np

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record the outcome of ``condition`` on ``test_case`` under a readable name.

    The test's ``_testMethodName`` is overwritten with the message matching
    the outcome before the result is recorded (presumably so the report shows
    the message instead of the method name -- confirm against the runner).

    Args:
        test_case: The running test object; must provide ``assertTrue`` and
            ``fail`` and accept assignment to ``_testMethodName``.
        condition: Value whose truthiness decides pass or fail.
        success_message: Name/message used when ``condition`` is truthy.
        failure_message: Name/message used when ``condition`` is falsy.
    """
    passed = bool(condition)
    label = success_message if passed else failure_message

    # Rename first, so the outcome is recorded under the chosen label.
    test_case._testMethodName = label

    if not passed:
        test_case.fail(label)
        return
    test_case.assertTrue(True, label)

class TestGridAndRandomizedSearch(unittest.TestCase):
    """Validate the objects a learner's ``user_code`` module exposes for the task.

    The tests look up ``randomized`` and ``grid`` on the imported module and
    report every verdict through ``_dynamic_test``.  All lookups go through
    ``getattr(..., None)`` so that a missing name produces the intended
    failure message instead of an ``AttributeError``.

    Test methods are described with ``#`` comments rather than docstrings on
    purpose: unittest adds a test method's docstring to its verbose
    description, which would change the reported output.
    """

    @classmethod
    def setUpClass(cls):
        """Load the task dataset and import the learner's module once per class."""
        # Data from the task statement.
        cls.df = pd.read_csv(
            'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a65bbc96-309e-4df9-a790-a1eb8c815a1c/penguins_pipelined.csv'
        )
        cls.X, cls.y = cls.df.drop('species', axis=1), cls.df['species']
        # Imported here rather than at module level, so the learner's code
        # only runs when the tests are being set up.
        import user_code
        # Expected names: randomized, grid, model, param_grid, X, y
        cls.uc = user_code

    def test_randomizedcv_initialized_with_niter_20(self):
        # `randomized` must be a RandomizedSearchCV created with n_iter=20.
        from sklearn.model_selection import RandomizedSearchCV
        uc = self.uc
        rnd = getattr(uc, 'randomized', None)
        cond = isinstance(rnd, RandomizedSearchCV) and getattr(rnd, 'n_iter', None) == 20
        _dynamic_test(
            self,
            cond,
            "RandomizedSearchCV initialized with n_iter=20",
            "RandomizedSearchCV must be initialized with n_iter=20"
        )

    def test_gridsearchcv_initialized(self):
        # `grid` must be a GridSearchCV instance.
        # NOTE(review): the messages mention the provided param_grid, but only
        # the type is checked here -- confirm whether that is intended.
        from sklearn.model_selection import GridSearchCV
        uc = self.uc
        grd = getattr(uc, 'grid', None)
        cond = isinstance(grd, GridSearchCV)
        _dynamic_test(
            self,
            cond,
            "GridSearchCV initialized with provided param_grid",
            "GridSearchCV must be initialized with provided param_grid"
        )

    def test_param_space_keys(self):
        # Both searches must be configured with exactly these hyperparameters.
        uc = self.uc
        expected_keys = {'n_neighbors', 'weights', 'p'}
        # Look the searches up defensively: a missing name must yield the
        # failure message below, not an AttributeError.
        rnd = getattr(uc, 'randomized', None)
        grd = getattr(uc, 'grid', None)
        # RandomizedSearchCV keeps its parameter space in `param_distributions`,
        # GridSearchCV in `param_grid`.
        rnd_space = getattr(rnd, 'param_distributions', None)
        grd_space = getattr(grd, 'param_grid', None)
        rnd_ok = isinstance(rnd_space, dict) and set(rnd_space.keys()) == expected_keys
        grd_ok = isinstance(grd_space, dict) and set(grd_space.keys()) == expected_keys
        cond = rnd_ok and grd_ok
        _dynamic_test(
            self,
            cond,
            "Hyperparameter space includes n_neighbors, weights, p for both searches",
            "Hyperparameter space must include n_neighbors, weights, p for both searches"
        )

    def test_estimator_is_knn(self):
        # Both searches must wrap a KNeighborsClassifier.
        from sklearn.neighbors import KNeighborsClassifier
        uc = self.uc
        # Defensive lookups, same reason as in test_param_space_keys.
        rnd = getattr(uc, 'randomized', None)
        grd = getattr(uc, 'grid', None)
        is_knn_rnd = isinstance(getattr(rnd, 'estimator', None), KNeighborsClassifier)
        is_knn_grd = isinstance(getattr(grd, 'estimator', None), KNeighborsClassifier)
        cond = is_knn_rnd and is_knn_grd
        _dynamic_test(
            self,
            cond,
            "Both searches use KNeighborsClassifier as estimator",
            "Both searches must use KNeighborsClassifier as estimator"
        )

    def test_both_searches_fitted_and_have_best_attributes(self):
        # The presence of the fitted attributes is used as the "was fitted" signal.
        uc = self.uc
        searches = (getattr(uc, 'randomized', None), getattr(uc, 'grid', None))
        try:
            attrs_ok = all(
                hasattr(obj, 'best_estimator_') and hasattr(obj, 'best_score_') and hasattr(obj, 'cv_results_')
                for obj in searches
            )
        except Exception:
            # Best effort: any error while probing counts as "not fitted".
            attrs_ok = False
        _dynamic_test(
            self,
            attrs_ok,
            "Both searches are fitted (best_estimator_, best_score_, cv_results_ are available)",
            "Both searches must be fitted (best_estimator_, best_score_, cv_results_ must be available)"
        )

    def test_grid_and_randomized_scores_are_finite(self):
        # Both best scores must convert to float and be neither NaN nor infinite.
        uc = self.uc
        try:
            grid_score = float(uc.grid.best_score_)
            rnd_score = float(uc.randomized.best_score_)
            cond = np.isfinite(grid_score) and np.isfinite(rnd_score)
        except Exception:
            # Missing objects/attributes or non-numeric scores all count as failure.
            cond = False
        _dynamic_test(
            self,
            cond,
            "Best scores of GridSearchCV and RandomizedSearchCV are finite numbers",
            "Best scores of GridSearchCV and RandomizedSearchCV must be finite numbers"
        )

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_code.py

Intenta ejecutar el código varias veces. `RandomizedSearchCV` puede igualar la puntuación de grid search cuando selecciona aleatoriamente los **mejores** hiperparámetros.


Nota

El aprendizaje automático se utiliza actualmente en todas partes. ¿Quieres aprenderlo por ti mismo? Este curso es una introducción al mundo del aprendizaje automático para que puedas aprender los conceptos básicos, trabajar con Scikit-learn – la biblioteca más popular para ML – y construir tu primer proyecto de aprendizaje automático.
Este curso está dirigido a estudiantes con conocimientos básicos de Python, Pandas y NumPy.

Aprende los conceptos de Machine Learning y el flujo de trabajo de un proyecto de ML.

El preprocesamiento es probablemente la etapa más importante de un proyecto de ML. Este capítulo abarca los pasos de preprocesamiento necesarios para casi cualquier conjunto de datos.

Una tubería (pipeline) es una forma ordenada de combinar todos los pasos de preprocesamiento junto con un modelo. Las tuberías facilitan considerablemente el entrenamiento y uso de un modelo.

El modelado es la etapa más divertida de un proyecto de ML. Aprendamos a construir, ajustar y evaluar el modelo.

Desafío: Ajuste de Hiperparámetros con RandomizedSearchCV

Solución