Scorri per mostrare il menu


              123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
            
import logging
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Emit INFO-level records through the root logger as "<LEVEL> <message>".
logging.basicConfig(format="%(levelname)s %(message)s", level=logging.INFO)

# Split the Iris samples 80/20 into training and evaluation sets; the pinned
# random_state is passed so the split uses a fixed seed.
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Select the MLflow experiment by name and announce it in the log.
experiment_name = "IrisRandomForest"
mlflow.set_experiment(experiment_name)
logging.info(f"Using experiment: {experiment_name}")

# One dict per configuration to try, built from (n_estimators, max_depth) pairs.
hyperparams_list = [
    {"n_estimators": n_trees, "max_depth": depth}
    for n_trees, depth in ((50, 2), (100, 3), (150, 4))
]

# Train, evaluate, and record one MLflow run per hyperparameter configuration.
for params in hyperparams_list:
    run_name = f"rf_n{params['n_estimators']}_d{params['max_depth']}"
    logging.info(f"Starting run: {run_name} with params: {params}")
    with mlflow.start_run(run_name=run_name) as active_run:
        run_id = active_run.info.run_id

        # Fit on the training split, then score on the held-out split.
        forest = RandomForestClassifier(
            random_state=42,
            max_depth=params["max_depth"],
            n_estimators=params["n_estimators"],
        ).fit(X_train, y_train)
        predicted = forest.predict(X_test)
        acc = accuracy_score(y_test, predicted)
        logging.info(f"Accuracy: {acc:.4f}")

        # Record the configuration and its score on the active run.
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)

        # Store the fitted model together with a five-row input example and a
        # signature inferred from the evaluation inputs and predictions.
        mlflow.sklearn.log_model(
            forest,
            "model",
            input_example=X_test[:5],
            signature=infer_signature(X_test, predicted),
        )

        # Report where this run and its artifacts can be found.
        artifact_uri = mlflow.get_artifact_uri()
        logging.info(f"Run ID: {run_id}")
        logging.info(f"Artifact URI: {artifact_uri}")

Dopo aver registrato più run con iperparametri diversi, è possibile confrontare i risultati tramite l'interfaccia utente di MLflow o l'API. Per usare l'interfaccia utente, avviarla eseguendo `mlflow ui` nel terminale. L'interfaccia mostra tutte le run con i relativi parametri e metriche e consente di ordinarle o filtrarle per accuracy, valori degli iperparametri o tag. È possibile selezionare più run per confrontarne le prestazioni fianco a fianco e scegliere il modello migliore in base alla metrica di valutazione. In alternativa, è possibile usare l'API Python di MLflow per cercare le run e recuperare la migliore in modo programmatico, ad esempio con `mlflow.search_runs(order_by=["metrics.accuracy DESC"])`; questo è utile per automatizzare la selezione del modello nei flussi di lavoro di produzione.

Nota

Una denominazione coerente delle run e una gestione coerente dei tag rendono gestibili anche i progetti di grandi dimensioni. Utilizzare nomi descrittivi per le run e tag per tracciare lo scopo dell'esperimento, la versione del dataset o il gruppo di ricerca degli iperparametri. Questa pratica consente di identificare e confrontare rapidamente le run rilevanti man mano che il progetto cresce.

Tutto è chiaro?

Grazie per i tuoi commenti!

Sezione 1. Capitolo 6

Chiedi all'AI

Chiedi pure quello che desideri o prova una delle domande suggerite per iniziare la conversazione

Confronto e Gestione delle Esecuzioni


              123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
            
import logging
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Emit INFO-level records through the root logger as "<LEVEL> <message>".
logging.basicConfig(format="%(levelname)s %(message)s", level=logging.INFO)

# Split the Iris samples 80/20 into training and evaluation sets; the pinned
# random_state is passed so the split uses a fixed seed.
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Select the MLflow experiment by name and announce it in the log.
experiment_name = "IrisRandomForest"
mlflow.set_experiment(experiment_name)
logging.info(f"Using experiment: {experiment_name}")

# One dict per configuration to try, built from (n_estimators, max_depth) pairs.
hyperparams_list = [
    {"n_estimators": n_trees, "max_depth": depth}
    for n_trees, depth in ((50, 2), (100, 3), (150, 4))
]

# Train, evaluate, and record one MLflow run per hyperparameter configuration.
for params in hyperparams_list:
    run_name = f"rf_n{params['n_estimators']}_d{params['max_depth']}"
    logging.info(f"Starting run: {run_name} with params: {params}")
    with mlflow.start_run(run_name=run_name) as active_run:
        run_id = active_run.info.run_id

        # Fit on the training split, then score on the held-out split.
        forest = RandomForestClassifier(
            random_state=42,
            max_depth=params["max_depth"],
            n_estimators=params["n_estimators"],
        ).fit(X_train, y_train)
        predicted = forest.predict(X_test)
        acc = accuracy_score(y_test, predicted)
        logging.info(f"Accuracy: {acc:.4f}")

        # Record the configuration and its score on the active run.
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)

        # Store the fitted model together with a five-row input example and a
        # signature inferred from the evaluation inputs and predictions.
        mlflow.sklearn.log_model(
            forest,
            "model",
            input_example=X_test[:5],
            signature=infer_signature(X_test, predicted),
        )

        # Report where this run and its artifacts can be found.
        artifact_uri = mlflow.get_artifact_uri()
        logging.info(f"Run ID: {run_id}")
        logging.info(f"Artifact URI: {artifact_uri}")

Nota

Tutto è chiaro?

Grazie per i tuoi commenti!

Sezione 1. Capitolo 6