Je gaat nu een regressiemodel bouwen aan de hand van een praktijkvoorbeeld. Je hebt een bestand, `houses_simple.csv`, dat huizenprijzen (`price`) bevat, met de oppervlakte (`square_feet`) als kenmerk.

import pandas as pd

# Location of the hosted housing dataset used throughout this example.
DATA_URL = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/b22d1166-efda-45e8-979e-6c3ecfc566fc/houses_simple.csv'

# Read the CSV into a DataFrame and show its first rows as a quick preview.
df = pd.read_csv(DATA_URL)
preview = df.head()
print(preview)

De volgende stap is het toewijzen van variabelen en het visualiseren van de dataset:

import pandas as pd
import matplotlib.pyplot as plt

# Location of the hosted housing dataset used throughout this example.
DATA_URL = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/b22d1166-efda-45e8-979e-6c3ecfc566fc/houses_simple.csv'
df = pd.read_csv(DATA_URL)

# Feature (square footage) and target (price) columns of the dataset.
X, y = df['square_feet'], df['price']

# Scatter the target against the feature; alpha=0.5 draws the markers
# half-transparent.
plt.scatter(X, y, alpha=0.5)
plt.show()

In het voorbeeld met de lengte van een persoon was het veel eenvoudiger om je een lijn voor te stellen die goed bij de gegevens past.

Maar nu vertonen onze gegevens veel meer variatie, omdat de doelvariabele (de prijs) ook sterk afhangt van andere factoren, zoals de ouderdom van het huis, de locatie, het interieur, enzovoort.
Desondanks is het de opdracht om de lijn te bepalen die het beste bij de beschikbare gegevens past; deze lijn geeft de trend weer. Gebruik hiervoor de `LinearRegression`-klasse uit `scikit-learn`. Let op: `LinearRegression` verwacht de kenmerken als 2D-array, dus zet de kenmerkwaarden om naar een NumPy-array en hervorm die met `.reshape(-1, 1)`.

import unittest
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Helper that reports a check under a descriptive, outcome-dependent name
def _dynamic_test(test_case, condition, ok_msg, fail_msg):
    """Record the outcome of a check on ``test_case`` under a readable name.

    The test case's ``_testMethodName`` is overwritten with ``ok_msg`` when
    ``condition`` is truthy and with ``fail_msg`` otherwise, so the message
    takes the place of the method name wherever that attribute is shown.

    Args:
        test_case: The running test case object whose name is replaced.
        condition: Result of the check; evaluated for truthiness once.
        ok_msg: Name to assign when the check passed.
        fail_msg: Name to assign, and failure message to raise, when the
            check did not pass.
    """
    if not condition:
        test_case._testMethodName = fail_msg
        test_case.fail(fail_msg)
        return

    test_case._testMethodName = ok_msg
    test_case.assertTrue(True)

class TestUserCode(unittest.TestCase):
    """Checks the learner's ``user_code`` module for the house-price challenge.

    Every test imports ``user_code`` locally, builds a boolean condition from
    the module's attributes and passes it to ``_dynamic_test`` together with
    a success message and a failure message.

    The test methods are documented with ``#`` comments rather than
    docstrings: unittest includes a test method's docstring in its verbose
    result description, so adding docstrings would change the reported
    output.
    """

    def test_y_is_price(self):
        # `y` must exist, be a pandas Series and carry the column name "price".
        import user_code

        condition = (
            hasattr(user_code, "y") and
            isinstance(user_code.y, pd.Series) and
            user_code.y.name == "price"
        )

        _dynamic_test(
            self,
            condition,
            "The `y` variable correctly contains the `price` column.",
            "Expected `y` to be assigned as df['price']."
        )

    def test_X_reshaped_is_2d(self):
        # `X_reshaped` must be a NumPy array with two dimensions and exactly
        # one column.
        import user_code

        condition = (
            hasattr(user_code, "X_reshaped") and
            isinstance(user_code.X_reshaped, np.ndarray) and
            user_code.X_reshaped.ndim == 2 and
            user_code.X_reshaped.shape[1] == 1
        )

        _dynamic_test(
            self,
            condition,
            "The `X_reshaped` is a 2D NumPy array.",
            "Expected `X_reshaped` to be reshaped using .reshape(-1, 1)."
        )

    def test_regression_model_is_sklearn(self):
        # `regression_model` must exist and be a LinearRegression instance.
        import user_code

        condition = (
            hasattr(user_code, "regression_model") and
            isinstance(user_code.regression_model, LinearRegression)
        )

        _dynamic_test(
            self,
            condition,
            "The model is an instance of LinearRegression.",
            "Expected `regression_model` to be initialized as LinearRegression()."
        )

    def test_model_is_fitted(self):
        # The presence of a `coef_` attribute on the model is used as the
        # sign that `.fit()` was called.
        import user_code

        # getattr with a default covers a missing `regression_model` without
        # a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit.
        model = getattr(user_code, "regression_model", None)
        condition = model is not None and hasattr(model, "coef_")

        _dynamic_test(
            self,
            condition,
            "The model is fitted.",
            "Expected the model to be trained using the .fit() method."
        )

    def test_X_new_reshaped_correct(self):
        # `X_new_reshaped` must be a NumPy array of shape (3, 1).
        import user_code

        condition = (
            hasattr(user_code, "X_new_reshaped") and
            isinstance(user_code.X_new_reshaped, np.ndarray) and
            user_code.X_new_reshaped.shape == (3, 1)
        )

        _dynamic_test(
            self,
            condition,
            "The `X_new_reshaped` array has the correct shape (3, 1).",
            "Expected `X_new_reshaped` to be reshaped using .reshape(-1, 1)."
        )

    def test_y_pred_is_correct(self):
        # `y_pred` must be a NumPy array holding exactly three values; the
        # values themselves are not compared against the model's output.
        import user_code

        condition = (
            hasattr(user_code, "y_pred") and
            isinstance(user_code.y_pred, np.ndarray) and
            user_code.y_pred.size == 3
        )

        _dynamic_test(
            self,
            condition,
            "The `y_pred` array contains predictions.",
            "Expected `y_pred` to be the result of the .predict() method."
        )

# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

test_code.py

Beheers de kernalgoritmen van supervised learning en implementeer deze met Scikit-learn. Verken lineaire en polynomiale regressie voor prijsvoorspelling en maak de overstap naar classificatie met k-NN, Logistische Regressie en Beslissingsbomen. Leer modellen evalueren via cross-validatie, overfitting beheersen met regularisatie en hyperparameters optimaliseren. Bouw robuuste voorspellende systemen en definieer complexe beslissingsgrenzen voor multi-klasse classificatietaken.

Uitdaging: Het Voorspellen van Huizenprijzen

Oplossing