You will now build a regression model on a real-world example. You have a file, `houses_simple.csv`, that holds information about house prices, with each house's area (`square_feet`) as the feature.

import pandas as pd

# Location of the housing dataset used throughout this example.
DATA_URL = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/b22d1166-efda-45e8-979e-6c3ecfc566fc/houses_simple.csv'

# Load the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(DATA_URL)
print(df.head())

The next step is to assign variables and visualize the dataset:

import pandas as pd
import matplotlib.pyplot as plt

# Location of the housing dataset used throughout this example.
DATA_URL = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/b22d1166-efda-45e8-979e-6c3ecfc566fc/houses_simple.csv'
df = pd.read_csv(DATA_URL)

# Feature (house area) and target (house price) columns.
X, y = df['square_feet'], df['price']

# Semi-transparent markers keep overlapping points distinguishable.
plt.scatter(X, y, alpha=0.5)
plt.show()

In the example with a person's height, it was much easier to imagine a line fitting the data well.

But now our data has much more variance, since the target depends heavily on many other things, such as age, location, interior, etc.
Nevertheless, the task is to build the line that best fits the data we have; it will show the overall trend. Use the `LinearRegression` class from `scikit-learn` for that.

import unittest
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Helper for dynamic test names
def _dynamic_test(test_case, condition, ok_msg, fail_msg):
    """Report a check through ``test_case`` under a message-based name.

    The test case's ``_testMethodName`` is overwritten with ``ok_msg`` when
    ``condition`` is truthy and with ``fail_msg`` otherwise; in the latter
    case ``test_case.fail(fail_msg)`` is called as well.
    """
    passed = bool(condition)

    # Rename the test before reporting so the outcome carries the message.
    test_case._testMethodName = ok_msg if passed else fail_msg

    if passed:
        test_case.assertTrue(True)
        return
    test_case.fail(fail_msg)

class TestUserCode(unittest.TestCase):
    """Checks the variables that the learner's ``user_code`` module defines.

    Every test imports ``user_code`` locally and reports its outcome through
    ``_dynamic_test`` with a pass message and a failure message.

    Test methods are documented with ``#`` comments instead of docstrings
    because unittest adds the first docstring line to a test's description.
    """

    def test_y_is_price(self):
        # `y` must exist, be a pandas Series, and be named "price".
        import user_code

        condition = (
            hasattr(user_code, "y") and
            isinstance(user_code.y, pd.Series) and
            user_code.y.name == "price"
        )

        _dynamic_test(
            self,
            condition,
            "The `y` variable correctly contains the `price` column.",
            "Expected `y` to be assigned as df['price']."
        )

    def test_X_reshaped_is_2d(self):
        # `X_reshaped` must be a 2D NumPy array with exactly one column.
        import user_code

        condition = (
            hasattr(user_code, "X_reshaped") and
            isinstance(user_code.X_reshaped, np.ndarray) and
            user_code.X_reshaped.ndim == 2 and
            user_code.X_reshaped.shape[1] == 1
        )

        _dynamic_test(
            self,
            condition,
            "The `X_reshaped` is a 2D NumPy array.",
            "Expected `X_reshaped` to be reshaped using .reshape(-1, 1)."
        )

    def test_regression_model_is_sklearn(self):
        # `regression_model` must exist and be a LinearRegression instance.
        import user_code

        condition = (
            hasattr(user_code, "regression_model") and
            isinstance(user_code.regression_model, LinearRegression)
        )

        _dynamic_test(
            self,
            condition,
            "The model is an instance of LinearRegression.",
            "Expected `regression_model` to be initialized as LinearRegression()."
        )

    def test_model_is_fitted(self):
        # The model counts as fitted when it exposes a `coef_` attribute.
        # A missing `regression_model` is reported as a failed check, using
        # the same `hasattr` guard as the other tests rather than a bare
        # `except:` that would also swallow KeyboardInterrupt/SystemExit.
        import user_code

        condition = (
            hasattr(user_code, "regression_model") and
            hasattr(user_code.regression_model, "coef_")
        )

        _dynamic_test(
            self,
            condition,
            "The model is fitted.",
            "Expected the model to be trained using the .fit() method."
        )

    def test_X_new_reshaped_correct(self):
        # `X_new_reshaped` must be a NumPy array of shape (3, 1).
        import user_code

        condition = (
            hasattr(user_code, "X_new_reshaped") and
            isinstance(user_code.X_new_reshaped, np.ndarray) and
            user_code.X_new_reshaped.shape == (3, 1)
        )

        _dynamic_test(
            self,
            condition,
            "The `X_new_reshaped` array has the correct shape (3, 1).",
            "Expected `X_new_reshaped` to be reshaped using .reshape(-1, 1)."
        )

    def test_y_pred_is_correct(self):
        # `y_pred` must be a NumPy array holding three values. Only the type
        # and size are checked here, not the predicted values themselves.
        import user_code

        condition = (
            hasattr(user_code, "y_pred") and
            isinstance(user_code.y_pred, np.ndarray) and
            user_code.y_pred.size == 3
        )

        _dynamic_test(
            self,
            condition,
            "The `y_pred` array contains predictions.",
            "Expected `y_pred` to be the result of the .predict() method."
        )

# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

test_code.py

Master the core algorithms of supervised learning and implement them using Scikit-learn. Explore linear and polynomial regression for price prediction, and transition into classification using k-NN, Logistic Regression, and Decision Trees. Learn to evaluate models through cross-validation, manage overfitting with regularization, and optimize hyperparameters. Build robust predictive systems and define complex decision boundaries for multi-class classification tasks.

Challenge: Predicting House Prices

Solution

Challenge: Predicting House Prices

Solution