import unittest
import importlib
import numpy as np
import pandas as pd
import seaborn as sns

def _dynamic_test(test_case, condition, success_msg, failure_msg):
    """Pass or fail ``test_case`` and rename it to the matching message.

    The test's ``_testMethodName`` attribute is overwritten with the
    message of the branch taken (presumably so that the test report shows
    a readable sentence instead of the method name -- confirm against the
    runner that consumes the results).

    Args:
        test_case: Object exposing ``assertTrue`` and ``fail``; in this
            file it is always a ``unittest.TestCase`` instance.
        condition: Truthy when the check succeeded.
        success_msg: Name/message used when ``condition`` is truthy.
        failure_msg: Name/message used when ``condition`` is falsy.
    """
    if not condition:
        # Rename first: fail() raises, so nothing after it would run.
        test_case._testMethodName = failure_msg
        test_case.fail(failure_msg)
        return

    test_case._testMethodName = success_msg
    test_case.assertTrue(True, success_msg)

class TestTitanicPreprocessing(unittest.TestCase):
    """Checks run against the ``processed_data`` exposed by ``user_code``.

    ``setUpClass`` imports the ``user_code`` module once and stores its
    module-level ``processed_data`` object on the class. Every test then
    computes a boolean condition from that data and hands it to
    ``_dynamic_test`` together with a success and a failure message.
    """

    @classmethod
    def setUpClass(cls):
        """Import ``user_code`` once and keep its ``processed_data``."""
        cls.user_code = importlib.import_module("user_code")
        cls.data = cls.user_code.processed_data

    def _has_columns(self, *names):
        """Return True when every name in ``names`` is a column of the data."""
        columns = self.data.columns
        return all(name in columns for name in names)

    @staticmethod
    def _looks_standardized(series):
        """Return True when ``series`` has mean≈0 and std≈1.

        Both statistics are rounded to one decimal place before being
        compared: the mean must satisfy ``abs(mean) < 0.2`` and the
        standard deviation ``0.8 < std < 1.2``.
        """
        mean = round(series.mean(), 1)
        std = round(series.std(), 1)
        return abs(mean) < 0.2 and 0.8 < std < 1.2

    def test_no_missing_values(self):
        """Check that no missing values remain after preprocessing."""
        cond = not self.data.isnull().values.any()
        _dynamic_test(
            self,
            cond,
            "All missing values were successfully handled.",
            "There are still missing values in the processed dataset."
        )

    def test_categorical_encoding(self):
        """Check that columns containing 'sex_' and 'embarked_' exist."""
        cols = self.data.columns
        cond = any("sex_" in c for c in cols) and any("embarked_" in c for c in cols)
        _dynamic_test(
            self,
            cond,
            "Categorical columns successfully encoded with get_dummies().",
            "Categorical columns 'sex' or 'embarked' were not properly encoded."
        )

    def test_standard_scaling(self):
        """Check that 'age_scaled' and 'fare_scaled' are standardized."""
        df = self.data
        scaled_cols = ("age_scaled", "fare_scaled")
        # Check presence first so a missing column is reported through the
        # failure message below instead of aborting the test with KeyError.
        cond = self._has_columns(*scaled_cols) and all(
            self._looks_standardized(df[col]) for col in scaled_cols
        )
        _dynamic_test(
            self,
            cond,
            "'age' and 'fare' successfully standardized (mean≈0, std≈1).",
            "'age_scaled' or 'fare_scaled' not standardized correctly."
        )

    def test_family_size_feature(self):
        """Check that 'family_size' equals 'sibsp' + 'parch' + 1 on every row."""
        df = self.data
        # 'sibsp' and 'parch' are needed to verify the computation; if any
        # of the three columns is absent the check fails rather than raising
        # KeyError.
        cond = self._has_columns("family_size", "sibsp", "parch") and (
            df["family_size"] == df["sibsp"] + df["parch"] + 1
        ).all()
        _dynamic_test(
            self,
            cond,
            "'family_size' feature successfully created and computed.",
            "'family_size' feature missing or incorrectly calculated."
        )

    def test_pipeline_function_exists(self):
        """Check that preprocess_titanic() is defined and the data is a DataFrame.

        The function itself is not called here: the DataFrame check is made
        on the module's ``processed_data`` object.
        """
        func = getattr(self.user_code, "preprocess_titanic", None)
        cond = callable(func) and isinstance(self.data, pd.DataFrame)
        _dynamic_test(
            self,
            cond,
            "preprocess_titanic() function successfully defined and returns a DataFrame.",
            "preprocess_titanic() missing or incorrect return type."
        )

if __name__ == "__main__":
    # The single argv entry stands in for the program name, so unittest does
    # not parse the host process's own command-line arguments; exit=False
    # keeps unittest.main() from calling sys.exit() when the run finishes.
    unittest.main(
        exit=False,
        argv=["first-arg-is-ignored"],
    )

test_main.py

Lær praktiske teknikker til at rense, transformere og konstruere data til maskinlæring ved hjælp af Python. Dette kursus dækker essentielle forbehandlingsskridt, feature-skabelse og praktiske udfordringer til at forberede data til modellering.

Behersk de grundlæggende trin til at rense og forberede rådata til analyse og maskinlæring.

Omform rådata til anvendelige features til maskinlæringsmodeller.

Udvikling og udvælgelse af features for at forbedre modellens ydeevne og fortolkelighed.

Udfordring: Forbehandlingspipeline

Løsning