import unittest
import importlib
import numpy as np
import pandas as pd
import seaborn as sns

def _dynamic_test(test_case, condition, success_msg, failure_msg):
    """Record a check's outcome on ``test_case`` under a descriptive name.

    The test's ``_testMethodName`` attribute is overwritten with the message
    matching the outcome before the outcome itself is recorded — presumably
    so the runner's report shows the message instead of the method name.

    Args:
        test_case: Object exposing ``assertTrue`` and ``fail`` (a
            ``unittest.TestCase`` in this file).
        condition: Value whose truthiness decides pass or fail.
        success_msg: Message used when ``condition`` is truthy.
        failure_msg: Message used when ``condition`` is falsy.
    """
    passed = bool(condition)
    message = success_msg if passed else failure_msg

    # Rename first: ``fail`` raises, so nothing after it would execute.
    test_case._testMethodName = message
    if passed:
        test_case.assertTrue(True, message)
    else:
        test_case.fail(message)
class TestTitanicPreprocessing(unittest.TestCase):
    """Checks run against the ``processed_data`` object of ``user_code``.

    The ``user_code`` module is imported once per class and its module-level
    ``processed_data`` attribute is cached on the class. Every test reports
    its outcome through ``_dynamic_test`` so that a descriptive message is
    attached to the result.
    """

    @classmethod
    def setUpClass(cls):
        """Import ``user_code`` and cache its ``processed_data`` attribute."""
        cls.user_code = importlib.import_module("user_code")
        cls.data = cls.user_code.processed_data

    def test_no_missing_values(self):
        """Check that no missing values remain after preprocessing."""
        cond = not self.data.isnull().values.any()
        _dynamic_test(
            self,
            cond,
            "All missing values were successfully handled.",
            "There are still missing values in the processed dataset."
        )

    def test_categorical_encoding(self):
        """Check that encoded 'sex_*' and 'embarked_*' columns are present."""
        # Column labels are not guaranteed to be strings; stringify them so a
        # numeric label cannot raise TypeError in the substring checks.
        cols = [str(c) for c in self.data.columns]
        cond = any("sex_" in c for c in cols) and any("embarked_" in c for c in cols)
        _dynamic_test(
            self,
            cond,
            "Categorical columns successfully encoded with get_dummies().",
            "Categorical columns 'sex' or 'embarked' were not properly encoded."
        )

    def test_standard_scaling(self):
        """Check that 'age_scaled' and 'fare_scaled' have mean≈0 and std≈1."""
        df = self.data
        # A missing column must produce the descriptive failure below rather
        # than an unhandled KeyError.
        cond = all(col in df.columns for col in ("age_scaled", "fare_scaled"))
        if cond:
            # Values are rounded to one decimal before the threshold
            # comparison; keep this so the accepted tolerance stays stable.
            mean_age = round(df["age_scaled"].mean(), 1)
            mean_fare = round(df["fare_scaled"].mean(), 1)
            std_age = round(df["age_scaled"].std(), 1)
            std_fare = round(df["fare_scaled"].std(), 1)
            cond = bool(
                abs(mean_age) < 0.2
                and abs(mean_fare) < 0.2
                and 0.8 < std_age < 1.2
                and 0.8 < std_fare < 1.2
            )
        _dynamic_test(
            self,
            cond,
            "'age' and 'fare' successfully standardized (mean≈0, std≈1).",
            "'age_scaled' or 'fare_scaled' not standardized correctly."
        )

    def test_family_size_feature(self):
        """Check that 'family_size' equals 'sibsp' + 'parch' + 1 in every row."""
        df = self.data
        # All three columns are read below, so all three must exist; otherwise
        # report the descriptive failure instead of raising KeyError.
        required = ("family_size", "sibsp", "parch")
        cond = all(col in df.columns for col in required) and bool(
            (df["family_size"] == df["sibsp"] + df["parch"] + 1).all()
        )
        _dynamic_test(
            self,
            cond,
            "'family_size' feature successfully created and computed.",
            "'family_size' feature missing or incorrectly calculated."
        )

    def test_pipeline_function_exists(self):
        """Check that preprocess_titanic is callable and the data is a DataFrame.

        The function itself is not invoked; only ``processed_data`` is
        type-checked.
        """
        func = getattr(self.user_code, "preprocess_titanic", None)
        cond = callable(func) and isinstance(self.data, pd.DataFrame)
        _dynamic_test(
            self,
            cond,
            "preprocess_titanic() function successfully defined and returns a DataFrame.",
            "preprocess_titanic() missing or incorrect return type."
        )

if __name__ == "__main__":
    # Run the suite in-process: an explicit argv keeps unittest from parsing
    # the host's command-line arguments, and exit=False prevents it from
    # calling sys.exit() once the run is finished.
    unittest.main(exit=False, argv=["first-arg-is-ignored"])

test_main.py

本セクションでは、教師あり学習に向けた生データの準備に必要な基本的手法を網羅します。欠損値の処理、カテゴリカル特徴量のエンコーディング、数値データのスケーリングおよび変換、そしてモデルの性能と信頼性を向上させる有意義な特徴量の作成に焦点を当てます。


チャレンジ：前処理パイプライン

解答