import unittest
import re
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.exceptions import NotFittedError

def _dynamic_test(tc, cond, ok, bad):
    """Report a check's outcome on test case ``tc`` under a readable label.

    The chosen message is written to ``tc._testMethodName`` before the
    outcome is signalled, so it replaces the method name stored on the
    test case.

    Args:
        tc: Test-case object exposing ``assertTrue`` and ``fail``.
        cond: Outcome of the check; evaluated for truthiness.
        ok: Message used as the label when ``cond`` is truthy.
        bad: Message used as the label and failure text otherwise.
    """
    passed = bool(cond)
    label = ok if passed else bad
    tc._testMethodName = label
    if passed:
        # Trivially true assertion; only the label above carries information.
        tc.assertTrue(True, label)
    else:
        tc.fail(label)

class TestUserCode(unittest.TestCase):
    """Structural tests: verify steps exist and objects are properly built.

    Every test imports the learner's ``user_code`` module and reports its
    outcome through ``_dynamic_test``. Learner variables are looked up with
    ``getattr(..., None)`` so that a missing variable produces the test's own
    failure message instead of an ``AttributeError``.
    No numeric equality checks.
    """

    def test_required_variables_declared(self):
        """Check that every expected module-level variable exists."""
        import user_code
        names = [
            "df", "numeric_features", "categorical_features",
            "X", "y", "X_train", "X_test", "y_train", "y_test",
            "preprocess", "pipe", "y_pred", "test_accuracy"
        ]
        cond = all(hasattr(user_code, n) for n in names)
        _dynamic_test(
            self, cond,
            "All required variables are declared.",
            f"Expected variables {names} to be declared."
        )

    def test_split_types(self):
        """Check that ``X_train`` and ``X_test`` are pandas DataFrames."""
        import user_code
        # A missing variable resolves to None, which fails the isinstance check.
        cond = all(
            isinstance(getattr(user_code, name, None), pd.DataFrame)
            for name in ("X_train", "X_test")
        )
        _dynamic_test(
            self, cond,
            "Train/test splits exist with DataFrame features.",
            "Expected X_train and X_test to be pandas DataFrames."
        )

    def test_preprocess_is_columntransformer(self):
        """Check that ``preprocess`` is a ``ColumnTransformer`` instance."""
        import user_code
        cond = isinstance(getattr(user_code, "preprocess", None), ColumnTransformer)
        _dynamic_test(
            self, cond,
            "`preprocess` is a ColumnTransformer.",
            "Expected `preprocess` to be sklearn.compose.ColumnTransformer."
        )

    def test_pipeline_is_built(self):
        """Check that ``pipe`` is a ``Pipeline`` with a ``preprocess`` step."""
        import user_code
        pipe = getattr(user_code, "pipe", None)
        # ``named_steps`` is only consulted once ``pipe`` is known to be a Pipeline.
        cond = isinstance(pipe, Pipeline) and "preprocess" in pipe.named_steps
        _dynamic_test(
            self, cond,
            "`pipe` is a Pipeline with a `preprocess` step.",
            "Expected a sklearn Pipeline with a `preprocess` step."
        )

    def test_columntransformer_contains_scaler_and_ohe(self):
        """Check that ``preprocess`` holds a StandardScaler and a OneHotEncoder.

        Estimators are looked for directly in ``preprocess.transformers`` and
        one level down inside any ``Pipeline`` listed there.
        """
        import user_code
        preprocess = getattr(user_code, "preprocess", None)
        # Entries are (name, estimator, columns); only the estimator matters here.
        entries = getattr(preprocess, "transformers", None) or []
        estimators = []
        for entry in entries:
            est = entry[1]
            if isinstance(est, Pipeline):
                # Also allow nested pipelines inside ColumnTransformer
                estimators.extend(step[1] for step in est.steps)
            else:
                estimators.append(est)
        has_scaler = any(isinstance(est, StandardScaler) for est in estimators)
        has_ohe = any(isinstance(est, OneHotEncoder) for est in estimators)
        cond = has_scaler and has_ohe
        _dynamic_test(
            self, cond,
            "The ColumnTransformer includes scaling for numeric and one-hot for categoricals.",
            "Expected StandardScaler and OneHotEncoder to be present in `preprocess`."
        )

    def test_pipeline_is_fitted_and_predicts(self):
        """Check that ``pipe.predict(X_test)`` returns one prediction per row."""
        import user_code
        try:
            preds = user_code.pipe.predict(user_code.X_test)
            cond = preds is not None and len(preds) == len(user_code.X_test)
        except Exception:
            # Deliberately broad: NotFittedError, a missing variable, or any
            # other failure while predicting all mean the check did not pass.
            cond = False
        _dynamic_test(
            self, cond,
            "The pipeline is fitted and can predict on the test set.",
            "Expected `pipe.fit(X_train, y_train)` before predicting on X_test."
        )

    def test_accuracy_is_computed_and_printed(self):
        """Check that ``test_accuracy`` exists in the learner's module.

        Only the presence of the variable is checked; its value and whether
        it was printed are not inspected here.
        """
        import user_code
        cond = hasattr(user_code, "test_accuracy")
        _dynamic_test(
            self, cond,
            "`test_accuracy` is computed.",
            "Expected `test_accuracy = accuracy_score(y_test, y_pred)` to be computed."
        )

    def test_no_whole_dataset_fit_hint(self):
        """Soft guard against leakage: ensure train_test_split is used in source."""
        # Only an ASCII substring is searched for, so undecodable bytes are
        # replaced rather than allowed to abort the read.
        with open("user_code.py", "r", encoding="utf-8", errors="replace") as f:
            src = f.read()
        cond = "train_test_split" in src
        _dynamic_test(
            self, cond,
            "Train/test split is present in the code (hint against leakage).",
            "Expected a train/test split before fitting."
        )

# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

A comprehensive exploration of feature scaling, normalization, and data preprocessing techniques essential for effective machine learning. This course covers the mathematical foundations, intuition, practical implementation, and impact of various scaling methods on model performance.

Explore the core motivations for feature scaling, including its mathematical basis and practical impact on machine learning algorithms.

Investigate L1, L2, and Max normalization, their mathematical foundations, and their effects on data geometry.

Understand the concepts of covariance, correlation, and the whitening transformation for decorrelating features.

Analyze the impact of feature scaling on model optimization, convergence, and performance.

Learn to select appropriate scaling and normalization techniques, avoid data leakage, and build robust preprocessing pipelines.

Challenge: Build a Preprocessing Pipeline

Solution