To put your understanding of machine learning in trading to the test, you'll tackle a hands-on challenge: predicting the next day's market direction using logistic regression. You'll begin with a hardcoded DataFrame of closing prices, engineer lagged return features, and use scikit-learn's logistic regression to classify whether the next day's return is positive (market up) or not (market down or unchanged). You'll then evaluate the model's accuracy and display a confusion matrix to summarize its predictive performance.

Start by creating a DataFrame with daily closing prices. From these prices, calculate daily returns and then generate lagged versions of these returns to use as input features for your model. The target variable will be whether the following day's return is positive (market up) or not (market down or unchanged).


You can experiment by adjusting the number of lagged features or changing the train-test split to see how it affects the model's performance. For more on logistic regression and confusion matrices, consult the scikit-learn documentation.

Note


import unittest
import user_code
import ast
import re   
import importlib
import csv
import unittest
import importlib
import sys
import io
import numpy as np
import pandas as pd
from contextlib import redirect_stdout

class TestTask(unittest.TestCase):
    """Grade a learner's ``user_code`` module for the market-direction task.

    ``setUp`` rebuilds the reference data set from the hard-coded closing
    prices. Each test then re-executes ``user_code`` and inspects the
    module-level names it defines (``df``, ``X_train``, ``X_test``,
    ``y_train``, ``y_test``, ``model``, ``accuracy``, ``cm``) or the text it
    prints while running.

    Test methods are described with ``#`` comments instead of docstrings:
    unittest includes a test's docstring in its verbose report, and the
    reported name is already supplied through ``_dynamic_test``.
    """

    @staticmethod
    def _reload_user_code():
        """Re-execute ``user_code`` and return the reloaded module."""
        import user_code
        return importlib.reload(user_code)

    def setUp(self):
        """Build the reference frame, features, target and train/test split."""
        self.close_prices = [
            100.0, 101.5, 102.3, 101.7, 103.1, 102.8, 104.0, 105.2, 104.7, 106.0,
            105.5, 104.9, 105.8, 106.5, 107.1, 106.8, 108.0, 107.5, 108.7, 110.0
        ]
        self.df = pd.DataFrame({"Close": self.close_prices})
        self.df["Return"] = self.df["Close"].pct_change()
        self.df["Lag1"] = self.df["Return"].shift(1)
        self.df["Lag2"] = self.df["Return"].shift(2)
        # 1 when the following row's return is positive, otherwise 0. The last
        # row has no following return; NaN > 0 is False, so it is labelled 0.
        self.df["Direction"] = (self.df["Return"].shift(-1) > 0).astype(int)
        # Removes the leading rows whose Return/Lag1/Lag2 values are NaN.
        self.df = self.df.dropna()
        self.X = self.df[["Lag1", "Lag2"]]
        self.y = self.df["Direction"]
        # The last five rows are held out as the test set.
        self.X_train, self.X_test = self.X[:-5], self.X[-5:]
        self.y_train, self.y_test = self.y[:-5], self.y[-5:]

    def test_return_column_exists(self):
        # The learner's ``df`` must expose a 'Return' column.
        df = getattr(self._reload_user_code(), 'df', None)
        _dynamic_test(self, df is not None and "Return" in df.columns,
                      "DataFrame contains 'Return' column",
                      "DataFrame is missing 'Return' column")

    def test_lag_features_exist(self):
        # The learner's ``df`` must expose both lag feature columns.
        df = getattr(self._reload_user_code(), 'df', None)
        _dynamic_test(self, df is not None and "Lag1" in df.columns and "Lag2" in df.columns,
                      "DataFrame contains 'Lag1' and 'Lag2' columns",
                      "DataFrame is missing 'Lag1' or 'Lag2' columns")

    def test_direction_column(self):
        # 'Direction' must exist and agree with the reference on the first
        # three rows.
        df = getattr(self._reload_user_code(), 'df', None)
        has_direction = df is not None and "Direction" in df.columns
        _dynamic_test(self, has_direction,
                      "DataFrame contains 'Direction' column",
                      "DataFrame is missing 'Direction' column")
        if has_direction:
            expected = self.df["Direction"].iloc[:3].tolist()
            actual = df["Direction"].iloc[:3].tolist()
            _dynamic_test(self, actual == expected,
                          "Direction column values are correct for first 3 rows",
                          f"Expected: {expected}, got: {actual}")

    def test_train_test_split(self):
        # All four split variables must exist; the test part must hold five
        # rows and the train part must match the reference size.
        module = self._reload_user_code()
        X_train = getattr(module, 'X_train', None)
        X_test = getattr(module, 'X_test', None)
        y_train = getattr(module, 'y_train', None)
        y_test = getattr(module, 'y_test', None)
        all_present = (
            X_train is not None and X_test is not None
            and y_train is not None and y_test is not None
        )
        _dynamic_test(self,
            all_present,
            "Train/test sets exist",
            "Train/test sets missing"
        )
        if all_present:
            _dynamic_test(self,
                len(X_test) == 5 and len(y_test) == 5,
                "Test set contains 5 observations",
                f"Test set does not contain 5 rows: {len(X_test)}, {len(y_test)}"
            )
            _dynamic_test(self,
                len(X_train) == len(self.X_train) and len(y_train) == len(self.y_train),
                "Train set contains correct number of observations",
                f"Train set size incorrect: {len(X_train)}, {len(y_train)}")

    def test_logistic_regression_fit(self):
        # ``model`` must offer ``predict`` and return one prediction per
        # training row.
        module = self._reload_user_code()
        model = getattr(module, 'model', None)
        X_train = getattr(module, 'X_train', None)
        _dynamic_test(self,
            model is not None and hasattr(model, 'predict'),
            "LogisticRegression model is fitted",
            "LogisticRegression model is missing or not fitted"
        )
        if model is not None and X_train is not None:
            try:
                preds = model.predict(X_train)
            except Exception as e:
                # Any error raised by predict() is reported as a test failure.
                _dynamic_test(self, False, "Model can predict on training set", f"Model prediction failed: {e}")
            else:
                # Checked outside the ``try`` so that a length mismatch is
                # reported with its own message instead of being caught above
                # and relabelled as a prediction failure.
                _dynamic_test(self, len(preds) == len(X_train),
                    "Model can predict on training set",
                    "Model cannot predict on training set")

    def test_model_accuracy_and_confusion_matrix(self):
        # ``accuracy`` must lie in [0, 1]; ``cm`` must be a 2x2 array or list.
        module = self._reload_user_code()
        accuracy = getattr(module, 'accuracy', None)
        cm = getattr(module, 'cm', None)
        _dynamic_test(self, accuracy is not None and 0 <= accuracy <= 1,
                      "Accuracy variable exists and is a valid value",
                      f"Accuracy variable is missing or invalid: {accuracy}")
        _dynamic_test(self, cm is not None and isinstance(cm, (np.ndarray, list)),
                      "Confusion matrix variable exists and is a numpy array or list",
                      f"Confusion matrix variable is missing or not an array: {cm}")
        if cm is not None:
            shape = np.shape(cm)
            _dynamic_test(self, shape == (2,2),
                          "Confusion matrix is 2x2",
                          f"Confusion matrix shape is not 2x2: {shape}")

    def test_printed_output(self):
        # Re-execute user_code once, with stdout captured, and look for the two
        # required labels in the normalised output.
        captured = io.StringIO()
        with redirect_stdout(captured):
            self._reload_user_code()
        output = normalize_text(captured.getvalue())
        expected_acc = "test accuracy:"
        expected_cm = "confusion matrix:"
        _dynamic_test(self, expected_acc in output,
                      "Output includes 'Test Accuracy:'",
                      f"Output missing 'Test Accuracy:' in: {output}")
        _dynamic_test(self, expected_cm in output,
                      "Output includes 'Confusion Matrix:'",
                      f"Output missing 'Confusion Matrix:' in: {output}")

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record *condition* on *test_case* under a human-readable name.

    The message matching the outcome is first written to
    ``test_case._testMethodName``; then a passing assertion is recorded for a
    truthy *condition*, or ``test_case.fail`` is called with
    *failure_message* for a falsy one.

    Args:
        test_case: Object providing ``assertTrue`` and ``fail``.
        condition: Value whose truthiness decides the outcome.
        success_message: Name/message used when *condition* is truthy.
        failure_message: Name/message used when *condition* is falsy.
    """
    passed = bool(condition)
    test_case._testMethodName = success_message if passed else failure_message
    if not passed:
        test_case.fail(failure_message)
        return
    test_case.assertTrue(True, success_message)

def normalize_text(text):
    """Lower-case *text* and normalise its whitespace and punctuation spacing.

    Steps, in order: lower-case the text; collapse every run of two or more
    whitespace characters (newlines included) into one space; rewrite each
    comma, colon or question mark so it has no whitespace before it and
    exactly one space after it; strip leading and trailing whitespace.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    text = text.lower()
    # The patterns use a single backslash inside raw strings so that the regex
    # engine sees the whitespace class; a doubled backslash would match a
    # literal backslash followed by the letter "s".
    text = re.sub(r"\s{2,}", " ", text)
    # The replacement is a back-reference to the captured punctuation mark.
    text = re.sub(r"\s*([,:?])\s*", r"\1 ", text)
    return text.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Rewrite every top-level ``var_name = ...`` assignment in *code*.

    Only plain assignment statements in the module body whose targets include
    the bare name *var_name* are rewritten; assignments nested in functions or
    classes are left alone. Each match, including any continuation lines of a
    multi-line right-hand side, is replaced by the single line
    ``var_name = value``. Whole physical lines are replaced, so anything else
    sharing those lines (other targets, ``;``-separated statements) is dropped.

    Args:
        code: Python source text to modify.
        var_name: Name of the variable whose assignments are replaced.
        value: Source text placed on the right-hand side of the assignment.

    Returns:
        *code* unchanged when no matching assignment exists; otherwise the
        modified source joined with newlines (without a trailing newline).

    Raises:
        SyntaxError: If *code* cannot be parsed by ``ast.parse``.
    """
    tree = ast.parse(code)
    lines = code.splitlines()

    matches = [
        (index, node)
        for index, node in enumerate(tree.body)
        if isinstance(node, ast.Assign)
        and any(isinstance(target, ast.Name) and target.id == var_name for target in node.targets)
    ]
    if not matches:
        return code

    # Work from the last match to the first so that shrinking a multi-line
    # assignment does not shift the line numbers of matches still to be done.
    for index, node in reversed(matches):
        start = node.lineno - 1
        # ``end_lineno`` gives the statement's real last line, so comments,
        # blank lines and decorators that follow it are preserved.
        end = getattr(node, "end_lineno", None)
        if end is None:
            # Interpreters without ``end_lineno``: fall back to treating
            # everything up to the next top-level statement as this statement.
            following = tree.body[index + 1:]
            end = following[0].lineno - 1 if following else len(lines)
        first_line = lines[start]
        indent = ' ' * (len(first_line) - len(first_line.lstrip()))
        lines[start:end] = [f"{indent}{var_name} = {value}"]

    return '\n'.join(lines)

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

A hands-on course designed for traders who want to leverage Python to analyze financial data, build trading strategies, and visualize market trends. This course combines practical coding tasks with real-world trading scenarios, focusing on how Python can be used to solve common problems faced by traders.

Learn how to use Python to analyze and manipulate financial data, focusing on the needs of traders. This section covers data structures, basic calculations, and essential data wrangling techniques.

Master the art of visualizing financial data and trading indicators using Python's plotting libraries. Learn to create insightful charts that help traders make informed decisions.

Apply Python to develop, backtest, and evaluate basic trading strategies. Learn to use key performance metrics and risk measures relevant to traders.

Challenge: Predict Market Direction with Logistic Regression

Solution