Predicting which customers are at risk of churning is a cornerstone of proactive **Customer Success management**. By identifying these customers early, you can focus your efforts on retention strategies, ultimately reducing revenue loss and improving satisfaction. Using **Python** and machine learning, you can automate this process and make data-driven decisions that support your team's goals.

import pandas as pd

# Hardcoded customer engagement data.
# Each row is one customer; the values follow the order given in _FIELDS.
_FIELDS = ("customer_id", "login_frequency", "support_tickets", "feature_usage", "churn")
_ROWS = (
    ("C001", 15, 1, 7, 0),
    ("C002", 3, 4, 2, 1),
    ("C003", 10, 2, 5, 0),
    ("C004", 1, 6, 1, 1),
    ("C005", 8, 0, 6, 0),
    ("C006", 2, 5, 2, 1),
    ("C007", 12, 1, 8, 0),
    ("C008", 4, 3, 3, 1),
    ("C009", 9, 2, 7, 0),
    ("C010", 5, 4, 2, 1),
)

# Expand the compact rows into one dict per customer, keyed by field name.
data = [dict(zip(_FIELDS, row)) for row in _ROWS]

# Load the records into a DataFrame and show the full table.
df = pd.DataFrame(data)
print(df)

To build an effective churn prediction model, you first need to prepare your data by selecting relevant features (such as **login frequency**, **support tickets**, and **feature usage**) and separating them from the **churn** label. You will use scikit-learn's `LogisticRegression` to fit the model on your dataset. After training, you can evaluate the model's accuracy by comparing its predictions to the actual churn labels; because this comparison uses the same small dataset the model was trained on, treat the result as an optimistic estimate. The model's predicted probabilities will help you interpret which customers are most at risk: higher probabilities indicate greater risk of churn, and a customer whose predicted churn probability is at least 0.5 is treated as at risk. By listing these customers and their risk scores, you gain actionable insights for targeted intervention.


import unittest
import user_code
import ast
import re   
import importlib
import csv
import unittest
import importlib
import sys
import io
from contextlib import redirect_stdout
import pandas as pd

class TestTask(unittest.TestCase):
    """Grades ``user_code.predict_churn_and_at_risk`` against a fixed 10-row dataset.

    Every test reloads ``user_code`` and calls ``predict_churn_and_at_risk`` with
    the DataFrame built in ``setUp``. Results are reported through
    ``_dynamic_test`` so each check carries a readable pass/fail message.
    """

    # Feature columns the model is expected to be fitted on.
    EXPECTED_FEATURES = ['login_frequency', 'support_tickets', 'feature_usage']

    def setUp(self):
        """Build the engagement DataFrame handed to the learner's function."""
        self.df = pd.DataFrame([
            {"customer_id": "C001", "login_frequency": 15, "support_tickets": 1, "feature_usage": 7, "churn": 0},
            {"customer_id": "C002", "login_frequency": 3, "support_tickets": 4, "feature_usage": 2, "churn": 1},
            {"customer_id": "C003", "login_frequency": 10, "support_tickets": 2, "feature_usage": 5, "churn": 0},
            {"customer_id": "C004", "login_frequency": 1, "support_tickets": 6, "feature_usage": 1, "churn": 1},
            {"customer_id": "C005", "login_frequency": 8, "support_tickets": 0, "feature_usage": 6, "churn": 0},
            {"customer_id": "C006", "login_frequency": 2, "support_tickets": 5, "feature_usage": 2, "churn": 1},
            {"customer_id": "C007", "login_frequency": 12, "support_tickets": 1, "feature_usage": 8, "churn": 0},
            {"customer_id": "C008", "login_frequency": 4, "support_tickets": 3, "feature_usage": 3, "churn": 1},
            {"customer_id": "C009", "login_frequency": 9, "support_tickets": 2, "feature_usage": 7, "churn": 0},
            {"customer_id": "C010", "login_frequency": 5, "support_tickets": 4, "feature_usage": 2, "churn": 1},
        ])

    @staticmethod
    def _load_user_code():
        """Import ``user_code`` and return a freshly reloaded module object."""
        import user_code
        return importlib.reload(user_code)

    def _run_and_capture(self, module):
        """Call the learner's function on ``self.df`` and return its normalized stdout."""
        buffer = io.StringIO()
        with redirect_stdout(buffer):
            module.predict_churn_and_at_risk(self.df)
        return normalize_text(buffer.getvalue())

    def test_model_trains_on_correct_features(self):
        """The model must be fitted on exactly the three engagement feature columns."""
        user_code = self._load_user_code()
        # Patch LogisticRegression.fit to capture input
        called = {}
        estimator_cls = user_code.LogisticRegression
        orig_fit = estimator_cls.fit

        def fake_fit(self_, X, y, *args, **kwargs):
            # Extra positional/keyword arguments (e.g. sample_weight) are passed
            # through so a valid fit() call is never rejected by the spy itself.
            called['X'] = X.copy()
            called['y'] = y.copy()
            return orig_fit(self_, X, y, *args, **kwargs)

        estimator_cls.fit = fake_fit
        try:
            user_code.predict_churn_and_at_risk(self.df)
        finally:
            estimator_cls.fit = orig_fit
        # If fit() was never called, or X has no column labels (e.g. a NumPy
        # array), fall back to an empty list so the check fails with the
        # descriptive message below instead of raising KeyError/AttributeError.
        cols = list(getattr(called.get('X'), 'columns', []))
        _dynamic_test(
            self,
            set(cols) == set(self.EXPECTED_FEATURES),
            "Model uses correct features for training",
            f"Model trained on columns {cols}, expected ['login_frequency', 'support_tickets', 'feature_usage']"
        )

    def test_accuracy_printed(self):
        """The accuracy line must appear in the printed output."""
        user_code = self._load_user_code()
        output = self._run_and_capture(user_code)
        _dynamic_test(
            self,
            "model accuracy on dataset" in output,
            "Model accuracy is printed",
            "Model accuracy was not printed in output"
        )

    def test_at_risk_customers_printed(self):
        """The at-risk header and at least one churned customer id must be printed."""
        user_code = self._load_user_code()
        output = self._run_and_capture(user_code)
        _dynamic_test(
            self,
            "at-risk customers and their predicted churn probabilities" in output,
            "At-risk customers header is printed",
            "At-risk customers header was not printed"
        )
        # Check at least one customer id is printed (since in this dataset there are churned customers)
        found = any(cid in output for cid in ["c002", "c004", "c006", "c008", "c010"])
        _dynamic_test(
            self,
            found,
            "At least one at-risk customer id is printed",
            "No at-risk customer id was printed"
        )

    def test_probability_threshold(self):
        """Only customers whose churn probability is >= 0.5 may be listed."""
        user_code = self._load_user_code()
        # Patch LogisticRegression.predict_proba to return fixed probabilities
        estimator_cls = user_code.LogisticRegression
        orig_predict_proba = estimator_cls.predict_proba

        def fake_predict_proba(self_, X):
            import numpy as np
            n = X.shape[0]
            # 0.4 for all but 2nd and 4th (index 1,3) which are 0.5 and 0.7
            probs = [[0.6, 0.4]] * n
            probs[1] = [0.5, 0.5]
            probs[3] = [0.3, 0.7]
            return np.array(probs)

        estimator_cls.predict_proba = fake_predict_proba
        try:
            output = self._run_and_capture(user_code)
        finally:
            # Always restore the real method: the patch lives on the shared
            # estimator class and would otherwise leak into later tests if the
            # learner's code raised.
            estimator_cls.predict_proba = orig_predict_proba
        # Only C002 and C004 should appear
        _dynamic_test(
            self,
            "c002" in output and "c004" in output,
            "Customers with probability >= 0.5 are printed",
            "Customers with probability >= 0.5 not printed as expected"
        )
        _dynamic_test(
            self,
            "c001" not in output and "c003" not in output and "c005" not in output,
            "Customers with probability < 0.5 are not printed",
            "Customers with probability < 0.5 were incorrectly printed"
        )

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record the outcome of one check on *test_case* using a readable message.

    The test's ``_testMethodName`` is overwritten with the message matching the
    outcome, so that name is what the case carries afterwards. A falsy
    *condition* then fails the test via ``test_case.fail``; a truthy one
    performs a trivially passing assertion.

    Args:
        test_case: The ``unittest.TestCase`` instance running the check.
        condition: Truthy when the check passed.
        success_message: Message used as the test name on success.
        failure_message: Message used as the test name and failure text.
    """
    passed = bool(condition)
    outcome_label = success_message if passed else failure_message
    test_case._testMethodName = outcome_label
    if not passed:
        test_case.fail(outcome_label)
    else:
        test_case.assertTrue(True, outcome_label)

def normalize_text(text):
    """Return *text* lower-cased with whitespace and punctuation spacing normalized.

    Steps, in order:
      1. lower-case the text;
      2. collapse every run of two or more whitespace characters (spaces,
         tabs, newlines) into a single space;
      3. remove whitespace before ``,``, ``:`` and ``?`` and leave exactly one
         space after each of them;
      4. strip leading and trailing whitespace.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized string.
    """
    text = text.lower()
    # The patterns are raw strings, so a single backslash is what the regex
    # engine needs: r"\s" is the whitespace class, whereas r"\\s" would match
    # a literal backslash followed by the letter "s".
    text = re.sub(r"\s{2,}", " ", text)
    # r"\1" re-inserts the captured punctuation mark, followed by one space.
    text = re.sub(r"\s*([,:?])\s*", r"\1 ", text)
    return text.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Rewrite every top-level assignment to *var_name* in *code* as ``var_name = value``.

    Only plain assignments (``ast.Assign``) that are direct children of the
    module body and have a bare name target equal to *var_name* are rewritten.
    Each one, including a value spanning several lines, is replaced by the
    single line ``var_name = value``. Everything else on the replaced lines is
    dropped, e.g. the other targets of a chained ``a = b = 1``. Lines outside
    the assignment itself, such as following comments, blank lines and
    decorators, are left untouched.

    Args:
        code: Python source text; it must parse with ``ast.parse``.
        var_name: Name of the variable whose assignments are rewritten.
        value: Source text placed on the right-hand side, inserted verbatim.

    Returns:
        The rewritten source joined with ``"\\n"`` (a trailing newline is not
        kept), or *code* unchanged when no matching assignment exists.

    Raises:
        SyntaxError: If *code* is not valid Python.
    """
    tree = ast.parse(code)
    matches = [
        node
        for node in tree.body
        if isinstance(node, ast.Assign)
        and any(isinstance(target, ast.Name) and target.id == var_name for target in node.targets)
    ]

    # If nothing to change, return unmodified code
    if not matches:
        return code

    lines = code.splitlines()
    # Work from the last assignment to the first so that shrinking a
    # multi-line assignment never shifts the line numbers of those still to do.
    for node in reversed(matches):
        start = node.lineno - 1
        first_line = lines[start]
        indent = first_line[:len(first_line) - len(first_line.lstrip())]
        # end_lineno marks the statement's own last line, so only the
        # assignment is replaced; what follows it stays in place.
        lines[start:node.end_lineno] = [f"{indent}{var_name} = {value}"]

    return '\n'.join(lines)

# Run the unittest test runner when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

Learn how to leverage Python to automate, analyze, and optimize tasks relevant to Customer Success Managers. This course focuses on practical applications of Python for customer data analysis, reporting, and workflow automation, tailored for professionals with basic Python knowledge.

Explore foundational techniques for analyzing customer data using Python. Learn to summarize, filter, and extract insights from customer datasets relevant to Customer Success Managers.

Delve into methods for assessing customer health and predicting churn using Python. Learn to build simple models and interpret their results for proactive Customer Success management.

Discover how Python can automate repetitive tasks and streamline workflows for Customer Success Managers, from reporting to customer outreach.

Challenge: Predict At-Risk Customers

Solution