To deepen your understanding of **cohort analysis**, you will write a Python script that simulates a simple retention cohort analyzer. **Cohort analysis** is a powerful tool for growth hackers, as it helps you track user retention over time by grouping users based on shared characteristics—in this case, their signup month. You will use a hardcoded `pandas` DataFrame to represent user data, including user IDs, their signup months, and retention status (where `1` means retained and `0` means not retained). Your task is to calculate the retention rate for each signup month and display the results, giving you practical experience with both `pandas` and user retention analytics.

Begin by importing the `pandas` library and creating a DataFrame with the necessary columns. The DataFrame will contain three columns: `user_id`, `signup_month`, and `retained`. Each row represents a user, their signup month, and whether they were retained.

import pandas as pd

# Sample user records for the cohort analysis: one entry per user, with the
# month they signed up and a 0/1 flag telling whether they were retained.
data = {
    "user_id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "signup_month": [
        "2024-01", "2024-01", "2024-01",
        "2024-02", "2024-02", "2024-02",
        "2024-03", "2024-03", "2024-03", "2024-03",
    ],
    "retained": [1, 0, 1, 1, 0, 1, 0, 1, 1, 0],
}

df = pd.DataFrame(data)

# The mean of a 0/1 flag within a cohort is the fraction of retained users;
# scale it to a percentage and round to two decimals.
cohort = df.groupby("signup_month", as_index=False)["retained"].mean()
cohort["retention_rate"] = cohort["retained"].mul(100).round(2)

# Report one line per signup cohort.
for row in cohort.itertuples(index=False):
    print(
        f"Signup Month: {row.signup_month} - Retention Rate: {row.retention_rate}%"
    )

This script groups users by their `signup_month` and calculates the average of the `retained` column for each group, which represents the **retention rate**. The retention rate is then multiplied by 100 and rounded to two decimal places for readability. Finally, the script prints the retention rate for each signup month, allowing you to see which cohorts are performing better in terms of user retention. By analyzing these results, you can identify trends and make data-driven decisions to improve user engagement and retention strategies.


import unittest
import user_code
import ast
import re   
import importlib
import csv
import unittest
import importlib
import sys
import io

class TestTask(unittest.TestCase):
    """Checks for the retention cohort analyzer exercise in ``user_code``.

    Every test reloads ``user_code`` so that it observes a fresh execution of
    the learner's script, then reports each individual check through
    ``_dynamic_test``.
    """

    def test_dataframe_structure(self):
        """Verify that ``df`` exists and has the required columns and size."""
        importlib.reload(user_code)
        df = getattr(user_code, 'df', None)
        _dynamic_test(
            self,
            df is not None,
            "DataFrame 'df' is defined.",
            "DataFrame 'df' is not defined.",
        )
        _dynamic_test(
            self,
            hasattr(df, 'columns'),
            "'df' has columns attribute.",
            "'df' does not have columns attribute.",
        )
        cols = set(df.columns.tolist())
        expected_cols = {'user_id', 'signup_month', 'retained'}
        _dynamic_test(
            self,
            expected_cols.issubset(cols),
            "DataFrame has required columns.",
            f"DataFrame missing columns: {expected_cols - cols}",
        )
        _dynamic_test(
            self,
            len(df) >= 8,
            "DataFrame has at least 8 rows.",
            f"DataFrame has fewer than 8 rows: {len(df)}",
        )
        unique_months = df['signup_month'].nunique()
        _dynamic_test(
            self,
            unique_months >= 2,
            "There are at least two unique signup_month values.",
            f"Expected at least 2 unique signup_month values, got {unique_months}",
        )

    def test_retention_rate_calculation(self):
        """Verify ``cohort`` holds the per-month retention rate derived from ``df``."""
        importlib.reload(user_code)
        df = getattr(user_code, 'df', None)
        cohort = getattr(user_code, 'cohort', None)
        # Without ``df`` the expected rates cannot be computed; report that
        # explicitly instead of failing later with an AttributeError.
        _dynamic_test(
            self,
            df is not None,
            "DataFrame 'df' is defined.",
            "DataFrame 'df' is not defined.",
        )
        _dynamic_test(
            self,
            cohort is not None,
            "Cohort DataFrame is defined.",
            "Cohort DataFrame is not defined.",
        )
        missing_cols = {'signup_month', 'retention_rate'} - set(cohort.columns)
        _dynamic_test(
            self,
            not missing_cols,
            "Cohort DataFrame has required columns.",
            f"Cohort DataFrame missing columns: {missing_cols}",
        )
        # Calculate expected retention rates from the learner's own data.
        expected = (
            df.groupby('signup_month')['retained']
            .mean()
            .reset_index()
        )
        expected['retention_rate'] = (expected['retained'] * 100).round(2)
        for _, row in expected.iterrows():
            month = row['signup_month']
            rate = row['retention_rate']
            actual_row = cohort[cohort['signup_month'] == month]
            _dynamic_test(
                self,
                not actual_row.empty,
                f"Cohort includes signup_month {month}.",
                f"Cohort missing signup_month {month}.",
            )
            actual_rate = actual_row.iloc[0]['retention_rate']
            _dynamic_test(
                self,
                abs(actual_rate - rate) < 1e-2,
                f"Retention rate for {month} is correct.",
                f"Expected {rate} for {month}, got {actual_rate}.",
            )

    def test_print_output_format(self):
        """Verify the script prints one formatted line per row of ``cohort``."""
        captured = io.StringIO()
        saved_stdout = sys.stdout
        sys.stdout = captured
        try:
            importlib.reload(user_code)
        finally:
            # Restore stdout even if the learner's script raises, so later
            # test output is not swallowed by the capture buffer.
            sys.stdout = saved_stdout
        output = normalize_text(captured.getvalue())
        cohort = getattr(user_code, 'cohort', None)
        _dynamic_test(
            self,
            cohort is not None,
            "Cohort DataFrame exists for output test.",
            "Cohort DataFrame is missing for output test.",
        )
        for _, row in cohort.iterrows():
            # Normalize the expected line exactly like the captured output so
            # that the comparison does not depend on the case or spacing of
            # the cohort label.
            expected_line = normalize_text(
                f"signup month: {row['signup_month']} - retention rate: {row['retention_rate']}%"
            )
            _dynamic_test(
                self,
                expected_line in output,
                f"Output includes line: {expected_line}",
                f"Expected output: '{expected_line}' not found in: {output}",
            )

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record the outcome of one check on ``test_case``.

    The test's ``_testMethodName`` is overwritten with the message matching
    the outcome. A truthy ``condition`` then passes a trivial assertion; a
    falsy one calls ``test_case.fail`` with ``failure_message``.
    """
    passed = bool(condition)
    label = success_message if passed else failure_message
    test_case._testMethodName = label
    if passed:
        test_case.assertTrue(True, label)
    else:
        test_case.fail(label)

def normalize_text(text):
    """Return ``text`` in a canonical form for lenient output comparison.

    The text is lower-cased, runs of two or more whitespace characters are
    collapsed to a single space, whitespace around ``,``, ``:`` and ``?`` is
    rewritten as "no space before, one space after", and leading/trailing
    whitespace is stripped.
    """
    text = text.lower()
    # The patterns are raw strings, so a single backslash is required for the
    # regex whitespace class and for the group back-reference.
    text = re.sub(r"\s{2,}", " ", text)
    text = re.sub(r"\s*([,:?])\s*", r"\1 ", text)
    return text.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Rewrite every top-level assignment to ``var_name`` as ``var_name = value``.

    Args:
        code: Python source to modify; it must parse with ``ast.parse``.
        var_name: Name of the module-level variable to reassign.
        value: Source text of the new right-hand side, inserted verbatim.

    Returns:
        The modified source with lines joined by newlines, or ``code``
        unchanged when no top-level assignment targets ``var_name``.

    Raises:
        SyntaxError: If ``code`` is not valid Python.
    """
    tree = ast.parse(code)
    lines = code.splitlines()

    # Only plain module-level assignments (``name = ...``) are considered.
    matches = [
        node
        for node in tree.body
        if isinstance(node, ast.Assign)
        and any(
            isinstance(target, ast.Name) and target.id == var_name
            for target in node.targets
        )
    ]

    # If nothing to change, return unmodified code
    if not matches:
        return code

    # Replace from last to first so earlier line numbers stay valid.
    for node in reversed(matches):
        start_line = node.lineno - 1
        line = lines[start_line]
        indent = ' ' * (len(line) - len(line.lstrip()))
        # Replace exactly the statement's own lines (it may span several),
        # leaving following comments, blank lines and decorators intact.
        lines[start_line:node.end_lineno] = [f"{indent}{var_name} = {value}"]

    return '\n'.join(lines)

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

Unlock the power of Python to automate, analyze, and optimize growth strategies. This course is designed for growth hackers who want to leverage Python for data-driven marketing, user acquisition, and rapid experimentation. Learn to automate repetitive tasks, analyze user behavior, and extract actionable insights—all without prior data science experience.

Learn how to use Python to automate repetitive growth hacking tasks, from data cleaning to campaign reporting.

Use Python to analyze user engagement, retention, and behavior patterns to inform growth strategies.

Apply Python to design, analyze, and optimize growth experiments such as A/B tests and campaign performance evaluations.

Challenge: Retention Cohort Analyzer

Solution