import unittest
import user_code
import ast
import re   
import importlib
import csv
import unittest
import numpy as np
import importlib

class TestTask(unittest.TestCase):
    """Behavioural checks for ``update_q_table_sarsa`` in the module under test.

    Every test reloads the module first, calls the function on a NumPy
    Q-table, inspects the table afterwards (the return value is never used,
    so the function is expected to modify the table in place) and reports the
    verdict through ``_dynamic_test``.
    """

    def setUp(self):
        self.module_name = "user_code"

    def _load_user_code(self):
        """Import the module named by ``self.module_name`` and return it reloaded."""
        return importlib.reload(importlib.import_module(self.module_name))

    def test_sarsa_updates_q_values(self):
        """A three-step run on a zero table should only produce Q[0, 1] = 0.5."""
        user_code = self._load_user_code()

        q_table = np.zeros((3, 2))
        state_action_sequence = [(0, 1), (1, 0), (2, 1)]
        alpha = 0.5
        gamma = 0.9
        rewards = [1, 0, 2]
        expected_q = np.zeros((3, 2))
        # Step 1: (0,1) -> (1,0), reward=1
        # Q[0,1] = 0 + 0.5 * (1 + 0.9*0 - 0) = 0.5
        expected_q[0, 1] = 0.5
        # Step 2: (1,0) -> (2,1), reward=0
        # Q[1,0] = 0 + 0.5 * (0 + 0.9*0 - 0) = 0
        # (no change)
        user_code.update_q_table_sarsa(q_table, state_action_sequence, alpha, gamma, rewards)
        _dynamic_test(
            self,
            np.allclose(q_table, expected_q),
            "Q-table updated correctly for SARSA simple run.",
            f"Expected Q-table: {expected_q}, got: {q_table}"
        )

    def test_only_sequence_pairs_are_updated(self):
        """Entries outside the non-final sequence pairs must keep their values."""
        user_code = self._load_user_code()
        q_table = np.ones((4, 3)) * 7
        state_action_sequence = [(2, 2), (3, 1), (0, 0)]
        alpha = 0.2
        gamma = 0.5
        rewards = [10, -3, 5]
        # Save original
        original = q_table.copy()
        user_code.update_q_table_sarsa(q_table, state_action_sequence, alpha, gamma, rewards)
        # Only (2,2) and (3,1) can be updated
        updatable = {(2, 2), (3, 1)}
        changed = not np.allclose(q_table, original)
        unchanged = all(
            q_table[i, j] == original[i, j]
            for i in range(4)
            for j in range(3)
            if (i, j) not in updatable
        )
        # Name the actual cause: a table that was never touched is a different
        # mistake from one where unrelated entries were overwritten.
        if changed:
            failure_message = "Some Q-table entries not in the sequence were changed."
        else:
            failure_message = "No Q-table entries were updated."
        _dynamic_test(
            self,
            changed and unchanged,
            "Only sequence state-action pairs are updated.",
            failure_message
        )

    def test_q_table_shape_compatibility(self):
        """The function must run without raising on a 5x4 table."""
        user_code = self._load_user_code()
        q_table = np.zeros((5, 4))
        state_action_sequence = [(4, 3), (2, 0), (0, 1), (3, 2)]
        alpha = 0.9
        gamma = 0.99
        rewards = [2, -1, 5, 0]
        try:
            user_code.update_q_table_sarsa(q_table, state_action_sequence, alpha, gamma, rewards)
        except Exception:
            # Any error raised by the code under test is reported as a failed
            # check rather than as a crash of the test itself.
            worked = False
        else:
            worked = True
        _dynamic_test(
            self,
            worked,
            "Function works with any compatible Q-table shape.",
            "Function failed for Q-table with arbitrary shape."
        )

    def test_final_state_action_not_updated(self):
        """The last pair of the sequence has no successor and must stay as is."""
        user_code = self._load_user_code()
        q_table = np.zeros((3, 2))
        q_table[2, 1] = 99
        state_action_sequence = [(0, 1), (1, 0), (2, 1)]
        alpha = 0.5
        gamma = 0.9
        rewards = [1, 0, 2]
        user_code.update_q_table_sarsa(q_table, state_action_sequence, alpha, gamma, rewards)
        _dynamic_test(
            self,
            q_table[2, 1] == 99,
            "Final state-action pair was not updated.",
            "Final state-action pair was incorrectly updated."
        )

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Record a pass/fail verdict on *test_case* with a descriptive label.

    The test's ``_testMethodName`` is overwritten with the message matching
    the outcome, then the test either passes trivially or is failed via
    ``test_case.fail``.

    Args:
        test_case: The running ``unittest.TestCase`` instance.
        condition: Truthy when the check succeeded.
        success_message: Label used when the check passes.
        failure_message: Label and failure text used when the check fails.
    """
    passed = bool(condition)
    test_case._testMethodName = success_message if passed else failure_message
    if not passed:
        test_case.fail(failure_message)
        return
    test_case.assertTrue(True, success_message)

def normalize_text(text):
    """Return *text* lower-cased with whitespace and punctuation spacing normalised.

    Steps, in order:
      1. lower-case the text;
      2. collapse every run of two or more whitespace characters into one space;
      3. rewrite each ``,``, ``:`` or ``?`` so that it has no whitespace before
         it and exactly one space after it;
      4. strip leading and trailing whitespace.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    text = text.lower()
    # Raw strings already keep the backslash, so the whitespace class is
    # written with a single backslash; doubling it would match a literal
    # backslash instead of whitespace.
    text = re.sub(r"\s{2,}", " ", text)
    text = re.sub(r"\s*([,:?])\s*", r"\1 ", text)
    return text.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Rewrite every top-level assignment to *var_name* as ``var_name = value``.

    Only plain assignments (``ast.Assign``) that are direct children of the
    module body and have *var_name* as a simple name target are rewritten.
    An assignment spanning several lines is collapsed into a single line;
    the lines after it (comments, blank lines, decorators of the following
    definition) are left untouched.

    Args:
        code: Python source text; it must parse with ``ast.parse``.
        var_name: Name of the variable whose assignments are replaced.
        value: Source text placed on the right-hand side of the assignment.

    Returns:
        The rewritten source with lines joined by newline characters, or
        *code* itself when no matching assignment exists.

    Raises:
        SyntaxError: If *code* is not valid Python (raised by ``ast.parse``).
    """
    tree = ast.parse(code)
    lines = code.splitlines()

    # (position in the module body, node) for each matching assignment.
    matches = [
        (position, node)
        for position, node in enumerate(tree.body)
        if isinstance(node, ast.Assign)
        and any(isinstance(target, ast.Name) and target.id == var_name for target in node.targets)
    ]

    # If nothing to change, return unmodified code
    if not matches:
        return code

    # Work from the last match to the first so that collapsing a multi-line
    # assignment does not shift the line numbers of matches still to process.
    for position, node in reversed(matches):
        first = node.lineno - 1
        stop = getattr(node, "end_lineno", None)
        if stop is None:
            # Older interpreters do not record where a statement ends; fall
            # back to the line on which the next top-level statement starts.
            stop = len(lines)
            for following in tree.body[position + 1:]:
                if hasattr(following, 'lineno'):
                    stop = following.lineno - 1
                    break
        # Always replace at least the assignment's own first line.
        stop = max(stop, first + 1)

        current = lines[first]
        indent = ' ' * (len(current) - len(current.lstrip()))
        lines[first:stop] = [f"{indent}{var_name} = {value}"]

    return '\n'.join(lines)

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

Practice implementing Q-learning and SARSA from scratch with Python. Includes step-by-step coding exercises, intuitive explanations, and simple tasks like grid-world navigation.

Explore the foundations of reinforcement learning, implement Q-learning and SARSA from scratch, and apply them to simple environments with hands-on coding, explanations, and challenges.

Challenge: Q-table Update with SARSA

Solution