Imagine you are tasked with collecting information from a list of web pages, such as fetching the latest news headlines or product prices from several sites. Doing this sequentially—one after another—would be slow, especially if some pages take longer to respond. To speed up the process, you want to fetch multiple pages at the same time using **threads**. This approach puts the time otherwise spent waiting on network responses to good use: data is retrieved from several sites concurrently, and each result can be processed as soon as it arrives.


import unittest
import user_code
import ast
import re   
import unittest
import user_code
import importlib
import io
from contextlib import redirect_stdout
import re
import ast

class TestTask(unittest.TestCase):
    """Checks for the threaded web scraper implemented in ``user_code``.

    Every test reloads ``user_code`` before touching it and restores whatever
    attribute it replaces in a ``finally`` clause, so a failing or raising test
    cannot leave a fake installed for the next one.

    The test methods are described with ``#`` comments rather than docstrings:
    unittest appends a test's docstring to its verbose description, and the
    reported name is already rewritten by ``_dynamic_test``.
    """

    @staticmethod
    def _fetch_with_default_headers(url):
        """Return ``user_code.fetch_url_content(url)`` with ``Request`` patched.

        While the call runs, ``user_code.urllib.request.Request`` is replaced by
        a wrapper that supplies a ``User-Agent`` header whenever the caller
        passes no headers. The original ``Request`` is put back afterwards, even
        if the fetch raises.
        """
        orig_req = user_code.urllib.request.Request

        def patched_req(url, headers=None):
            headers = headers or {"User-Agent": "Mozilla/5.0"}
            return orig_req(url, headers=headers)

        user_code.urllib.request.Request = patched_req
        try:
            return user_code.fetch_url_content(url)
        finally:
            user_code.urllib.request.Request = orig_req

    def test_fetch_url_content_length(self):
        # fetch_url_content must return a str of exactly 100 characters.
        importlib.reload(user_code)
        result = self._fetch_with_default_headers("https://example.com")
        _dynamic_test(
            self,
            isinstance(result, str) and len(result) == 100,
            "fetch_url_content returns 100 characters as a string",
            f"Expected 100 character string, got: {result} (length: {len(result) if isinstance(result, str) else 'n/a'})",
        )

    def test_fetch_url_content_content(self):
        # The returned text must mention "example" (case-insensitively).
        importlib.reload(user_code)
        result = self._fetch_with_default_headers("https://example.com")
        _dynamic_test(
            self,
            # The isinstance guard turns a non-string result into a regular
            # failure instead of an unrelated error from ``.lower()``.
            isinstance(result, str) and "example" in result.lower(),
            "fetch_url_content returns content from the correct URL",
            f"Returned content does not seem to be from example.com: {result}",
        )

    def test_threaded_print_output(self):
        # main() must print a "content from <url>:" line for every expected URL.
        importlib.reload(user_code)
        orig_fetch = user_code.fetch_url_content

        def fake_fetch(url):
            # Predictable stand-in: derived from the URL tail, at most 100 chars.
            return (url[-10:] * 10)[:100]

        user_code.fetch_url_content = fake_fetch
        f = io.StringIO()
        try:
            with redirect_stdout(f):
                user_code.main()
        finally:
            user_code.fetch_url_content = orig_fetch
        output = f.getvalue()
        # Normalise once instead of once per URL.
        normalized_lines = [line.strip().lower() for line in output.splitlines()]
        expected_urls = [
            "https://example.com",
            "https://www.python.org",
            "https://www.wikipedia.org"
        ]
        for url in expected_urls:
            # Accept both with and without space after colon, to match lambda/print behavior
            # Also accept extra spaces after the colon
            pattern = re.compile(rf"content from\s*{re.escape(url)}:\s*")
            found = any(pattern.match(line) for line in normalized_lines)
            _dynamic_test(
                self,
                found,
                f"Output includes line for {url}",
                f"Expected output for url '{url}' not found in: '{output}'",
            )

    def test_main_waits_for_threads(self):
        # main() must not return before a 0.5 s fetch has had time to finish.
        import time

        importlib.reload(user_code)
        orig_fetch = user_code.fetch_url_content

        def slow_fetch(url):
            time.sleep(0.5)
            return url[-10:]*10

        user_code.fetch_url_content = slow_fetch
        f = io.StringIO()
        # perf_counter is monotonic and high resolution, so the measurement is
        # not disturbed by wall-clock adjustments.
        start = time.perf_counter()
        try:
            with redirect_stdout(f):
                user_code.main()
            elapsed = time.perf_counter() - start
        finally:
            user_code.fetch_url_content = orig_fetch
        _dynamic_test(
            self,
            elapsed >= 0.5,
            "main waits for all threads to finish",
            f"main did not wait for threads, elapsed: {elapsed}",
        )

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Report the outcome of a single check through *test_case*.

    The test's ``_testMethodName`` is overwritten with the message that matches
    the outcome. A truthy *condition* then records a trivially passing
    assertion; a falsy one calls ``test_case.fail`` with *failure_message*.
    """
    if not condition:
        test_case._testMethodName = failure_message
        test_case.fail(failure_message)
        return
    test_case._testMethodName = success_message
    test_case.assertTrue(True, success_message)

def normalize_text(text):
    """Return a lower-cased copy of *text* with whitespace and punctuation tidied.

    Runs of two or more whitespace characters become a single space, each of
    ``,``, ``:`` and ``?`` ends up with no whitespace before it and exactly one
    space after it, and the result is stripped at both ends.
    """
    lowered = text.lower()
    collapsed = re.sub(r"\s{2,}", " ", lowered)
    punctuated = re.sub(r"\s*([,:?])\s*", r"\1 ", collapsed)
    return punctuated.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Return *code* with the first top-level assignment to *var_name* replaced.

    Only plain assignment statements (``ast.Assign``) at module level whose
    target is a bare name are considered. The whole statement, including any
    continuation lines, is replaced by the single line ``var_name = value``.
    Lines that follow the statement (blank lines, comments, decorators, other
    statements) are left untouched.

    Args:
        code: Python source to rewrite.
        var_name: Name of the variable whose assignment is replaced.
        value: Source text placed on the right-hand side of the new assignment.

    Returns:
        The rewritten source joined with newlines (a trailing newline in *code*
        is not kept), or *code* unchanged when no matching assignment exists.

    Raises:
        SyntaxError: If *code* cannot be parsed by ``ast.parse``.
    """
    tree = ast.parse(code)
    lines = code.splitlines()
    for i, node in enumerate(tree.body):
        if not isinstance(node, ast.Assign):
            continue
        if not any(
            isinstance(target, ast.Name) and target.id == var_name
            for target in node.targets
        ):
            continue

        start_line = node.lineno - 1
        line = lines[start_line]
        indent = ' ' * (len(line) - len(line.lstrip()))

        # The statement's own last line bounds what is removed, so text between
        # this statement and the next one survives.
        end_line = getattr(node, "end_lineno", None)
        if end_line is None:
            # No end position available: fall back to treating everything up to
            # the next top-level statement as part of this assignment.
            end_line = next(
                (
                    next_node.lineno - 1
                    for next_node in tree.body[i + 1:]
                    if hasattr(next_node, 'lineno')
                ),
                len(lines),
            )

        lines[start_line] = f"{indent}{var_name} = {value}"
        del lines[start_line + 1:end_line]
        # Join with a real newline so the result is valid multi-line source.
        return '\n'.join(lines)
    return code

# Run the tests through unittest's command-line runner when this file is
# executed directly (and not when it is imported).
if __name__ == "__main__":
    unittest.main()


test_main.py

A beginner-friendly course introducing the concepts, techniques, and practical applications of multithreading and multiprocessing in Python. Learn how to write concurrent programs, manage threads and processes, and solve real-world problems using parallel execution.

Explore the foundational concepts of concurrency, parallelism, and the differences between threads and processes in Python.

Learn how to create, start, and manage threads in Python, and understand thread synchronization.

Explore how to create and manage processes in Python, and understand inter-process communication.

Delve into advanced concurrency patterns, performance considerations, and best practices for writing robust concurrent code.

Challenge: Threaded Web Scraper

Lösung