import unittest
import user_code
import ast
import re   
import importlib
import csv
import unittest
import importlib
import numpy as np
from sklearn.linear_model import LinearRegression

class TestTask(unittest.TestCase):
    """Checks for the learner's ``user_code`` QSAR solution.

    Each test reloads ``user_code``, calls ``build_qsar_model`` and/or
    ``compute_descriptors`` and reports every check through the module-level
    ``_dynamic_test`` helper.

    The test methods are documented with comments rather than docstrings so
    that unittest's per-test descriptions stay exactly as they were.
    """

    @staticmethod
    def _load_user_code():
        """Import ``user_code`` and return a freshly reloaded copy of it."""
        # Imported lazily so an import error in the learner's module surfaces
        # inside the individual test rather than at collection time.
        import user_code

        return importlib.reload(user_code)

    def test_model_is_fitted_and_instance(self):
        # The returned object must be a fitted LinearRegression whose
        # coef_ / intercept_ attributes are numeric.
        user_code = self._load_user_code()
        model = user_code.build_qsar_model(
            ["CCO", "CC(=O)O", "c1ccccc1", "CCN", "CCCC"],
            [0.5, 1.2, 2.3, 0.7, 1.0],
        )
        _dynamic_test(
            self,
            isinstance(model, LinearRegression),
            "Returned model is an instance of LinearRegression",
            f"Returned model is not an instance of LinearRegression: {type(model)}",
        )
        _dynamic_test(
            self,
            hasattr(model, 'coef_') and hasattr(model, 'intercept_'),
            "Model has coef_ and intercept_ attributes after fitting",
            "Model does not have coef_ and intercept_ attributes after fitting",
        )
        # Safe to touch intercept_ / coef_ from here on: the previous check
        # fails the test (raises) when either attribute is missing.
        _dynamic_test(
            self,
            np.issubdtype(type(model.intercept_), np.number),
            "Model intercept_ is numeric",
            f"Model intercept_ is not numeric: {model.intercept_}",
        )
        _dynamic_test(
            self,
            isinstance(model.coef_, np.ndarray) and np.issubdtype(model.coef_.dtype, np.number),
            "Model coef_ is a numeric numpy array",
            f"Model coef_ is not a numeric numpy array: {model.coef_}",
        )

    def test_invalid_smiles_are_excluded(self):
        # Two of the five SMILES are unparsable; their descriptor rows are
        # expected to contain NaN and the model must still be fitted.
        user_code = self._load_user_code()
        smiles = ["CCO", "INVALID", "CCCC", "NONSENSE", "CCN"]
        properties = [0.5, 99.9, 1.0, 123.4, 0.7]
        # Compute descriptors for all molecules, then keep NaN-free rows.
        X_all = user_code.compute_descriptors(smiles)
        valid_mask = ~np.isnan(X_all).any(axis=1)
        valid_properties = np.array(properties)[valid_mask]
        model = user_code.build_qsar_model(smiles, properties)
        # Read the shape defensively: the failure message is built before
        # _dynamic_test runs, so touching model.coef_ directly would raise
        # AttributeError (instead of reporting a failure) for an unfitted or
        # wrongly-typed model.
        coef_shape = getattr(getattr(model, 'coef_', None), 'shape', None)
        _dynamic_test(
            self,
            coef_shape is not None and len(coef_shape) > 0 and coef_shape[0] == 4,
            "Model is fitted on 4 descriptors for valid molecules",
            f"Model coef_ shape is {coef_shape}, expected (4,)",
        )
        # NOTE: this counts the NaN-free rows reported by compute_descriptors;
        # it does not inspect which rows build_qsar_model actually trained on.
        _dynamic_test(
            self,
            len(valid_properties) == 3,
            "Only valid molecules are used for fitting",
            f"Expected 3 valid molecules, got {len(valid_properties)}",
        )

    def test_nan_rows_are_excluded(self):
        # One invalid SMILES among three: the model must still be fitted and
        # exactly two descriptor rows must be NaN-free.
        user_code = self._load_user_code()
        smiles = ["CCO", "CC(=O)O", "INVALID"]
        properties = [0.5, 1.2, 9.9]
        X = user_code.compute_descriptors(smiles)
        mask = ~np.isnan(X).any(axis=1)
        valid_count = np.sum(mask)
        model = user_code.build_qsar_model(smiles, properties)
        _dynamic_test(
            self,
            hasattr(model, 'coef_'),
            "Model is fitted and has coef_ attribute",
            "Model does not have coef_ attribute after fitting",
        )
        _dynamic_test(
            self,
            valid_count == 2,
            "Invalid SMILES excluded, only 2 valid molecules used",
            f"Expected 2 valid, got {valid_count}",
        )

    def test_regression_behavior(self):
        # The fitted model must produce one finite prediction per valid
        # molecule when fed the descriptors it was trained on.
        user_code = self._load_user_code()
        smiles = ["CCO", "CCCC", "CCN"]
        properties = [1.0, 2.0, 3.0]
        model = user_code.build_qsar_model(smiles, properties)
        # Predict using the same descriptors, restricted to NaN-free rows.
        X = user_code.compute_descriptors(smiles)
        mask = ~np.isnan(X).any(axis=1)
        preds = model.predict(X[mask])
        _dynamic_test(
            self,
            len(preds) == 3,
            "Model predicts the same number of values as valid inputs",
            f"Expected 3 predictions, got {len(preds)}",
        )
        _dynamic_test(
            self,
            np.all(np.isfinite(preds)),
            "Predictions are all finite numbers",
            f"Predictions contain non-finite values: {preds}",
        )

def _dynamic_test(test_case, condition, success_message, failure_message):
    """Report one check on *test_case*, renaming the test to the outcome message.

    The test case's ``_testMethodName`` is overwritten with whichever message
    applies. A truthy *condition* then records a trivially passing assertion
    carrying ``success_message``; a falsy one calls ``test_case.fail`` with
    ``failure_message``.
    """
    passed = bool(condition)
    outcome = success_message if passed else failure_message
    # The rename happens before the assertion so it is in place even when
    # fail() raises.
    test_case._testMethodName = outcome
    if passed:
        test_case.assertTrue(True, outcome)
    else:
        test_case.fail(outcome)

def normalize_text(text):
    """Return *text* lower-cased with whitespace and punctuation spacing normalized.

    Steps, in order:

    1. Lower-case the text.
    2. Collapse every run of two or more whitespace characters into a
       single space.
    3. Rewrite each ``,``, ``:`` or ``?`` so that it has no whitespace before
       it and exactly one space after it.
    4. Strip leading and trailing whitespace.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    text = text.lower()
    # The patterns use single backslashes inside raw strings: r"\\s" would
    # match a literal backslash followed by "s", not whitespace.
    text = re.sub(r"\s{2,}", " ", text)
    # r"\1" re-inserts the captured punctuation mark; r"\\1" would emit the
    # two characters "\1" verbatim.
    text = re.sub(r"\s*([,:?])\s*", r"\1 ", text)
    return text.strip()

def change_var(code: str, var_name: str, value: str) -> str:
    """Rewrite every top-level ``var_name = ...`` assignment in *code*.

    Only plain assignment statements (``ast.Assign``) in the module body that
    have a bare name target equal to *var_name* are rewritten; annotated or
    augmented assignments and assignments nested inside functions, classes or
    other blocks are left alone. Each matching statement, including all of
    its continuation lines, is replaced by the single line
    ``var_name = value``. Anything else on those lines (inline comments,
    additional chained targets) is dropped.

    Args:
        code: Python source text; it must parse.
        var_name: Name of the variable whose assignments are replaced.
        value: Source text placed on the right-hand side, inserted verbatim.

    Returns:
        The modified source with lines joined by ``"\\n"`` (a trailing newline
        is not preserved), or *code* unchanged when nothing matched.

    Raises:
        SyntaxError: If *code* cannot be parsed by ``ast.parse``.
    """
    tree = ast.parse(code)
    matches = [
        node
        for node in tree.body
        if isinstance(node, ast.Assign)
        and any(
            isinstance(target, ast.Name) and target.id == var_name
            for target in node.targets
        )
    ]

    # Nothing to rewrite: hand back the input untouched.
    if not matches:
        return code

    lines = code.splitlines()
    # Work from the last match to the first so that shrinking a multi-line
    # statement never shifts the line numbers of matches still to be handled.
    for node in reversed(matches):
        first = node.lineno - 1
        original = lines[first]
        indent = original[:len(original) - len(original.lstrip())]
        # end_lineno bounds the statement itself, so blank lines, comments and
        # decorators that follow the assignment are preserved.
        lines[first:node.end_lineno] = [f"{indent}{var_name} = {value}"]

    return "\n".join(lines)

# Run the test suite through unittest's command-line runner when this file is
# executed directly as a script (not when it is imported).
if __name__ == "__main__":
    unittest.main()


test_main.py

Dive into the world of chemoinformatics using Python! This course introduces you to essential concepts such as molecular representations, descriptors, similarity, clustering, and QSAR modeling. Through engaging theory and hands-on challenges, you'll learn to analyze chemical data, compute molecular properties, and build predictive models—all with beginner-friendly explanations and real-world examples.

Explore how molecules are represented digitally and learn to parse and interpret these representations using Python.

Learn how to compare molecules, find similar compounds, and group them for drug discovery applications.

Apply chemoinformatics techniques to screen and rank compound libraries for drug discovery.

Challenge: Build a Simple QSAR Model

Solution

Challenge: Build a Simple QSAR Model

Solution