import unittest
import importlib
import user_code

from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs


class TestTask(unittest.TestCase):
    """Checks for ``user_code.find_similar_molecules(ref, candidates)``.

    Expected answers are computed in this class with RDKit: Morgan
    bit-vector fingerprints of radius 2 are compared by Tanimoto
    similarity, and a candidate is kept when its similarity to the
    reference is strictly greater than 0.7. Every outcome is reported
    through ``_dynamic_test``.
    """

    # Parameters of the reference computation the submission is compared to.
    MORGAN_RADIUS = 2
    SIMILARITY_THRESHOLD = 0.7

    def setUp(self):
        """Re-execute the ``user_code`` module before each test."""
        # Presumably so module-level state cannot leak between tests.
        importlib.reload(user_code)

    def _expected_similar(self, ref, candidates):
        """Return the candidates the reference computation considers similar.

        Args:
            ref: Reference SMILES string; must be parseable by RDKit.
            candidates: Iterable of candidate SMILES strings.

        Returns:
            A list, in input order, of the candidates whose Tanimoto
            similarity to ``ref`` exceeds ``SIMILARITY_THRESHOLD``.
            Candidates RDKit cannot parse are skipped.
        """
        ref_mol = Chem.MolFromSmiles(ref)
        self.assertIsNotNone(ref_mol, "Reference SMILES should be valid in this test.")
        ref_fp = AllChem.GetMorganFingerprintAsBitVect(ref_mol, self.MORGAN_RADIUS)

        expected = []
        for smi in candidates:
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                # MolFromSmiles yields None for unparseable input.
                continue
            fp = AllChem.GetMorganFingerprintAsBitVect(mol, self.MORGAN_RADIUS)
            if DataStructs.TanimotoSimilarity(ref_fp, fp) > self.SIMILARITY_THRESHOLD:
                expected.append(smi)
        return expected

    def test_returns_only_similar_smiles(self):
        """The result is a list holding exactly the above-threshold candidates."""
        ref = "CCO"
        candidates = ["CCC", "CCN", "CCCO", "CCCl", "CCO", "CCCCO"]

        expected = self._expected_similar(ref, candidates)
        result = user_code.find_similar_molecules(ref, candidates)

        # Compared as sets: the order of the returned SMILES is not checked.
        _dynamic_test(
            self,
            isinstance(result, list) and set(result) == set(expected),
            f"Returns correct SMILES above threshold: {expected}",
            f"Expected {expected}, got {result}",
        )

    def test_skips_invalid_candidate_smiles(self):
        """Unparseable candidate SMILES never appear in the result."""
        ref = "CCO"
        # Use truly invalid SMILES strings
        invalid_1 = "notasmiles"
        invalid_2 = "C1(CC"     # unclosed ring/parenthesis

        candidates = ["CCCO", invalid_1, "CCO", "C1CC1", invalid_2]
        result = user_code.find_similar_molecules(ref, candidates)

        # The type check comes first so a non-list return (e.g. None) is
        # reported as a failure instead of raising TypeError on `in`.
        _dynamic_test(
            self,
            isinstance(result, list)
            and invalid_1 not in result
            and invalid_2 not in result,
            "Invalid SMILES are skipped in candidates",
            f"Invalid SMILES found in result: {result}",
        )

    def test_raises_valueerror_on_invalid_reference(self):
        """An unparseable reference SMILES makes the function raise ValueError."""
        candidates = ["CCCO", "CCO", "CCCCO"]
        try:
            user_code.find_similar_molecules("notavalidsmiles", candidates)
        except ValueError:
            raised = True
        else:
            raised = False

        _dynamic_test(
            self,
            raised,
            "Raises ValueError if reference SMILES is invalid",
            "Did not raise ValueError for invalid reference SMILES",
        )

    def test_uses_morgan_fp_radius2(self):
        """The result matches the radius-2 Morgan reference on aromatic inputs."""
        ref = "c1ccccc1"  # benzene
        candidates = ["c1ccccc1C", "c1ccncc1", "c1ccccc1O", "CCCC"]

        expected = self._expected_similar(ref, candidates)
        result = user_code.find_similar_molecules(ref, candidates)

        # Same list requirement as test_returns_only_similar_smiles; it also
        # keeps set(result) from raising on a non-iterable return value.
        _dynamic_test(
            self,
            isinstance(result, list) and set(result) == set(expected),
            "Morgan fingerprints with radius 2 used correctly",
            f"Expected {expected}, got {result}",
        )


def _dynamic_test(test_case, condition, success_message, failure_message):
    """Report a check's outcome on ``test_case`` with a descriptive label.

    The test case's ``_testMethodName`` is overwritten with the message that
    matches the outcome before the outcome itself is recorded.

    Args:
        test_case: Object exposing ``assertTrue`` and ``fail``; the tests in
            this file pass a ``unittest.TestCase`` instance.
        condition: Value whose truthiness decides between pass and fail.
        success_message: Label used when ``condition`` is truthy.
        failure_message: Label and failure text used otherwise.
    """
    passed = bool(condition)
    test_case._testMethodName = success_message if passed else failure_message
    if passed:
        test_case.assertTrue(True, success_message)
        return
    test_case.fail(failure_message)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()


test_main.py

Dive into the world of chemoinformatics using Python! This course introduces you to essential concepts such as molecular representations, descriptors, similarity, clustering, and QSAR modeling. Through engaging theory and hands-on challenges, you'll learn to analyze chemical data, compute molecular properties, and build predictive models—all with beginner-friendly explanations and real-world examples.

Explore how molecules are represented digitally and learn to parse and interpret these representations using Python.

Learn how to compare molecules, find similar compounds, and group them for drug discovery applications.

Apply chemoinformatics techniques to screen and rank compound libraries for drug discovery.

Challenge: Find Similar Drug-like Molecules

Solution

Challenge: Find Similar Drug-like Molecules

Solution