# Source code for synkit.Chem.Reaction.radical_wildcard

from rdkit import Chem
from rdkit.Chem import SanitizeFlags
import re
from typing import Tuple, List, Optional, Dict



# [docs] -- documentation-link marker left over from the rendered source page
def clean_wc(
    rsmi: str, invert: bool = False, max_frag: bool = False, wild_card: bool = True
) -> str:
    """
    Strip wildcard-bearing fragments from one side of a reaction SMILES and
    keep the longest fragment that remains.

    The chosen side is split on '.', every fragment containing '*' is
    discarded, and the side is replaced by the longest surviving fragment
    (longest by string length; the first one wins a tie). If every fragment
    contains '*', the side is returned untouched. The other side is never
    modified.

    :param rsmi: Reaction SMILES string in the form 'R>>P'.
    :type rsmi: str
    :param invert: If True, the reactant side is processed; otherwise the
        product side.
    :type invert: bool
    :param max_frag: If True, the filtering/selection step is always run,
        regardless of ``wild_card``.
    :type max_frag: bool
    :param wild_card: If True, run the filtering/selection step. When both
        this and ``max_frag`` are False the input is returned unchanged.
    :type wild_card: bool
    :returns: The reaction SMILES with the selected side processed.
    :rtype: str
    :raises ValueError: If ``rsmi`` does not split into exactly two parts
        on '>>'.

    Example
    -------
    >>> clean_wc('A.B>>C.*', invert=False, wild_card=True)
    'A.B>>C'
    >>> clean_wc('A.B>>C.D', invert=False, max_frag=True)
    'A.B>>C'
    """
    pieces = rsmi.split(">>")
    if len(pieces) != 2:
        raise ValueError("Reaction SMILES must contain exactly one '>>'.")
    lhs, rhs = pieces

    target = lhs if invert else rhs

    # max_frag switches the filtering step on even when wild_card is False.
    if wild_card or max_frag:
        survivors = [piece for piece in target.split(".") if "*" not in piece]
        # With nothing left after filtering, the side stays as it was.
        if survivors:
            target = max(survivors, key=len)

    return f"{target}>>{rhs}" if invert else f"{lhs}>>{target}"




# [docs] -- documentation-link marker left over from the rendered source page
class RadicalWildcardAdder:
    """A utility for adding wildcard dummy atoms ([*]) to radical centers in
    reaction SMILES, with unique incremental atom-map indices and correct
    propagation into products.

    Each reactive radical atom in the reactant block is identified by its unpaired electron count,
    assigned one or more wildcard map indices, and recorded. The same wildcard(s) are then appended
    to the corresponding atom(s) in the product block, ensuring consistent mapping.

    :param start_map: If provided, this integer will be the first atom-map index
                      used for wildcard dummy atoms; subsequent radicals get incremented indices.
                      If None, the next unused index is auto-determined from the input SMILES.
    :type start_map: Optional[int]

    Example
    -------
    >>> adder = RadicalWildcardAdder(start_map=8)
    >>> rxn = "[C:2][OH:4].[O:6][H:7]>>[C:2][O:6].[OH:4][H:7]"
    >>> print(adder.transform(rxn))
    [C:2]([OH:4])[*:8].[O:6]([H:7])[*:9]>>[C:2]([O:6][*:9])[*:8].[OH:4][H:7]
    """

    def __init__(self, start_map: Optional[int] = None) -> None:
        """Initialize the adder with an optional starting map index.

        :param start_map: Starting atom-map index for wildcards or None
            to auto-pick.
        :type start_map: Optional[int]
        """
        self.start_map = start_map

    def __repr__(self) -> str:
        """Official representation."""
        return f"<RadicalWildcardAdder(start_map={self.start_map})>"

    def __str__(self) -> str:
        """User-friendly description."""
        m = self.start_map if self.start_map is not None else "auto"
        return f"RadicalWildcardAdder(start_map={m})"

    def transform(self, rxn_smiles: str) -> str:
        """Append wildcard dummy atoms to each radical center in the reactant
        block and propagate the same wildcards to the matching atoms in the
        product block.

        Reactant fragments are processed first: every atom reporting radical
        electrons receives one single-bonded dummy atom per radical electron,
        each with a fresh map index. Product fragments are processed next and
        reuse, in order, the indices recorded for the atom carrying the same
        atom-map number. The agents block, if any, is passed through as-is.

        :param rxn_smiles: Reaction SMILES string, two-component or
            three-component.
        :type rxn_smiles: str
        :returns: Modified reaction SMILES with consistent wildcard
            attachments.
        :rtype: str
        :raises ValueError: If the string does not split into reactant and
            product blocks, or a fragment cannot be parsed as SMILES.
        """
        # Split into reactants > agents? > products
        react_blk, agents_blk, prod_blk = self._split_reaction(rxn_smiles)

        # First wildcard map index: caller-supplied, or one past the largest
        # ':<digits>' number found anywhere in the input string.
        existing = [int(n) for n in re.findall(r":(\d+)", rxn_smiles)]
        next_map = (
            self.start_map
            if self.start_map is not None
            else max(existing, default=0) + 1
        )

        # Original atom-map number -> wildcard map indices assigned to it in
        # the reactants; entries are consumed while processing products.
        wildcard_map_for: Dict[int, List[int]] = {}

        # sanitizeOps value for the post-edit sanitization below.
        # NOTE(review): this is the ADJUSTHS flag itself, not "everything
        # except ADJUSTHS" -- confirm which was intended.
        keep_ops = SanitizeFlags.SANITIZE_ADJUSTHS

        def _process(frags: List[str], propagate: bool) -> List[str]:
            """Attach wildcards to every fragment of one reaction block.

            With ``propagate=True`` new indices are allocated and recorded;
            with ``propagate=False`` previously recorded indices are reused.
            """
            nonlocal next_map
            out = []
            for smi in frags:
                if not smi:
                    continue
                mol = Chem.MolFromSmiles(smi, sanitize=False)
                if mol is None:
                    # Fail with the documented ValueError instead of handing
                    # None to SanitizeMol.
                    raise ValueError(f"Cannot parse SMILES fragment: {smi}")
                Chem.SanitizeMol(mol)
                rw = Chem.RWMol(mol)

                changed = False

                # Snapshot the atoms up front: dummy atoms are appended to
                # ``rw`` inside the loop and must not be visited themselves.
                for atom in list(rw.GetAtoms()):
                    rad = atom.GetNumRadicalElectrons()
                    if rad <= 0:
                        continue
                    # NOTE(review): atoms without a map number presumably
                    # all report the same value here and would then share
                    # one entry -- confirm this is acceptable.
                    orig_map = atom.GetAtomMapNum()

                    # One wildcard per unpaired electron.
                    for _ in range(rad):
                        if propagate:
                            wm = next_map
                            wildcard_map_for.setdefault(orig_map, []).append(wm)
                            next_map += 1
                        else:
                            # Products reuse the recorded indices in order;
                            # once none remain for this atom, nothing more
                            # can be attached to it.
                            pending = wildcard_map_for.get(orig_map)
                            if not pending:
                                break
                            wm = pending.pop(0)

                        dummy = Chem.Atom(0)
                        dummy.SetAtomMapNum(wm)
                        dummy.SetNoImplicit(True)
                        rw.AddAtom(dummy)
                        rw.AddBond(
                            atom.GetIdx(),
                            rw.GetNumAtoms() - 1,
                            Chem.BondType.SINGLE,
                        )
                        changed = True

                if changed:
                    # NOTE(review): this sanitizes the object returned by
                    # GetMol(), not ``rw``; if GetMol() returns a copy, the
                    # molecule written out below is unaffected -- confirm.
                    Chem.SanitizeMol(rw.GetMol(), sanitizeOps=keep_ops)

                out.append(
                    Chem.MolToSmiles(
                        rw.GetMol(), isomericSmiles=True, allHsExplicit=True
                    )
                )
            return out

        # Reactants must be processed first so that the products can look up
        # the wildcard indices recorded for each mapped atom.
        react_frags = react_blk.split(".") if react_blk else []
        new_reacts = _process(react_frags, propagate=True)

        prod_frags = prod_blk.split(".") if prod_blk else []
        new_prods = _process(prod_frags, propagate=False)

        react_str = ".".join(new_reacts)
        prod_str = ".".join(new_prods)
        if agents_blk is None:
            return f"{react_str}>>{prod_str}"
        # For 'R>>P' input the agents block is the empty string, so this
        # also reproduces the '>>' separator.
        return f"{react_str}>{agents_blk}>{prod_str}"

    @staticmethod
    def _split_reaction(rxn: str) -> Tuple[str, Optional[str], str]:
        """Split a reaction SMILES into reactants, agents (optional), and
        products.

        The string is split on every '>' character. Two pieces are returned
        as (reactants, None, products); three pieces as (reactants, agents,
        products). For an 'R>>P' string the split yields three pieces, so
        the agents block is the empty string rather than None.

        :param rxn: The reaction SMILES string.
        :type rxn: str
        :returns: Tuple of (reactants_block, agents_block or None,
            products_block).
        :rtype: Tuple[str, Optional[str], str]
        :raises ValueError: If splitting on '>' does not produce exactly
            two or three pieces.
        """
        parts = rxn.split(">")
        if len(parts) == 2:
            return parts[0], None, parts[1]
        if len(parts) == 3:
            return parts[0], parts[1], parts[2]
        raise ValueError("Reaction SMILES must contain 2 or 3 '>' symbols")