Source code for synkit.Chem.Reaction.radical_wildcard
from rdkit import Chem
from rdkit.Chem import SanitizeFlags
import re
from typing import Tuple, List, Optional, Dict
[docs]
def clean_wc(
rsmi: str, invert: bool = False, max_frag: bool = False, wild_card: bool = True
) -> str:
"""
Clean wildcard-containing fragments from one side of a reaction SMILES,
optionally selecting the largest remaining fragment.
:param rsmi: Reaction SMILES string in the form 'R>>P'.
:type rsmi: str
:param invert: If True, process the reactant side; otherwise the product side.
:type invert: bool
:param max_frag: If True, force fragment selection (implies wild_card=True).
:type max_frag: bool
:param wild_card: If True, remove fragments containing '*' before selection.
:type wild_card: bool
:returns: The processed reaction SMILES.
:rtype: str
:raises ValueError: If input does not split into reactant and product.
Example
-------
>>> clean_wc('A.B>>C.*', invert=False, wild_card=True)
'A.B>>C'
>>> clean_wc('A.B>>C.D', invert=False, max_frag=True)
'A.B>>C'
"""
# Ensure max_frag implies wild_card
if max_frag:
wild_card = True
# Split into reactant and product
parts = rsmi.split(">>")
if len(parts) != 2:
raise ValueError("Reaction SMILES must contain exactly one '>>'.")
react, prod = parts
# Select side to process
side = react if invert else prod
processed = side
if wild_card:
frags = side.split(".")
# Filter out fragments containing wildcards
filtered = [frag for frag in frags if "*" not in frag]
if len(filtered) > 1:
# select the longest fragment
processed = max(filtered, key=len)
elif len(filtered) == 1:
processed = filtered[0]
# if no filtered fragments or single fragment, keep original side
# Reconstruct and return
if invert:
return f"{processed}>>{prod}"
return f"{react}>>{processed}"
[docs]
class RadicalWildcardAdder:
"""A utility for adding wildcard dummy atoms ([*]) to radical centers in
reaction SMILES, with unique incremental atom-map indices and correct
propagation into products.
Each reactive radical atom in the reactant block is identified by its unpaired electron count,
assigned one or more wildcard map indices, and recorded. The same wildcard(s) are then appended
to the corresponding atom(s) in the product block, ensuring consistent mapping.
:param start_map: If provided, this integer will be the first atom-map index
used for wildcard dummy atoms; subsequent radicals get incremented indices.
If None, the next unused index is auto-determined from the input SMILES.
:type start_map: Optional[int]
Example
-------
>>> adder = RadicalWildcardAdder(start_map=8)
>>> rxn = "[C:2][OH:4].[O:6][H:7]>>[C:2][O:6].[OH:4][H:7]"
>>> print(adder.transform(rxn))
[C:2]([OH:4])[*:8].[O:6]([H:7])[*:9]>>[C:2]([O:6][*:9])[*:8].[OH:4][H:7]
"""
def __init__(self, start_map: Optional[int] = None) -> None:
"""Initialize the adder with an optional starting map index.
:param start_map: Starting atom-map index for wildcards or None
to auto-pick.
:type start_map: Optional[int]
"""
self.start_map = start_map
def __repr__(self) -> str:
"""Official representation."""
return f"<RadicalWildcardAdder(start_map={self.start_map})>"
def __str__(self) -> str:
"""User-friendly description."""
m = self.start_map if self.start_map is not None else "auto"
return f"RadicalWildcardAdder(start_map={m})"
[docs]
def transform(self, rxn_smiles: str) -> str:
"""Append wildcard dummy atoms to each radical center in the reactant
block and propagate the same wildcards to the matching atoms in the
product block.
:param rxn_smiles: Reaction SMILES string, two-component or
three-component.
:type rxn_smiles: str
:returns: Modified reaction SMILES with consistent wildcard
attachments.
:rtype: str
:raises ValueError: If the SMILES is not valid or fragments fail
to parse.
"""
# Split into reactants > agents? > products
react_blk, agents_blk, prod_blk = self._split_reaction(rxn_smiles)
# Determine first wildcard map index
existing = [int(n) for n in re.findall(r":(\d+)", rxn_smiles)]
next_map = (
self.start_map
if self.start_map is not None
else max(existing, default=0) + 1
)
# Record mapping: original atom-map -> list of wildcard_maps
wildcard_map_for: Dict[int, List[int]] = {}
# Build sanitizeOps mask (skip H-adjustment)
keep_ops = SanitizeFlags.SANITIZE_ADJUSTHS
# Process one block (helper)
def _process(frags: List[str], propagate: bool) -> List[str]:
nonlocal next_map
out = []
for smi in frags:
if not smi:
continue
# Load unsanitized then re-sanitize to preserve explicit H
# mol = Chem.MolFromSmiles(smi, sanitize=False)
# if mol is None:
# raise ValueError(f"Cannot parse SMILES fragment: {smi}")
# Chem.SanitizeMol(mol, sanitizeOps=keep_ops)
mol = Chem.MolFromSmiles(smi, sanitize=False)
Chem.SanitizeMol(mol)
rw = Chem.RWMol(mol)
atoms = list(rw.GetAtoms())
changed = False
for atom in atoms:
rad = atom.GetNumRadicalElectrons()
orig_map = atom.GetAtomMapNum()
if rad > 0:
# Initialize list for this orig_map
if propagate and orig_map not in wildcard_map_for:
wildcard_map_for[orig_map] = []
# For each unpaired electron, attach a wildcard
for _ in range(rad):
if propagate:
wm = next_map
wildcard_map_for[orig_map].append(wm)
next_map += 1
else:
# in products, use already-recorded wm sequentially
wm_list = wildcard_map_for.get(orig_map, [])
if not wm_list:
continue
wm = wm_list.pop(0)
# add dummy wildcard
dummy = Chem.Atom(0)
dummy.SetAtomMapNum(wm)
dummy.SetNoImplicit(True)
rw.AddAtom(dummy)
rw.AddBond(
atom.GetIdx(),
rw.GetNumAtoms() - 1,
Chem.BondType.SINGLE,
)
changed = True
if changed:
Chem.SanitizeMol(rw.GetMol(), sanitizeOps=keep_ops)
out.append(
Chem.MolToSmiles(
rw.GetMol(), isomericSmiles=True, allHsExplicit=True
)
)
return out
react_frags = react_blk.split(".") if react_blk else []
new_reacts = _process(react_frags, propagate=True)
prod_frags = prod_blk.split(".") if prod_blk else []
new_prods = _process(prod_frags, propagate=False)
react_str = ".".join(new_reacts)
prod_str = ".".join(new_prods)
if agents_blk is None:
return f"{react_str}>>{prod_str}"
return f"{react_str}>{agents_blk}>{prod_str}"
@staticmethod
def _split_reaction(rxn: str) -> Tuple[str, Optional[str], str]:
"""Split a reaction SMILES into reactants, agents (optional), and
products.
:param rxn: The reaction SMILES string.
:type rxn: str
:returns: Tuple of (reactants_block, agents_block or None,
products_block).
:rtype: Tuple[str, Optional[str], str]
:raises ValueError: If the SMILES does not contain 2 or 3 '>'
symbols.
"""
parts = rxn.split(">")
if len(parts) == 2:
return parts[0], None, parts[1]
if len(parts) == 3:
return parts[0], parts[1], parts[2]
raise ValueError("Reaction SMILES must contain 2 or 3 '>' symbols")