Source code for synkit.Chem.utils
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem import rdChemReactions
import re
from typing import List, Optional, Tuple, Union
[docs]
def clean_radical_rsmi(rsmi: str) -> str:
    """
    Drop radical-bearing fragments from both sides of a reaction SMILES.

    Each side of ``rsmi`` is parsed with RDKit and split into its
    disconnected fragments. Any fragment in which at least one atom reports
    a nonzero radical-electron count is discarded; the surviving fragments
    are written back with ``Chem.MolToSmiles`` and joined with ``'.'``.

    A string that contains no ``'>>'`` is returned unchanged. A side that
    RDKit cannot parse contributes an empty string to the result.

    :param rsmi: Reaction SMILES string, e.g. ``'A>>B.C'``.
    :type rsmi: str
    :returns: Reaction SMILES with radical-containing fragments removed.
    :rtype: str

    Example (illustrative): given a product side such as
    ``'COC(=O)C(CCCCNC(=O)OCc1ccccc1)NC(N)=O.COc1c[c]c(O)c(C(C)(C)C)c1'``,
    the second fragment is expected to be dropped because of its ``[c]``
    radical centre, leaving only the first fragment on the product side.
    Kept fragments are re-emitted by RDKit, so their text may differ from
    the input spelling.
    """
    if ">>" not in rsmi:
        return rsmi

    def _has_radical(fragment) -> bool:
        # True as soon as one atom carries unpaired electrons.
        for atom in fragment.GetAtoms():
            if atom.GetNumRadicalElectrons() > 0:
                return True
        return False

    def _strip_radical_fragments(side_smiles: str) -> str:
        parsed = Chem.MolFromSmiles(side_smiles)
        if parsed is None:
            # Unparsable side: represented as an empty side in the output.
            return ""
        return ".".join(
            Chem.MolToSmiles(fragment, isomericSmiles=True)
            for fragment in Chem.GetMolFrags(parsed, asMols=True)
            if not _has_radical(fragment)
        )

    # Split only on the first '>>' so the remainder stays on the product side.
    lhs, rhs = rsmi.split(">>", 1)
    cleaned_lhs = _strip_radical_fragments(lhs)
    cleaned_rhs = _strip_radical_fragments(rhs)
    return cleaned_lhs + ">>" + cleaned_rhs
[docs]
def enumerate_tautomers(reaction_smiles: str) -> Optional[List[str]]:
    """Build reaction SMILES variants, one per enumerated reactant tautomer.

    The reactant side is run through RDKit's ``TautomerEnumerator``; the
    product side is written once with ``Chem.MolToSmiles(..., canonical=True)``
    and reused for every variant. The unmodified input string is always the
    first element of the returned list.

    :param reaction_smiles: Reaction SMILES in 'reactants>>products'
        format.
    :type reaction_smiles: str
    :returns: ``[reaction_smiles, <tautomer_1>>>products, ...]``, or None
        when any exception other than ValueError occurs.
    :rtype: Optional[List[str]]
    :raises ValueError: If splitting on '>>' does not give exactly two
        parts, or if either side cannot be parsed by RDKit.
    """
    try:
        lhs, rhs = reaction_smiles.split(">>")
        reactant_mol = Chem.MolFromSmiles(lhs)
        product_mol = Chem.MolFromSmiles(rhs)
        if reactant_mol is None or product_mol is None:
            raise ValueError("Invalid reactant or product SMILES.")

        tautomers = rdMolStandardize.TautomerEnumerator().Enumerate(reactant_mol)
        if not tautomers:
            # Fall back to the parsed reactant when nothing was enumerated.
            tautomers = [reactant_mol]

        canonical_products = Chem.MolToSmiles(product_mol, canonical=True)

        variants = [reaction_smiles]
        for tautomer in tautomers:
            variants.append(f"{Chem.MolToSmiles(tautomer)}>>{canonical_products}")
        return variants
    except ValueError:
        # Input-format problems are surfaced to the caller.
        raise
    except Exception:
        # Any other failure is reported as "no result".
        return None
[docs]
def mapping_success_rate(list_mapping_data: List[str]) -> float:
    """Return the percentage of entries that carry an atom-map annotation.

    An entry counts as mapped when it contains a colon followed by at least
    one digit (regex ``:\\d+``) anywhere in the string.

    :param list_mapping_data: Strings to inspect for mapping annotations.
    :type list_mapping_data: List[str]
    :returns: ``100 * mapped / total`` rounded to two decimals.
    :rtype: float
    :raises ValueError: If the input list is empty.
    """
    if not list_mapping_data:
        raise ValueError("The input list is empty, cannot calculate success rate.")

    total = len(list_mapping_data)
    mapped = 0
    for entry in list_mapping_data:
        if re.search(r":\d+", entry) is not None:
            mapped += 1
    return round(100 * mapped / total, 2)
[docs]
def count_carbons(smiles: str) -> int:
    """Return how many atoms in the parsed molecule have the symbol ``"C"``.

    :param smiles: SMILES string of the molecule.
    :type smiles: str
    :returns: Number of atoms whose ``GetSymbol()`` is ``"C"``.
    :rtype: int
    :raises ValueError: If RDKit cannot parse the SMILES string.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    carbon_total = 0
    for atom in parsed.GetAtoms():
        if atom.GetSymbol() == "C":
            carbon_total += 1
    return carbon_total
[docs]
def get_max_fragment(smiles: Union[str, List[str]]) -> str:
    """Pick the fragment with the highest atom count and return its SMILES.

    The input (a single string or a list of strings) is cut on ``'.'``;
    empty pieces are ignored and pieces RDKit cannot parse are dropped.
    Among the remaining molecules the one with the largest ``GetNumAtoms()``
    wins (the first such molecule if several tie).

    :param smiles: SMILES string(s), possibly with '.' separators.
    :type smiles: str or List[str]
    :returns: ``Chem.MolToSmiles`` output for the largest fragment, or an
        empty string when no fragment could be parsed.
    :rtype: str
    """
    if isinstance(smiles, str):
        pieces = smiles.split(".")
    else:
        pieces = [part for entry in smiles for part in entry.split(".")]

    parsed = []
    for piece in pieces:
        if not piece:
            # Skip empty pieces before handing anything to RDKit.
            continue
        mol = Chem.MolFromSmiles(piece)
        if mol:
            parsed.append(mol)

    if not parsed:
        return ""

    largest = max(parsed, key=lambda candidate: candidate.GetNumAtoms())
    return Chem.MolToSmiles(largest)
[docs]
def filter_smiles(smiles_list: List[str], target_smiles: str) -> List[str]:
    """Keep carbon-containing SMILES that differ from a target molecule.

    Comparison against the target is done on ``Chem.MolToSmiles`` output of
    both molecules. If the target itself cannot be parsed, an empty string
    is used as the comparison value. Entries that cannot be parsed, or that
    contain no atom with symbol ``"C"``, are dropped. Kept entries are
    returned in their original (input) spelling and order.

    :param smiles_list: List of SMILES strings to filter.
    :type smiles_list: List[str]
    :param target_smiles: SMILES string to exclude.
    :type target_smiles: str
    :returns: Entries that contain at least one carbon atom and do not match
        ``target_smiles``.
    :rtype: List[str]
    """
    excluded_mol = Chem.MolFromSmiles(target_smiles)
    excluded = Chem.MolToSmiles(excluded_mol) if excluded_mol else ""

    kept: List[str] = []
    for candidate in smiles_list:
        parsed = Chem.MolFromSmiles(candidate)
        if not parsed:
            continue
        if not any(atom.GetSymbol() == "C" for atom in parsed.GetAtoms()):
            continue
        if Chem.MolToSmiles(parsed) != excluded:
            kept.append(candidate)
    return kept
[docs]
def remove_atom_mappings(mol: Chem.Mol) -> Chem.Mol:
    """Reset the atom-map number of every atom in ``mol`` to zero.

    The molecule is modified in place; the object returned is the one that
    was passed in.

    :param mol: RDKit Mol object.
    :type mol: Chem.Mol
    :returns: The same Mol, with ``SetAtomMapNum(0)`` applied to each atom.
    :rtype: Chem.Mol
    """
    atoms = mol.GetAtoms()
    for current in atoms:
        current.SetAtomMapNum(0)
    return mol
[docs]
def get_sanitized_smiles(smiles_list: List[str]) -> List[str]:
    """Sanitize a list of SMILES and reduce it to its single largest fragment.

    Each entry is processed independently: entries containing ``"->"`` are
    skipped, as are entries RDKit cannot parse or that raise during
    sanitization/serialization. Atom-map numbers are cleared before
    sanitizing. The surviving isomeric SMILES are then passed together to
    ``get_max_fragment`` and only its result is returned.

    :param smiles_list: List of SMILES strings to sanitize.
    :type smiles_list: List[str]
    :returns: A one-element list holding the largest fragment over all
        sanitized entries, or an empty list if nothing survived.
    :rtype: List[str]
    """

    def _to_clean_smiles(raw: str) -> Optional[str]:
        # None means "skip this entry".
        if "->" in raw:
            return None
        parsed = Chem.MolFromSmiles(raw)
        if not parsed:
            return None
        parsed = remove_atom_mappings(parsed)
        try:
            Chem.SanitizeMol(parsed)
            return Chem.MolToSmiles(parsed, isomericSmiles=True)
        except Exception:
            return None

    candidates: List[str] = [
        cleaned
        for cleaned in map(_to_clean_smiles, smiles_list)
        if cleaned is not None
    ]

    # Keep only the largest fragment across all sanitized entries.
    return [get_max_fragment(candidates)] if candidates else candidates
[docs]
def remove_duplicates(smiles_list: List[str]) -> List[str]:
    """Return the input strings with repeats dropped, keeping first occurrences.

    Duplicates are detected by exact string equality; no chemical
    canonicalization is performed.

    :param smiles_list: List of strings (e.g., SMILES) possibly with
        duplicates.
    :type smiles_list: List[str]
    :returns: New list with duplicates removed, in original order.
    :rtype: List[str]
    """
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(smiles_list))
[docs]
def process_smiles_list(smiles_list: List[str]) -> List[str]:
    """Flatten a list of SMILES by cutting every entry at its '.' separators.

    Entries without a '.' are passed through unchanged; entries with one or
    more '.' are replaced by the pieces returned by ``str.split(".")``
    (which may include empty strings for consecutive or trailing dots).

    :param smiles_list: List of SMILES strings, some containing '.'
        separators.
    :type smiles_list: List[str]
    :returns: Flattened list of component SMILES strings, in input order.
    :rtype: List[str]
    """
    return [
        component
        for entry in smiles_list
        for component in (entry.split(".") if "." in entry else (entry,))
    ]
[docs]
def remove_explicit_H_from_rsmi(rsmi: str) -> str:
    """Rewrite a reaction SMILES with ``Chem.RemoveHs`` applied to each molecule.

    The reaction is loaded through ``rdChemReactions.ReactionFromSmarts``
    with ``useSmiles=True``. Every reactant and product template is passed
    through ``Chem.RemoveHs`` and serialized as isomeric SMILES; the pieces
    are joined with '.' on each side.

    NOTE(review): the original documentation states that atom-atom mapping
    (AAM) is preserved. This function does not touch map numbers itself, so
    that relies on RDKit keeping them through ``RemoveHs``/``MolToSmiles`` —
    confirm against the RDKit version in use.

    :param rsmi: Atom-mapped reaction SMILES with explicit hydrogens.
    :type rsmi: str
    :returns: Reaction SMILES rebuilt from the hydrogen-stripped molecules.
    :rtype: str
    """
    reaction = rdChemReactions.ReactionFromSmarts(rsmi, useSmiles=True)

    def _side_smiles(templates) -> str:
        parts = []
        for mol in templates:
            stripped = Chem.RemoveHs(mol)
            parts.append(Chem.MolToSmiles(stripped, isomericSmiles=True))
        return ".".join(parts)

    reactant_side = _side_smiles(reaction.GetReactants())
    product_side = _side_smiles(reaction.GetProducts())
    return reactant_side + ">>" + product_side
[docs]
def remove_common_reagents(reaction_smiles: str) -> str:
    """Remove species that appear on both sides of a reaction SMILES.

    Both sides are split on '.' and compared as plain strings (no
    canonicalization), so a species is only treated as common when its
    text is identical on the reactant and product side. Every occurrence
    of a common species is removed from both sides.

    Only the cleaned reaction string is returned; the removed species are
    not reported to the caller.

    :param reaction_smiles: Reaction SMILES 'reactants>>products'.
    :type reaction_smiles: str
    :returns: Reaction SMILES with the shared species removed from both
        sides. A side may be empty if all of its species were shared.
    :rtype: str
    :raises ValueError: If splitting ``reaction_smiles`` on '>>' does not
        yield exactly two parts.
    """
    reactants, products = reaction_smiles.split(">>")
    reactant_list = reactants.split(".")
    product_list = products.split(".")

    # Species whose exact text occurs on both sides.
    common_reagents = set(reactant_list) & set(product_list)

    filtered_reactants = [r for r in reactant_list if r not in common_reagents]
    filtered_products = [p for p in product_list if p not in common_reagents]
    return ".".join(filtered_reactants) + ">>" + ".".join(filtered_products)
[docs]
def reverse_reaction(rsmi: str) -> str:
    """Swap the two sides of a reaction SMILES.

    The swap is only performed when the string contains exactly one '>>'
    separator; any other input is returned unchanged.

    :param rsmi: Reaction SMILES 'reactants>>products'.
    :type rsmi: str
    :returns: Reaction SMILES 'products>>reactants', or ``rsmi`` itself when
        it does not split into exactly two parts.
    :rtype: str
    """
    if rsmi.count(">>") != 1:
        return rsmi
    reactants, _, products = rsmi.partition(">>")
    return products + ">>" + reactants
[docs]
def merge_reaction(rsmi_1: str, rsmi_2: str) -> Optional[str]:
    """Combine two reaction SMILES into one by joining matching sides.

    Reactant sides are joined with '.' and product sides are joined with
    '.', in argument order. Each input must split on '>>' into exactly two
    non-empty parts; otherwise the merge is rejected.

    :param rsmi_1: First reaction SMILES.
    :type rsmi_1: str
    :param rsmi_2: Second reaction SMILES.
    :type rsmi_2: str
    :returns: ``'r1.r2>>p1.p2'``, or None if either input does not have
        exactly two sides or has an empty side.
    :rtype: Optional[str]
    """
    sides = []
    for rsmi in (rsmi_1, rsmi_2):
        halves = rsmi.split(">>")
        if len(halves) != 2:
            # Missing or repeated '>>' separator.
            return None
        sides.append(halves)

    (left_1, right_1), (left_2, right_2) = sides
    if not (left_1 and right_1 and left_2 and right_2):
        return None

    return ".".join((left_1, left_2)) + ">>" + ".".join((right_1, right_2))
[docs]
def find_longest_fragment(input_list: List[str]) -> Optional[str]:
    """Return the string with the greatest length from a list.

    When several strings share the greatest length, the one that appears
    first in the list is returned.

    :param input_list: List of strings to search.
    :type input_list: List[str]
    :returns: Longest string, or None if the list is empty.
    :rtype: Optional[str]
    """
    return max(input_list, key=len) if input_list else None