Source code for synkit.Chem.Reaction.cleaning
import logging
from typing import List

from synkit.Chem.Reaction.balance_check import BalanceReactionCheck
from synkit.Chem.Reaction.standardize import Standardize
[docs]
class Cleaning:
    """Utilities for cleaning and filtering lists of reaction SMILES.

    Methods
    -------
    remove_duplicates(smiles_list)
        Remove duplicate SMILES while preserving input order.
    clean_smiles(smiles_list)
        Standardize, balance-check, and deduplicate a list of reaction SMILES.
    """

    def __init__(self) -> None:
        """Initialize the Cleaning helper.

        No instance attributes are used; both public methods are static.
        """

    @staticmethod
    def remove_duplicates(smiles_list: List[str]) -> List[str]:
        """Remove duplicate SMILES strings, preserving first occurrences.

        :param smiles_list: List of reaction SMILES strings.
        :type smiles_list: List[str]
        :returns: New list of unique SMILES in original order; the input
            list is not modified.
        :rtype: List[str]
        """
        # dict keys are unique and keep insertion order, so this retains
        # exactly the first occurrence of every string.
        return list(dict.fromkeys(smiles_list))

    @staticmethod
    def clean_smiles(smiles_list: List[str]) -> List[str]:
        """Standardize, balance-check, and deduplicate reaction SMILES.

        Steps:

        1. Standardize each SMILES via ``Standardize.standardize_rsmi``
           (called with ``stereo=True``). Entries for which the call raises,
           or returns a falsy value, are dropped.
        2. Keep only those for which
           ``BalanceReactionCheck.rsmi_balance_check`` returns a truthy value.
        3. Remove duplicates, preserving order.

        Standardization failures are skipped rather than raised, and are
        reported at DEBUG level on this module's logger. Exceptions raised by
        the balance check are not caught and propagate to the caller.

        :param smiles_list: List of reaction SMILES strings to clean.
        :type smiles_list: List[str]
        :returns: Cleaned list of standardized, balanced, unique SMILES.
        :rtype: List[str]
        """
        logger = logging.getLogger(__name__)
        standardizer = Standardize()
        balance_checker = BalanceReactionCheck()

        standardized: List[str] = []
        for smi in smiles_list:
            # Keep the try body to the one call that is expected to fail on
            # malformed input.
            try:
                std = standardizer.standardize_rsmi(smi, stereo=True)
            except Exception:
                # Best-effort cleaning: one bad entry must not abort the
                # whole batch, but leave a trace so drops can be diagnosed.
                logger.debug(
                    "Skipping reaction SMILES that failed standardization: %r",
                    smi,
                    exc_info=True,
                )
                continue
            if std:
                standardized.append(std)

        balanced = [
            smi for smi in standardized if balance_checker.rsmi_balance_check(smi)
        ]
        return Cleaning.remove_duplicates(balanced)