Source code for synkit.Chem.Reaction.deionize

import random
from itertools import combinations, permutations
from joblib import Parallel, delayed
from typing import List, Tuple, Callable, Dict, Any

from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize

from synkit.Chem.Reaction.balance_check import BalanceReactionCheck


class Deionize:
    """Neutralize ionic species and mixtures of ions in reactions.

    Provides methods to group ions into neutral combinations, uncharge
    individual anions/cations, and apply these corrections to SMILES
    strings or entire reaction dictionaries.
    """

    @staticmethod
    def random_pair_ions(
        charges: List[int], smiles: List[str]
    ) -> Tuple[List[List[str]], List[List[int]]]:
        """Identify non-overlapping groups of ions whose charges sum to zero.

        Groups of two ions are searched first, then groups of three, then
        groups of four. The candidate order is shuffled with ``random``, so
        which ions end up together may differ between calls. Ions that do not
        fit into any neutral group of at most four members are left out of
        the result.

        :param charges: List of integer formal charges for each ion.
        :type charges: List[int]
        :param smiles: Corresponding SMILES strings for each ion.
        :type smiles: List[str]
        :returns: A tuple of two lists:
            - groups of SMILES strings forming neutral sets,
            - groups of their corresponding charges.
        :rtype: Tuple[List[List[str]], List[List[int]]]
        """

        def find_groups(indices: List[int], size: int) -> Tuple[int, ...]:
            # First combination of `size` indices whose charges cancel out,
            # or an empty tuple when there is none.
            for group in combinations(indices, size):
                if sum(charges[i] for i in group) == 0:
                    return group
            return ()

        indices = list(range(len(charges)))
        random.shuffle(indices)
        used = set()
        grouped_smiles: List[List[str]] = []
        grouped_charges: List[List[int]] = []
        for group_size in (2, 3, 4):
            # Keep extracting groups of this size until none is left, then
            # move on to the next larger size.
            while True:
                available = [i for i in indices if i not in used]
                group = find_groups(available, group_size)
                if not group:
                    break
                grouped_smiles.append([smiles[i] for i in group])
                grouped_charges.append([charges[i] for i in group])
                used.update(group)
        return grouped_smiles, grouped_charges

    @staticmethod
    def uncharge_anion(smiles: str, charges: int = -1) -> str:
        """Neutralize an anionic SMILES string.

        Azide (``[N-]=[N+]=[N-]``) is special-cased. Singly charged anions
        are passed through RDKit's ``Uncharger``. For any other charge the
        charge token is removed textually and all square brackets are
        stripped; both the numeric (``-2``) and the repeated-sign (``--``)
        notations are recognized.

        :param smiles: SMILES of the anion to neutralize.
        :type smiles: str
        :param charges: Formal charge of the ion (negative integer).
            Defaults to -1.
        :type charges: int
        :returns: SMILES of the uncharged molecule.
        :rtype: str
        """
        if smiles == "[N-]=[N+]=[N-]":
            return "[N-]=[N+]=[N]"
        if charges == -1:
            mol = Chem.MolFromSmiles(smiles)
            uncharger = rdMolStandardize.Uncharger()
            uncharged = uncharger.uncharge(mol)
            return Chem.MolToSmiles(uncharged)
        # Multi-charged anions: prefer the numeric form ("-2"); when it is
        # absent fall back to the repeated-sign form ("--") so that e.g.
        # "[O--]" is handled like "[O-2]".
        token = f"{charges}"
        if token not in smiles:
            token = "-" * abs(charges)
        return smiles.replace(token, "").replace("[", "").replace("]", "")

    @staticmethod
    def uncharge_cation(smiles: str, charges: int = 1) -> str:
        """Neutralize a cationic SMILES string.

        The charge is removed textually; brackets are kept. For a single
        charge every ``+`` is removed. For higher charges the numeric form
        (``+2``) is removed, or, when it is absent, the repeated-sign form
        (``++``). If neither form occurs the input is returned unchanged.

        :param smiles: SMILES of the cation to neutralize.
        :type smiles: str
        :param charges: Formal charge of the ion (positive integer).
            Defaults to 1.
        :type charges: int
        :returns: SMILES of the uncharged molecule.
        :rtype: str
        """
        if charges == 1:
            return smiles.replace("+", "")
        token = f"+{charges}"
        if token not in smiles:
            # Repeated-sign notation, e.g. "[Mg++]" instead of "[Mg+2]".
            token = "+" * charges
        return smiles.replace(token, "")

    @staticmethod
    def uncharge_smiles(charge_smiles: str) -> str:
        """Neutralize all ionic components in a dot-separated SMILES string.

        Splits into components, identifies ionic species, groups them into
        neutral sets via `random_pair_ions`, then applies `uncharge_anion` or
        `uncharge_cation` and recombines. The uncharged fragments of a group
        are concatenated in every possible order until one order parses as a
        molecule; if none does, the group's original ions are kept. Ions that
        could not be placed in any neutral group are kept unchanged.

        :param charge_smiles: SMILES string with ionic and non-ionic parts.
        :type charge_smiles: str
        :returns: SMILES string with charges neutralized where possible.
            Non-ionic components come first, followed by the neutralized
            groups and finally any unpaired ions.
        :rtype: str
        """
        parts = charge_smiles.split(".")
        charges = [
            Chem.rdmolops.GetFormalCharge(Chem.MolFromSmiles(p)) for p in parts
        ]
        if all(c == 0 for c in charges):
            return charge_smiles

        non_ionic, ionic_parts, ionic_charges = [], [], []
        for p, c in zip(parts, charges):
            if c == 0:
                non_ionic.append(p)
            else:
                ionic_parts.append(p)
                ionic_charges.append(c)

        valid = non_ionic.copy()
        groups, group_chs = Deionize.random_pair_ions(ionic_charges, ionic_parts)
        for smiles_group, charge_group in zip(groups, group_chs):
            candidates = []
            for smi, ch in zip(smiles_group, charge_group):
                if ch > 0:
                    candidates.append(Deionize.uncharge_cation(smi, ch))
                else:
                    candidates.append(Deionize.uncharge_anion(smi, ch))
            # try permutations for valid SMILES
            for perm in permutations(candidates):
                combo = "".join(perm)
                if Chem.MolFromSmiles(combo):
                    valid.append(Chem.CanonSmiles(combo))
                    break
            else:
                # No ordering produced a parsable molecule: keep the ions.
                valid.extend(smiles_group)

        # `random_pair_ions` only returns ions it managed to group. Re-attach
        # the remaining ones so no component is silently lost. Matching is
        # done as a multiset because the same ion may occur several times.
        grouped = [smi for group in groups for smi in group]
        for ion in ionic_parts:
            if ion in grouped:
                grouped.remove(ion)
            else:
                valid.append(ion)
        return ".".join(valid)

    @staticmethod
    def ammonia_hydroxide_standardize(reaction_smiles: str) -> str:
        """Simplify ammonium hydroxide pairs in a reaction SMILES.

        Only the two literal, adjacent spellings are replaced.

        :param reaction_smiles: Reaction SMILES string.
        :type reaction_smiles: str
        :returns: Reaction SMILES with '[NH4+].[OH-]' replaced by 'N.O'
            and '[OH-].[NH4+]' replaced by 'O.N'.
        :rtype: str
        """
        return reaction_smiles.replace("[NH4+].[OH-]", "N.O").replace(
            "[OH-].[NH4+]", "O.N"
        )

    @classmethod
    def apply_uncharge_smiles_to_reactions(
        cls,
        reactions: List[Dict[str, Any]],
        uncharge_smiles_func: Callable[[str], str],
        n_jobs: int = 4,
    ) -> List[Dict[str, Any]]:
        """Apply a neutralization function to each reaction's
        reactants/products in parallel.

        Adds keys 'success', 'new_reactants', 'new_products', and
        'standardized_reactions' based on uncharged SMILES and verifies
        formula balance. When the combined molecular formulas of the
        neutralized reactants and products differ, the reaction falls back
        to the ammonia/hydroxide-standardized input instead.

        Each dict is updated inside ``process``; whether the caller's
        original objects are modified presumably depends on the joblib
        backend in use, so rely on the returned list.

        :param reactions: List of reaction dicts with 'reactants' and
            'products' keys.
        :type reactions: List[Dict[str, Any]]
        :param uncharge_smiles_func: Function to neutralize a SMILES string.
        :type uncharge_smiles_func: Callable[[str], str]
        :param n_jobs: Number of parallel jobs to run. Defaults to 4.
        :type n_jobs: int
        :returns: List of updated reaction dicts with:
            - 'success': bool indicating formula match
            - 'new_reactants' / 'new_products'
            - 'standardized_reactions'
        :rtype: List[Dict[str, Any]]
        """

        def process(reaction: Dict[str, Any]) -> Dict[str, Any]:
            # pre-standardize ammonia hydroxide
            r_fix = cls.ammonia_hydroxide_standardize(reaction["reactants"])
            p_fix = cls.ammonia_hydroxide_standardize(reaction["products"])
            ur = uncharge_smiles_func(r_fix)
            up = uncharge_smiles_func(p_fix)
            r_formula = BalanceReactionCheck().get_combined_molecular_formula(ur)
            p_formula = BalanceReactionCheck().get_combined_molecular_formula(up)
            # Accept the neutralized form only when both sides still have
            # the same combined formula.
            reaction["success"] = r_formula == p_formula
            reaction["new_reactants"] = ur if reaction["success"] else r_fix
            reaction["new_products"] = up if reaction["success"] else p_fix
            reaction["standardized_reactions"] = (
                f"{reaction['new_reactants']}>>{reaction['new_products']}"
            )
            return reaction

        return Parallel(n_jobs=n_jobs)(delayed(process)(rxn) for rxn in reactions)