Source code for synkit.Graph.ITS.normalize_aam

import networkx as nx
from rdkit import Chem
from typing import List

from synkit.IO.chem_converter import rsmi_to_graph
from synkit.IO.graph_to_mol import GraphToMol
from synkit.Chem.Reaction.fix_aam import FixAAM
from synkit.Graph.Hyrogen._misc import implicit_hydrogen
from synkit.Graph.ITS.its_construction import ITSConstruction
from synkit.Graph.ITS.its_decompose import get_rc


[docs] class NormalizeAAM: """Provides functionalities to normalize atom mappings in SMILES representations, extract and process reaction centers from ITS graphs, and convert between graph representations and molecular models.""" def __init__(self) -> None: """Initializes the NormalizeAAM class.""" pass
[docs] @staticmethod def fix_rsmi_kekulize(rsmi: str) -> str: """Filters the reactants and products of a reaction SMILES string. Parameters: - rsmi (str): A string representing the reaction SMILES in the form of "reactants >> products". Returns: - str: A filtered reaction SMILES string where invalid reactants/products are removed. """ # Split the reaction into reactants and products reactants, products = rsmi.split(">>") # Filter valid reactants and products filtered_reactants = NormalizeAAM.fix_kekulize(reactants) filtered_products = NormalizeAAM.fix_kekulize(products) # Return the filtered reaction SMILES return f"{filtered_reactants}>>{filtered_products}"
[docs] @staticmethod def fix_kekulize(smiles: str) -> str: """Filters and returns valid SMILES strings from a string of SMILES, joined by '.'. This function processes a string of SMILES separated by periods (e.g., "CCO.CC=O"), filters out invalid SMILES, and returns a string of valid SMILES joined by periods. Parameters: - smiles (str): A string containing SMILES strings separated by periods ('.'). Returns: - str: A string of valid SMILES, joined by periods ('.'). """ smiles_list = smiles.split(".") # Split SMILES by period valid_smiles = [] # List to store valid SMILES strings for smile in smiles_list: mol = Chem.MolFromSmiles(smile, sanitize=False) if mol: # Check if molecule is valid valid_smiles.append( Chem.MolToSmiles( mol, canonical=True, kekuleSmiles=True, allHsExplicit=True ) ) return ".".join(valid_smiles) # Return valid SMILES joined by '.'
[docs] @staticmethod def extract_subgraph(graph: nx.Graph, indices: List[int]) -> nx.Graph: """Extracts a subgraph from a given graph based on a list of node indices. Parameters: graph (nx.Graph): The original graph from which to extract the subgraph. indices (List[int]): A list of node indices that define the subgraph. Returns: nx.Graph: The extracted subgraph. """ return graph.subgraph(indices).copy()
[docs] def reset_indices_and_atom_map( self, subgraph: nx.Graph, aam_key: str = "atom_map" ) -> nx.Graph: """Resets the node indices and the atom_map of the subgraph to be continuous from 1 onwards. Parameters: subgraph (nx.Graph): The subgraph with possibly non-continuous indices. aam_key (str): The attribute key for atom mapping. Defaults to 'atom_map'. Returns: nx.Graph: A new subgraph with continuous indices and adjusted atom_map. """ new_graph = nx.Graph() node_id_mapping = { old_id: new_id for new_id, old_id in enumerate(subgraph.nodes(), 1) } for old_id, new_id in node_id_mapping.items(): node_data = subgraph.nodes[old_id].copy() node_data[aam_key] = new_id new_graph.add_node(new_id, **node_data) for u, v, data in subgraph.edges(data=True): new_graph.add_edge(node_id_mapping[u], node_id_mapping[v], **data) return new_graph
[docs] def fit(self, rsmi: str, fix_aam_indice: bool = True) -> str: """Processes a reaction SMILES (RSMI) to adjust atom mappings, extract reaction centers, decompose into separate reactant and product graphs, and generate the corresponding SMILES. Parameters: - rsmi (str): The reaction SMILES string to be processed. - fix_aam_indice (bool): Whether to fix the atom mapping numbers. Defaults to True. Returns: str: The resulting reaction SMILES string with updated atom mappings. """ rsmi = self.fix_rsmi_kekulize(rsmi) if fix_aam_indice: rsmi = FixAAM().fix_aam_rsmi(rsmi) r_graph, p_graph = rsmi_to_graph( rsmi, sanitize=True, use_index_as_atom_map=True, drop_non_aam=True, ) its = ITSConstruction().ITSGraph(r_graph, p_graph) rc = get_rc(its) list_hydrogen = [] for _, value in rc.nodes(data=True): if value["element"] == "H": list_hydrogen.append(value["atom_map"]) r_graph = implicit_hydrogen(r_graph, list_hydrogen) p_graph = implicit_hydrogen(p_graph, list_hydrogen) r_mol, p_mol = GraphToMol().graph_to_mol( r_graph, sanitize=True, use_h_count=True ), GraphToMol().graph_to_mol(p_graph, sanitize=True, use_h_count=True) return f"{Chem.MolToSmiles(r_mol)}>>{Chem.MolToSmiles(p_mol)}"