Source code for synkit.Rule.Modify.molecule_rule

import re
from rdkit import Chem
from typing import Optional
from synkit.IO.chem_converter import smart_to_gml
from synkit.Rule.Modify.strip_rule import strip_context


[docs] class MoleculeRule: """A class for generating molecule rules, atom-mapped SMILES, and GML representations from SMILES strings.""" def __init__(self) -> None: """Initializes the MoleculeRule object.""" pass
[docs] @staticmethod def remove_edges_from_left_right(input_str: str) -> str: """Remove all contents from the 'left' and 'right' sections of a chemical rule description. Parameters: - input_str (str): The string representation of the rule. Returns: - str: The modified string with cleared 'left' and 'right' sections. """ # Pattern to match 'left [' to the matching ']' left_pattern = r"(left \[)(.*?)(^\s*\])" # Pattern to match 'right [' to the matching ']' right_pattern = r"(right \[)(.*?)(^\s*\])" # Replace contents within 'left [' and 'right [' sections using non-greedy matching # Multiline mode to handle newlines and match start of lines with '^' input_str = re.sub( left_pattern, r"\1\n \3", input_str, flags=re.DOTALL | re.MULTILINE ) input_str = re.sub( right_pattern, r"\1\n \3", input_str, flags=re.DOTALL | re.MULTILINE ) return input_str
[docs] @staticmethod def generate_atom_map(smiles: str) -> Optional[str]: """Generate atom-mapped SMILES by assigning unique map numbers to each atom in the molecule. Parameters: - smiles (str): The SMILES string representing the molecule. Returns: - Optional[str]: The atom-mapped SMILES string, or None if the SMILES string is invalid. """ mol = Chem.MolFromSmiles(smiles) if not mol: return None # Invalid SMILES # Assign atom map numbers (1-based index) for idx, atom in enumerate(mol.GetAtoms()): atom.SetAtomMapNum(idx + 1) # Return the SMILES with atom map return Chem.MolToSmiles(mol, canonical=True)
[docs] @staticmethod def generate_molecule_smart(smiles: str) -> Optional[str]: """Generate a SMARTS-like string from atom-mapped SMILES. Parameters: - smiles (str): The SMILES string representing the molecule. Returns: - Optional[str]: The SMARTS-like string derived from atom-mapped SMILES, or None if the SMILES is invalid. """ atom_map_smiles = MoleculeRule.generate_atom_map(smiles) if atom_map_smiles is None: return None # Invalid SMILES # Return the SMARTS-like string return f"{atom_map_smiles}>>{atom_map_smiles}"
[docs] def generate_molecule_rule( self, smiles: str, name: str = "molecule", explicit_hydrogen: bool = True, sanitize: bool = True, ) -> Optional[str]: """Generate a GML representation of the molecule rule from SMILES. Parameters: - smiles (str): The SMILES string representing the molecule. - name (str, optional): The rule name used in GML generation. Defaults to 'molecule'. - explicit_hydrogen (bool, optional): Whether to include explicit hydrogen atoms in GML. Defaults to True. - sanitize (bool, optional): Whether to sanitize the molecule before conversion. Defaults to True. Returns: - Optional[str]: The GML representation of the molecule rule, or None if invalid. """ rsmi = self.generate_molecule_smart(smiles) if rsmi is None: return None # Invalid SMARTS string # Return the GML representation gml = smart_to_gml( rsmi, core=False, sanitize=sanitize, explicit_hydrogen=explicit_hydrogen, rule_name=name, ) gml = strip_context(gml, False) return gml