Source code for synkit.Chem.Fingerprint.transformation_fp
"""transformation_fp.py
=======================
Compute reaction‐level fingerprints by combining molecular fingerprints
of reactants and products, with optional absolute mode and bit‐vector conversion.
Quick start
-----------
>>> from synkit.Chem.Fingerprint.transformation_fp import TransformationFP
>>> arr = TransformationFP().fit('CCO>>CC=O', symbols='>>', fp_type='ecfp4', abs=True)
>>> bv = TransformationFP().fit('CCO>>CC=O', symbols='>>', fp_type='ecfp4', abs=True, return_array=False)
"""
from __future__ import annotations
from typing import Any, Union
import numpy as np
from rdkit.DataStructs import cDataStructs
from synkit.Chem.Fingerprint.smiles_featurizer import SmilesFeaturizer
[docs]
class TransformationFP:
    """Compute reaction fingerprints as product-minus-reactant vectors.

    Each side of the reaction is split on ``.`` into molecules, every
    molecule is featurized with ``SmilesFeaturizer.featurize_smiles``, the
    per-molecule vectors are summed per side, and the reactant sum is
    subtracted from the product sum.

    The class keeps no instance state; ``fit`` and ``help`` are ordinary
    instance methods, ``convert_arr2vec`` is a static method.
    """

    def __init__(self) -> None:
        """Create a transformer; there is no state to initialise."""

    @staticmethod
    def _bit_string(arr: np.ndarray) -> str:
        """Render an array as a string of ``'0'``/``'1'`` characters.

        Exactly one character is produced per element (nonzero -> ``'1'``),
        so the string length always equals the number of elements.

        :param arr: Array-like of numeric values; flattened in C order.
        :type arr: np.ndarray
        :returns: Bit string with one character per element.
        :rtype: str
        """
        flat = np.asarray(arr).ravel()
        return "".join(np.where(flat != 0, "1", "0").tolist())

    @staticmethod
    def convert_arr2vec(arr: np.ndarray) -> "cDataStructs.ExplicitBitVect":
        """Convert a NumPy array of bits into an RDKit ExplicitBitVect.

        Entries are binarised (nonzero -> set bit) before conversion.
        Formatting each element with ``str(int(x))`` would emit ``'-1'`` or
        multi-digit text for difference/count vectors, which changes the
        length of the bit string and shifts every following bit.

        :param arr: Array of 0/1 values representing a fingerprint. Other
            nonzero values are treated as set bits.
        :type arr: np.ndarray
        :returns: RDKit bit vector with one bit per array element.
        :rtype: cDataStructs.ExplicitBitVect
        """
        return cDataStructs.CreateFromBitString(TransformationFP._bit_string(arr))

    @staticmethod
    def _sum_fingerprints(smiles_parts: list[str], fp_type: str, **kwargs: Any) -> Any:
        """Featurize every SMILES fragment and add the results together.

        :param smiles_parts: Individual molecule SMILES of one reaction side.
        :type smiles_parts: list[str]
        :param fp_type: Fingerprint type forwarded to the featurizer.
        :type fp_type: str
        :param kwargs: Extra keyword arguments forwarded to the featurizer.
        :type kwargs: Any
        :returns: Sum of the per-molecule fingerprints, or ``None`` when
            ``smiles_parts`` is empty.
        """
        total = None
        for smi in smiles_parts:
            vec = SmilesFeaturizer.featurize_smiles(smi, fp_type, **kwargs)
            if total is None:
                # Copy so the running total never aliases the featurizer's array.
                total = vec.copy() if isinstance(vec, np.ndarray) else vec
            else:
                total = total + vec
        return total

    def fit(
        self,
        reaction_smiles: str,
        symbols: str,
        fp_type: str,
        abs: bool,
        return_array: bool = True,
        **kwargs: Any,
    ) -> Union[np.ndarray, "cDataStructs.ExplicitBitVect"]:
        """Generate a reaction fingerprint by subtracting reactant from product
        fingerprints.

        :param reaction_smiles: Reaction SMILES, reactant and product separated by `symbols`.
        :type reaction_smiles: str
        :param symbols: Delimiter between reactants and products in the SMILES string.
            Must be non-empty and occur exactly once.
        :type symbols: str
        :param fp_type: Fingerprint type to use for individual molecules (e.g., 'ecfp4').
        :type fp_type: str
        :param abs: If True, take absolute value of the difference vector.
        :type abs: bool
        :param return_array: If True, return a NumPy array; otherwise convert to an RDKit bit vector.
        :type return_array: bool
        :param kwargs: Additional keyword arguments passed to `SmilesFeaturizer.featurize_smiles`.
        :type kwargs: Any
        :returns: Reaction fingerprint as a NumPy array or RDKit bit vector.
        :rtype: Union[np.ndarray, cDataStructs.ExplicitBitVect]
        :raises ValueError: If `symbols` is empty, or `reaction_smiles` does not
            contain `symbols` exactly once.
        """
        # Validate before any featurization so malformed input fails fast
        # with a message that names the actual problem.
        if not symbols:
            raise ValueError("Separator 'symbols' must be a non-empty string")
        if symbols not in reaction_smiles:
            raise ValueError(f"Reaction SMILES must contain separator '{symbols}'")
        sides = reaction_smiles.split(symbols)
        if len(sides) != 2:
            raise ValueError(
                f"Reaction SMILES must contain separator '{symbols}' exactly once, "
                f"found {len(sides) - 1} occurrences"
            )
        react_part, prod_part = sides

        react_vec = self._sum_fingerprints(react_part.split("."), fp_type, **kwargs)
        prod_vec = self._sum_fingerprints(prod_part.split("."), fp_type, **kwargs)

        # NOTE(review): assumes the featurizer yields signed or float arrays;
        # an unsigned dtype would wrap around on subtraction -- confirm.
        diff = prod_vec - react_vec
        if abs:
            diff = np.abs(diff)

        if return_array:
            return diff
        return TransformationFP.convert_arr2vec(diff)

    def help(self) -> None:
        """Print usage summary for the TransformationFP class.

        :returns: None
        :rtype: NoneType
        """
        usage_lines = (
            "TransformationFP: compute reaction fingerprints via vector subtraction.",
            " fit(reaction_smiles, symbols, fp_type, abs, return_array=True, **kwargs)",
            " reaction_smiles: 'R1.R2>>P1.P2' SMILES string",
            " symbols: separator between reactants and products (e.g. '>>')",
            " fp_type: one of 'maccs', 'avalon', 'ecfp#', 'fcfp#', 'rdk#', 'ap', 'torsion', 'pharm2d'",
            " abs: take absolute difference (True/False)",
            " return_array: return NumPy array (True) or RDKit bit vector (False)",
            " convert_arr2vec(arr: np.ndarray) -> ExplicitBitVect",
            "Example:",
            " tfp = TransformationFP()",
            " arr = tfp.fit('CCO>>CC=O', '>>', 'ecfp4', abs=True)",
            " bv = tfp.fit('CCO>>CC=O', '>>', 'ecfp4', abs=True, return_array=False)",
        )
        for line in usage_lines:
            print(line)

    def __str__(self) -> str:
        """Short description of the transformer.

        :returns: Fixed tag ``"<TransformationFP>"``.
        :rtype: str
        """
        return "<TransformationFP>"

    __repr__ = __str__