Source code for synkit.Chem.Fingerprint.fp_calculator

from __future__ import annotations
from typing import Any, Dict, List
from joblib import Parallel, delayed

from synkit.IO.debug import configure_warnings_and_logs
from synkit.Chem.Fingerprint.transformation_fp import TransformationFP

configure_warnings_and_logs(True, True)


[docs] class FPCalculator: """Calculate fingerprint vectors for chemical reactions represented by SMILES strings. :cvar fps: Shared fingerprint engine instance. :vartype fps: TransformationFP :cvar VALID_FP_TYPES: Supported fingerprint type identifiers. :vartype VALID_FP_TYPES: List[str] :param n_jobs: Number of parallel jobs to use for batch processing. :type n_jobs: int :param verbose: Verbosity level for parallel execution. :type verbose: int """ fps: TransformationFP = TransformationFP() VALID_FP_TYPES: List[str] = [ "drfp", "avalon", "maccs", "torsion", "pharm2D", "ecfp2", "ecfp4", "ecfp6", "fcfp2", "fcfp4", "fcfp6", "rdk5", "rdk6", "rdk7", "ap", ] def __init__(self, n_jobs: int = 1, verbose: int = 0) -> None: """Initialize the FPCalculator. :param n_jobs: Number of parallel jobs to use for fingerprint computation. :type n_jobs: int :param verbose: Verbosity level for the parallel processing. :type verbose: int """ self.n_jobs = n_jobs self.verbose = verbose def _validate_fp_type(self, fp_type: str) -> None: """Ensure the requested fingerprint type is supported. :param fp_type: Fingerprint type identifier to validate. :type fp_type: str :raises ValueError: If `fp_type` is not in VALID_FP_TYPES. """ if fp_type not in self.VALID_FP_TYPES: valid = ", ".join(self.VALID_FP_TYPES) raise ValueError( f"Unsupported fingerprint type '{fp_type}'. Supported types: {valid}." )
[docs] @staticmethod def dict_process( data_dict: Dict[str, Any], rsmi_key: str, symbol: str = ">>", fp_type: str = "ecfp4", absolute: bool = True, ) -> Dict[str, Any]: """Compute a fingerprint for a single reaction SMILES entry and add it to the dict. :param data_dict: Dictionary containing reaction data. :type data_dict: dict :param rsmi_key: Key in `data_dict` for the reaction SMILES string. :type rsmi_key: str :param symbol: Delimiter between reactant and product in the SMILES. :type symbol: str :param fp_type: Fingerprint type to compute. :type fp_type: str :param absolute: Whether to take absolute values of the fingerprint difference. :type absolute: bool :returns: The input dictionary with a new key `fp_{fp_type}` holding the fingerprint vector. :rtype: dict :raises ValueError: If `rsmi_key` is missing in `data_dict`. """ if rsmi_key not in data_dict: raise ValueError(f"Key '{rsmi_key}' not found in data dictionary.") # compute and insert fingerprint vec = FPCalculator.fps.fit( data_dict[rsmi_key], symbols=symbol, fp_type=fp_type, abs=absolute ) data_dict[f"{fp_type}"] = vec return data_dict
[docs] def parallel_process( self, data_dicts: List[Dict[str, Any]], rsmi_key: str, symbol: str = ">>", fp_type: str = "ecfp4", absolute: bool = True, ) -> List[Dict[str, Any]]: """Compute fingerprints for a batch of reaction dictionaries in parallel. :param data_dicts: List of dictionaries, each containing a reaction SMILES. :type data_dicts: list of dict :param rsmi_key: Key in each dict for the reaction SMILES string. :type rsmi_key: str :param symbol: Delimiter between reactant and product in the SMILES. :type symbol: str :param fp_type: Fingerprint type to compute. :type fp_type: str :param absolute: Whether to take absolute values of the fingerprint difference. :type absolute: bool :returns: A list of dictionaries augmented with `fp_{fp_type}` entries. :rtype: list of dict :raises ValueError: If `fp_type` is unsupported or any dict is missing `rsmi_key`. """ # Validate fingerprint type once self._validate_fp_type(fp_type) # Process in parallel results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self.dict_process)(dd, rsmi_key, symbol, fp_type, absolute) for dd in data_dicts ) return results
def __str__(self) -> str: """Short string summarizing the calculator configuration. :returns: A summary of n_jobs and verbosity. :rtype: str """ return f"<FPCalculator n_jobs={self.n_jobs} verbose={self.verbose}>"
[docs] def help(self) -> None: """Print details about supported fingerprint types and usage. :returns: None :rtype: NoneType """ print("FPCalculator supports the following fingerprint types:") for t in self.VALID_FP_TYPES: print(" -", t) print(f"Configured for {self.n_jobs} parallel jobs, verbose={self.verbose}")