Source code for synkit.CRN.Construct.DAG.crn

from __future__ import annotations

import logging
from copy import deepcopy
from typing import Any, Dict, List, Sequence, Union

from synkit.Chem.Reaction.cleaning import Cleaning
from synkit.Chem.utils import (
    count_carbons,
    get_max_fragment,
    process_smiles_list,
)
from synkit.Synthesis.reactor_utils import _remove_reagent
from synkit.Synthesis.Reactor.mod_reactor import MODReactor
from synkit.Synthesis.Reactor.strategy import Strategy

logger = logging.getLogger("CRN")


[docs] class CRN: """Expand an initial pool of molecules through several rounds of rule application using **MODReactor** under the hood. Public attributes ----------------- initial_smiles : List[str] The starting set of molecules. n_repeats : int Number of expansion rounds requested. rounds : List[Tuple[str, List[str]]] `[("Round 1", [rxn₁, …]), …]` — kept for backwards compatibility. final_smiles : List[str] Unique molecule SMILES present after the last round. rule_count : int How many rules were supplied. Public helpers -------------- run() -> CRN Rebuild the network from scratch (chainable). product_sets -> Dict[str, List[str]] Mapping of round‑tag → reaction‑SMILES list. get_reaction_smiles() -> Dict[str, List[str]] Same as `product_sets` (alias). help() Human‑readable summary. """ # ------------------------------------------------------------------ init def __init__( self, rule_list: List[Dict[str, Any]], smiles_list: Union[str, Sequence[str]], *, n_repeats: int = 3, prune: bool = True, strategy: Union[str, Strategy] = Strategy.BACKTRACK, verbosity: int = 0, ) -> None: if not rule_list: raise ValueError("rule_list must contain at least one rule dict") self.rule_list = rule_list self.initial_smiles: List[str] = ( smiles_list.split(".") if isinstance(smiles_list, str) else list(smiles_list) ) self.n_repeats = max(1, n_repeats) self._prune = prune self.strategy = Strategy.from_string(strategy) self.verbosity = verbosity # populated by _build_crn() self.rounds: List[Dict[str, List[str]]] = [] self.final_smiles: List[str] = [] self._build_crn() # auto‑run on construction # ------------------------------------------------------------------ API
[docs] def run(self) -> "CRN": """Re‑run the expansion pipeline and return *self* for chaining.""" self._build_crn() return self
# ---------- properties ------------------------------------------------- @property def rule_count(self) -> int: return len(self.rule_list) @property def product_sets(self) -> Dict[str, List[str]]: """Dict view of the per‑round reaction SMILES. Handles both shapes: * self.rounds == [{"Round 1": [...]}, {"Round 2": [...]}, ...] * self.rounds == [("Round 1", [...]), ("Round 2", [...]), ...] """ if not self.rounds: return {} # rounds as list[dict[str, list[str]]] if isinstance(self.rounds[0], dict): out: Dict[str, List[str]] = {} for d in self.rounds: # type: ignore[arg-type] out.update(d) return out # fallback: list[tuple[str, list[str]]] return {tag: rxns for tag, rxns in self.rounds} # type: ignore[misc] # Alias kept for tests / external callers
[docs] def get_reaction_smiles(self) -> Dict[str, List[str]]: return self.product_sets
# ---------------------------------------------------------------- help
[docs] def help(self) -> None: print("CRN\n---") print(" Initial SMILES :", self.initial_smiles) print(" Rules :", self.rule_count) print(" Rounds :", self.n_repeats) print(" Final molecules:", len(self.final_smiles)) print(" Final SMILES :", self.final_smiles)
# ---------------------------------------------------------------- repr def __repr__(self) -> str: return ( f"<CRN rules={self.rule_count} start={len(self.initial_smiles)} " f"rounds={self.n_repeats} final={len(self.final_smiles)}>" ) __str__ = __repr__ # ============================================================ internals def _expand_once(self, smiles: List[str]) -> List[str]: """Apply every rule once to the molecule pool and return reaction RSMI.""" rxn_results: List[str] = [] smiles_for_mod = process_smiles_list(smiles) for rule in self.rule_list: reactor = MODReactor( smiles_for_mod, rule["gml"], invert=False, strategy=self.strategy, verbosity=self.verbosity, ) reactor.run() rsmi = reactor.get_reaction_smiles() rsmi = Cleaning().clean_smiles(rsmi) rsmi = [_remove_reagent(r) for r in rsmi] rxn_results.extend(rsmi) return rxn_results def _update_smiles_pool( self, current: List[str], reactions: List[str], *, starting: str, target: str, ) -> List[str]: """Merge products from *reactions* into *current* with optional pruning.""" new: List[str] = [] for rsmi in reactions: products = rsmi.split(">>")[1].split(".") if self._prune: products = get_max_fragment(products) if count_carbons(products) <= count_carbons(target) and count_carbons( products ) >= count_carbons(starting): new.append(products) else: new.extend(products) return list(set(current).union(new)) def _build_crn(self) -> None: """Populate `rounds` and `final_smiles`.""" self.rounds.clear() smiles_pool = deepcopy(self.initial_smiles) starting = min(smiles_pool, key=count_carbons) target = max(smiles_pool, key=count_carbons) last_rxns: List[str] = [] for idx in range(1, self.n_repeats + 1): if idx > 1: smiles_pool = self._update_smiles_pool( smiles_pool, last_rxns, starting=starting, target=target, ) last_rxns = self._expand_once(smiles_pool) self.rounds.append({f"Round {idx}": last_rxns}) # Final molecules self.final_smiles = self._update_smiles_pool( smiles_pool, last_rxns, starting=starting, target=target )