# Source code for synkit.Synthesis.Reactor.mod_reactor

from __future__ import annotations

"""modreactor.py
=========================
A **hardened** and **typed** re-write of the original ``MODReactor`` wrapper
around the MØD toolkit. The public API remains 100 % compatible but the
internals are now:

* **Safer**  – avoids mutating inputs, validates arguments, logs diagnostics.
* **Faster** – lazy-builds the derivation graph and reaction SMILES only when first accessed.
* **Cleaner** – exhaustive doc-strings, typing everywhere, and single-purpose
  helpers. All heavy lifting lives in private methods prefixed `_`.

External behavior is unchanged:

```python
r = MODReactor("CC.O", "rule.gml", strategy="bt").run()
smiles = r.get_reaction_smiles()
```
"""
import importlib.util
from pathlib import Path
from collections import Counter
from typing import Any, List, Optional, Union


from synkit.IO.debug import setup_logging
from synkit.IO.data_io import load_gml_as_text
from synkit.IO.chem_converter import smart_to_gml
from synkit.Chem.Molecule.standardize import sanitize_and_canonicalize_smiles

from synkit.Synthesis.Reactor.strategy import Strategy
from synkit.Synthesis.reactor_utils import _deduplicateGraphs

# ──────────────────────────────────────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────────────────────────────────────
log = setup_logging(task_type="MODReactor")

# MØD is an optional dependency: bind its entry points when the package can be
# located, otherwise leave ``None`` placeholders so importing this module does
# not fail outright.
if importlib.util.find_spec("mod") is None:
    smiles = ruleGMLString = DG = config = None
    log.warning("Optional 'mod' package not found")
else:
    from mod import smiles, ruleGMLString, DG, config


# ──────────────────────────────────────────────────────────────────────────────
# MODReactor
# ──────────────────────────────────────────────────────────────────────────────

# [docs]
class MODReactor:
    """Lazy, ergonomic wrapper around the MØD toolkit’s derivation pipeline.

    Workflow
    --------
    1. Instantiate: give substrate SMILES and a rule GML (path or string).
    2. Call `.run()` to execute the reaction strategy.
    3. Inspect results via `.get_reaction_smiles()`, `.product_sets`, `.get_dg()`, etc.

    Attributes
    ----------
    initial_smiles : List[str]
        List of SMILES strings for reactants (or products, if inverted).
    rule_file : Union[str, Path]
        Filesystem path, raw GML string, or raw reaction SMARTS with atom-atom
        mapping (AAM) describing the reaction rule.
    invert : bool
        If True, apply the rule in reverse (products → reactants).
    strategy : Strategy
        One of ALL, COMPONENT, or BACKTRACK.
    verbosity : int
        Verbosity level for the MØD DG.apply() call.
    print_results : bool
        If True, prints the derivation graph to stdout.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    def __init__(
        self,
        substrate: Union[str, List[str]],
        rule_file: Union[str, Path],
        *,
        invert: bool = False,
        strategy: Union[str, Strategy] = Strategy.BACKTRACK,
        verbosity: int = 0,
        print_results: bool = False,
    ) -> None:
        """Store the configuration and eagerly prepare the MØD inputs.

        Parameters
        ----------
        substrate : str or list of str
            Either a single dot-separated SMILES string (split on ``"."``) or
            an iterable of individual SMILES strings. The sequence is copied,
            so later changes to the caller's list do not affect the reactor.
        rule_file : str or Path
            Stored as given; interpreted by ``_parse_reaction_rule``.
        invert : bool, optional
            Coerced with ``bool()`` and forwarded to the rule parser.
        strategy : str or Strategy, optional
            Normalised through ``Strategy.from_string``.
        verbosity : int, optional
            Forwarded as ``verbosity`` to the derivation-graph ``apply`` call.
        print_results : bool, optional
            When true, the strategy routines print the derivation graph.
        """
        # Copy instead of aliasing: ``_initial_molecules`` is derived from this
        # list below, so a caller mutating its own list afterwards must not be
        # able to desynchronise the two.
        self.initial_smiles: List[str] = (
            substrate.split(".") if isinstance(substrate, str) else list(substrate)
        )
        self.rule_file = rule_file
        self.invert = bool(invert)
        self.strategy = Strategy.from_string(strategy)
        self.verbosity = verbosity
        self.print_results = print_results

        # Built eagerly at construction time.
        self._initial_molecules: List[Any] = self._prepare_initial_molecules()
        self._reaction_rule: Any = self._parse_reaction_rule()

        # Filled lazily by ``run()`` and the lazy properties.
        self._dg: Optional[DG] = None
        self._temp_results: Optional[List[List[str]]] = None
        self._reaction_smiles: Optional[List[str]] = None

    # ------------------------------------------------------------------
    # Public high‑level API
    # ------------------------------------------------------------------

    # [docs]
    def run(self) -> "MODReactor":
        """Execute the chosen strategy **once** and return *self* so you can
        chain:

        ```python
        r = MODReactor(...).run()
        smiles = r.get_reaction_smiles()
        ```
        """
        if self._temp_results is None:
            self._temp_results = self._predict()  # ← may build DG
        return self


    # helpers for outside world ------------------------------------------------

    # [docs]
    def get_reaction_smiles(self) -> List[str]:
        """Retrieve the reaction SMILES strings (lazy).

        Returns
        -------
        List[str]
            List of reaction SMILES, in “A>>B” format.
        """
        return self.reaction_smiles



    # [docs]
    def get_dg(self) -> DG:
        """Access the underlying derivation graph.

        Returns
        -------
        DG
            The MØD derivation graph constructed during `.run()`.

        Raises
        ------
        RuntimeError
            If `.run()` has not yet been called.
        """
        if self._dg is None:
            raise RuntimeError("Call `.run()` before accessing the derivation graph.")
        return self._dg


    # ------------------------------------------------------------------
    # Introspection / niceties
    # ------------------------------------------------------------------
    def __str__(self) -> str:
        return (
            f"<MODReactor n_substrate={len(self.initial_smiles)} "
            f"invert={self.invert} strategy={self.strategy.value} "
            f"predictions={self.prediction_count}>"
        )

    __repr__ = __str__


    # [docs]
    def help(self) -> None:
        """Print a one-page summary of reactor configuration and results."""
        print("MODReactor".ljust(60, "─"))
        print(f"Rule file     : {self.rule_file}")
        print(f"Substrate     : {'.'.join(self.initial_smiles)}")
        print(f"Invert rule   : {self.invert}")
        print(f"Strategy      : {self.strategy.value}")
        print(f"Verbosity     : {self.verbosity}")
        print(f"Predictions   : {self.prediction_count}")
        if self._reaction_smiles:
            print(f"First result  : {self._reaction_smiles[0]}")
        print("─" * 60)


    # ------------------------------------------------------------------
    # Convenience properties
    # ------------------------------------------------------------------
    @property
    def dg(self) -> Optional[DG]:
        """DG or None – cached derivation graph.

        See also
        --------
        get_dg
        """
        return self._dg

    @property
    def product_sets(self) -> List[List[str]]:
        """Raw product sets (lists of SMILES) before joining into full
        reactions."""
        return self.temp_results

    @property
    def product_smiles(self) -> List[str]:
        """Flattened list of all product SMILES (may contain duplicates)."""
        return [s for batch in self.temp_results for s in batch]

    @property
    def prediction_count(self) -> int:
        """Number of distinct prediction batches generated."""
        return len(self._temp_results or [])

    # ------------------------------------------------------------------
    # Internals – lazy properties
    # ------------------------------------------------------------------
    @property
    def temp_results(self) -> List[List[str]]:
        """Lazy-loaded raw product lists.

        Returns
        -------
        List[List[str]]
        """
        if self._temp_results is None:
            self._temp_results = self._predict()
        return self._temp_results

    @property
    def reaction_smiles(self) -> List[str]:
        """Lazy-loaded reaction SMILES strings of form “A>>B”.

        Returns
        -------
        List[str]
        """
        if self._reaction_smiles is None:
            base = ".".join(self.initial_smiles)
            self._reaction_smiles = self.generate_reaction_smiles(
                self.temp_results, base, invert=self.invert
            )
        return self._reaction_smiles

    # ------------------------------------------------------------------
    # Internals – setup
    # ------------------------------------------------------------------
    def _prepare_initial_molecules(self) -> List[Any]:
        """Build the molecule list handed to the derivation graph.

        Each entry of ``initial_smiles`` is parsed with ``smiles(..., add=False)``,
        the result is passed through ``_deduplicateGraphs`` and then sorted in
        place by ``numVertices`` in ascending order.

        Returns
        -------
        List[Any]
            The de-duplicated, sorted molecule objects.
        """

        def vertex_count(graph: Any) -> int:
            # Objects lacking ``numVertices`` sort as if they had 0 vertices.
            return getattr(graph, "numVertices", 0)

        parsed = [smiles(smi, add=False) for smi in self.initial_smiles]
        unique = _deduplicateGraphs(parsed)
        unique.sort(key=vertex_count)
        log.debug("Prepared %d initial molecules", len(unique))
        return unique

    def _parse_reaction_rule(self) -> Any:
        """Build the rule object from ``rule_file``, trying three readings.

        The attempts run in this order and the first success wins:

        1. ``str(rule_file)`` is treated as raw GML text.
        2. ``rule_file`` is treated as a reaction SMARTS and converted with
           ``smart_to_gml``.
        3. ``rule_file`` is treated as a path and read with
           ``load_gml_as_text``.

        Every attempt calls ``ruleGMLString`` with ``invert=self.invert`` and
        ``add=False``. Failures of the first two attempts are logged at debug
        level (with traceback) and otherwise ignored.

        Returns
        -------
        Any
            Rule object from ``ruleGMLString()``.

        Raises
        ------
        Exception
            Whatever the final, file-based attempt raised; it is logged at
            error level and re-raised unchanged.
        """

        def compile_rule(gml_text: str) -> Any:
            # Single place for the options shared by all three attempts.
            return ruleGMLString(gml_text, invert=self.invert, add=False)

        # NOTE(review): the broad ``except Exception`` is kept on purpose; the
        # exception types raised by the parser and converter are not visible
        # from this module.

        # 1) raw GML text
        try:
            rule = compile_rule(str(self.rule_file))
        except Exception:
            log.debug("Raw GML parse failed; trying SMARTS conversion", exc_info=True)
        else:
            log.debug("Parsed rule from raw text")
            return rule

        # 2) reaction SMARTS converted to GML
        try:
            rule = compile_rule(smart_to_gml(self.rule_file))
        except Exception:
            log.debug("Smart parse failed; trying file load", exc_info=True)
        else:
            log.debug("Parsed smart from raw text")
            return rule

        # 3) GML file on disk; the last resort, so failures propagate.
        try:
            rule = compile_rule(load_gml_as_text(self.rule_file))
        except Exception:
            log.error(
                "Failed to load rule from text or file %s",
                self.rule_file,
                exc_info=True,
            )
            raise
        log.debug("Loaded rule from file %s", self.rule_file)
        return rule

    # ------------------------------------------------------------------
    # Internals – strategy dispatch
    # ------------------------------------------------------------------
    def _predict(self) -> List[List[str]]:
        """Run the routine that matches ``self.strategy``.

        Returns
        -------
        List[List[str]]
            The product batches returned by the selected routine. A warning is
            logged when the result is empty.

        Raises
        ------
        KeyError
            If ``self.strategy`` is not one of the three mapped strategies.
        """
        # Resolve the handler before logging so an unknown strategy fails first.
        handler = {
            Strategy.ALL: self._apply_all,
            Strategy.COMPONENT: self._apply_components,
            Strategy.BACKTRACK: self._apply_backtrack,
        }[self.strategy]
        log.info("Running strategy %s", self.strategy.value)
        batches = handler()
        if batches:
            return batches
        log.warning("No predictions generated")
        return batches

    # ------------------------------------------------------------------
    # Internals – concrete strategy routines
    # ------------------------------------------------------------------
    def _apply_components(self) -> List[List[str]]:
        """Apply the rule with the default ``apply`` options.

        A fresh derivation graph over ``_initial_molecules`` is created and
        stored in ``_dg``. The graph is printed when ``print_results`` is set.

        Returns
        -------
        List[List[str]]
            For every edge of the derivation graph, the SMILES of its target
            vertices.
        """
        graph = DG(graphDatabase=self._initial_molecules)
        self._dg = graph
        # Module-wide MØD setting, assigned on every call.
        config.dg.doRuleIsomorphismDuringBinding = False
        # NOTE(review): the builder returned by ``build()`` is deliberately
        # left as an unnamed temporary; presumably the graph is finalised once
        # the builder is released — confirm before binding it to a name.
        graph.build().apply(
            self._initial_molecules, self._reaction_rule, verbosity=self.verbosity
        )
        if self.print_results:
            graph.print()
        return [
            [vertex.graph.smiles for vertex in edge.targets] for edge in graph.edges
        ]

    def _apply_all(self) -> List[List[str]]:
        """Apply the rule with ``onlyProper=False`` and re-attach spectators.

        A fresh derivation graph over ``_initial_molecules`` is created and
        stored in ``_dg``. For every edge, the target SMILES form one batch;
        substrate SMILES that the edge's sources did not account for are then
        appended to that batch. The comparison is made on sanitised SMILES and
        respects multiplicity.

        Returns
        -------
        List[List[str]]
            Product batches, each extended with its unused substrate SMILES.
        """
        graph = DG(graphDatabase=self._initial_molecules)
        self._dg = graph
        # Module-wide MØD setting, assigned on every call.
        config.dg.doRuleIsomorphismDuringBinding = False
        # NOTE(review): the builder returned by ``build()`` is deliberately
        # left as an unnamed temporary; presumably the graph is finalised once
        # the builder is released — confirm before binding it to a name.
        graph.build().apply(
            self._initial_molecules,
            self._reaction_rule,
            verbosity=self.verbosity,
            onlyProper=False,
        )
        if self.print_results:
            graph.print()

        def tally(smiles_list: List[str]) -> Counter:
            # Multiset of sanitised SMILES.
            return Counter(sanitize_and_canonicalize_smiles(s) for s in smiles_list)

        batches: List[List[str]] = []
        consumed: List[List[str]] = []
        for edge in graph.edges:
            batches.append([vertex.graph.smiles for vertex in edge.targets])
            consumed.append([vertex.graph.smiles for vertex in edge.sources])

        available = tally(self.initial_smiles)
        for batch, used in zip(batches, consumed):
            # Counter subtraction keeps only positive counts, i.e. exactly the
            # substrate molecules this edge left untouched.
            leftover = available - tally(used)
            batch.extend(leftover.elements())
        return batches

    def _apply_backtrack(self) -> List[List[str]]:
        """Try the component routine first; fall back to ``_apply_all``.

        Batches from ``_apply_components`` are passed through
        ``sanitize_and_canonicalize_smiles`` entry by entry. When that yields
        no batch at all, the result of ``_apply_all`` is returned as is; it is
        not sanitised again here.

        Returns
        -------
        List[List[str]]
            Sanitised component batches, or the ``_apply_all`` batches.
        """
        cleaned: List[List[str]] = []
        for batch in self._apply_components():
            cleaned.append([sanitize_and_canonicalize_smiles(s) for s in batch])
        if not cleaned:
            log.info("Component strategy returned 0 → falling back to ALL")
            return self._apply_all()
        return cleaned

    # ------------------------------------------------------------------
    # Static helpers
    # ------------------------------------------------------------------

    # [docs]
    @staticmethod
    def generate_reaction_smiles(
        temp_results: List[List[str]],
        base_smiles: str,
        *,
        invert: bool = False,
        arrow: str = ">>",
        separator: str = ".",
    ) -> List[str]:
        """Build reaction SMILES of the form “A>>B”, where A and B swap roles
        if invert=True.

        Parameters
        ----------
        temp_results : List[List[str]]
            Batches of product (or reactant) SMILES.
        base_smiles : str
            The “other side” of the reaction: the reactant side when
            invert=False, or the product side when invert=True.
        invert : bool
            If False, generates “base_smiles>>joined_batch”;
            if True, generates “joined_batch>>base_smiles”.
        arrow : str
            The reaction arrow to use (default ">>").
        separator : str
            How to join multiple SMILES in a batch (default ".").

        Returns
        -------
        List[str]
            Reaction SMILES strings, one per batch.
        """
        reactions: List[str] = []
        for batch in temp_results:
            if all(x is not None for x in batch):
                joined = separator.join(batch) if batch else ""
                left, right = (joined, base_smiles) if invert else (base_smiles, joined)
                reactions.append(f"{left}{arrow}{right}")
        return reactions