Source code for synkit.CRN.Pathway.reachability

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple

import json

from ._adapter import tokenize_syncrn_incidence

Multiset = Dict[str, int]



[docs]
@dataclass
class ReachabilityConfig:
    """
    Configuration for forward reachability traversal.

    This configuration controls how many propagation layers are explored and
    whether the traversal should terminate as soon as no newly reachable
    species are discovered.

    :param max_layers:
        Maximum number of forward layers to compute.
    :type max_layers: int
    :param stop_when_no_new_species:
        If ``True``, stop once a layer produces no newly reachable species,
        even if reactions are still enabled.
    :type stop_when_no_new_species: bool

    Example
    -------
    .. code-block:: python

        cfg = ReachabilityConfig(
            max_layers=100,
            stop_when_no_new_species=True,
        )
    """

    max_layers: int = 10_000
    stop_when_no_new_species: bool = True




[docs]
@dataclass
class ReachabilityLayer:
    """
    One forward reachability layer.

    A layer summarizes what became newly active at a given traversal depth.
    In set semantics, a reaction is considered enabled when all of its
    reactant species are already reachable. In multiset semantics, a reaction
    is enabled when the current marking contains sufficient multiplicity for
    each reactant.

    :param depth:
        Layer index, starting at ``1`` for the first propagation step.
    :type depth: int
    :param newly_enabled_reactions:
        Reaction identifiers that became enabled at this layer.
    :type newly_enabled_reactions: List[str]
    :param newly_reachable_species:
        Species that became reachable for the first time at this layer.
    :type newly_reachable_species: List[str]
    :param all_reachable_species:
        Complete set of reachable species after this layer is applied.
    :type all_reachable_species: List[str]

    Example
    -------
    .. code-block:: python

        layer = ReachabilityLayer(
            depth=2,
            newly_enabled_reactions=["r3", "r4"],
            newly_reachable_species=["E", "F"],
            all_reachable_species=["A", "B", "C", "D", "E", "F"],
        )
    """

    depth: int
    newly_enabled_reactions: List[str]
    newly_reachable_species: List[str]
    all_reachable_species: List[str]




[docs]
@dataclass
class ReachabilityResult:
    """
    Container for forward reachability results.

    This object stores the initial support, the layered traversal trace, and
    the first depth at which each species or reaction became reachable or
    enabled.

    :param initial_species:
        Initial reachable species support used to seed the traversal.
    :type initial_species: List[str]
    :param layers:
        Ordered list of reachability layers.
    :type layers: List[ReachabilityLayer]
    :param species_first_depth:
        Mapping from species identifier to the first layer depth at which that
        species became reachable. Initial species are assigned depth ``0``.
    :type species_first_depth: Dict[str, int]
    :param reaction_first_depth:
        Mapping from reaction identifier to the first layer depth at which that
        reaction became enabled.
    :type reaction_first_depth: Dict[str, int]

    Example
    -------
    .. code-block:: python

        result = ReachabilityResult(
            initial_species=["A", "B"],
            layers=[],
            species_first_depth={"A": 0, "B": 0},
            reaction_first_depth={},
        )
    """

    initial_species: List[str]
    layers: List[ReachabilityLayer]
    species_first_depth: Dict[str, int]
    reaction_first_depth: Dict[str, int]




[docs]
class PathwayReachability:
    """
    Forward reachability utilities for SynCRN pathway analysis.

    This class provides layered forward propagation over a reaction hypergraph.
    It supports two related semantics:

    - **Set semantics**:
      a reaction is enabled once all reactant species are present in the
      reachable set; stoichiometric multiplicities are ignored for enabling.
    - **Multiset semantics**:
      a reaction is enabled only when the current marking contains enough
      copies of each reactant species to satisfy stoichiometric coefficients.

    The class can be loaded either from tokenized ``vertices`` / ``edges``
    data or directly from a SynCRN-like object via :meth:`load_syncrn`.

    Internally, reactions are stored as a mapping

    ``reaction_id -> (tail_multiset, head_multiset)``

    where the tail is the reactant multiset and the head is the product
    multiset.

    :param config:
        Optional reachability configuration. If omitted, a default
        :class:`ReachabilityConfig` is used.
    :type config: Optional[ReachabilityConfig]

    Example
    -------
    .. code-block:: python

        rr = PathwayReachability()
        rr.load_hypergraph(
            vertices=["A", "B", "C", "D"],
            edges={
                "r1": ({"A": 1, "B": 1}, {"C": 1}),
                "r2": ({"C": 1}, {"D": 1}),
            },
        )

        result = rr.compute_layers_set(initial_species=["A", "B"])
        print(result.species_first_depth)
        # {'A': 0, 'B': 0, 'C': 1, 'D': 2}
    """

    def __init__(self, config: Optional[ReachabilityConfig] = None) -> None:
        """
        Initialize an empty reachability engine.

        :param config:
            Optional reachability configuration.
        :type config: Optional[ReachabilityConfig]
        """
        self.vertices: Set[str] = set()
        self.edges: Dict[str, Tuple[Multiset, Multiset]] = {}
        self.species_token_to_id: Dict[str, str] = {}
        self.reaction_token_to_id: Dict[str, str] = {}
        self.species_id_to_token: Dict[str, str] = {}
        self.reaction_id_to_token: Dict[str, str] = {}
        self._config = config or ReachabilityConfig()

    # ------------------------------------------------------------------
    # data loading
    # ------------------------------------------------------------------


[docs]
    def load_hypergraph(
        self,
        vertices: Iterable[str],
        edges: Mapping[str, Tuple[Mapping[str, int], Mapping[str, int]]],
    ) -> "PathwayReachability":
        """
        Load a tokenized reaction hypergraph directly.

        Each edge must map a reaction identifier to a pair
        ``(tail_multiset, head_multiset)``, where both multisets map species
        identifiers to strictly positive stoichiometric coefficients.

        Non-positive coefficients are discarded during normalization.

        :param vertices:
            Iterable of species identifiers.
        :type vertices: Iterable[str]
        :param edges:
            Mapping from reaction identifier to
            ``(reactant_multiset, product_multiset)``.
        :type edges: Mapping[str, Tuple[Mapping[str, int], Mapping[str, int]]]
        :returns:
            The current instance, to allow fluent chaining.
        :rtype: PathwayReachability

        Example
        -------
        .. code-block:: python

            rr = PathwayReachability().load_hypergraph(
                vertices=["A", "B", "C"],
                edges={
                    "r1": ({"A": 1}, {"B": 1}),
                    "r2": ({"B": 1}, {"C": 1}),
                },
            )
        """
        self.vertices = set(str(v) for v in vertices)
        self.edges = {
            str(eid): (
                {str(s): int(v) for s, v in tail.items() if int(v) > 0},
                {str(s): int(v) for s, v in head.items() if int(v) > 0},
            )
            for eid, (tail, head) in edges.items()
        }
        self.species_token_to_id = {v: v for v in self.vertices}
        self.reaction_token_to_id = {eid: eid for eid in self.edges}
        self.species_id_to_token = {v: v for v in self.vertices}
        self.reaction_id_to_token = {eid: eid for eid in self.edges}
        return self



[docs]
    def load_syncrn(
        self,
        crn: object,
        *,
        species: str = "label",
        reaction: str = "id",
    ) -> "PathwayReachability":
        """
        Load reachability data from a SynCRN-like object.

        This method delegates tokenization to
        :func:`tokenize_syncrn_incidence`, then stores both the tokenized
        hypergraph and the forward/backward token-id lookup tables.

        :param crn:
            SynCRN-like object to tokenize.
        :type crn: object
        :param species:
            Species attribute used during tokenization, such as ``"label"`` or
            another species node attribute supported by the adapter.
        :type species: str
        :param reaction:
            Reaction attribute used during tokenization, such as ``"id"``.
        :type reaction: str
        :returns:
            The current instance, to allow fluent chaining.
        :rtype: PathwayReachability

        Example
        -------
        .. code-block:: python

            rr = PathwayReachability().load_syncrn(
                syncrn_object,
                species="label",
                reaction="id",
            )
        """
        vertices, edges, species_token_to_id, reaction_token_to_id, _ = (
            tokenize_syncrn_incidence(
                crn,
                species=species,
                reaction=reaction,
            )
        )
        self.load_hypergraph(vertices, edges)
        self.species_token_to_id = species_token_to_id
        self.reaction_token_to_id = reaction_token_to_id
        self.species_id_to_token = {
            sid: tok for tok, sid in species_token_to_id.items()
        }
        self.reaction_id_to_token = {
            rid: tok for tok, rid in reaction_token_to_id.items()
        }
        return self


    # ------------------------------------------------------------------
    # small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_marking(marking: Mapping[str, int]) -> Dict[str, int]:
        """
        Normalize a marking into a clean positive multiset.

        Species with non-positive counts are removed.

        :param marking:
            Input species-count mapping.
        :type marking: Mapping[str, int]
        :returns:
            Normalized marking containing only strictly positive counts.
        :rtype: Dict[str, int]
        """
        return {str(s): int(v) for s, v in marking.items() if int(v) > 0}

    @staticmethod
    def _support(marking: Mapping[str, int]) -> Set[str]:
        """
        Return the support of a marking.

        The support is the set of species whose count is strictly positive.

        :param marking:
            Input species-count mapping.
        :type marking: Mapping[str, int]
        :returns:
            Set of species present with positive multiplicity.
        :rtype: Set[str]
        """
        return {s for s, v in marking.items() if int(v) > 0}

    def _enabled_reactions_for_set(self, reachable: Set[str]) -> List[str]:
        """
        Return reactions enabled under set semantics.

        A reaction is enabled if all reactant species are already members of
        the reachable set. Stoichiometric coefficients are ignored for the
        enabling condition.

        :param reachable:
            Current reachable species set.
        :type reachable: Set[str]
        :returns:
            List of enabled reaction identifiers.
        :rtype: List[str]
        """
        enabled: List[str] = []
        for eid, (tail, _) in self.edges.items():
            if set(tail.keys()).issubset(reachable):
                enabled.append(eid)
        return enabled

    def _enabled_reactions_for_marking(self, marking: Mapping[str, int]) -> List[str]:
        """
        Return reactions enabled under multiset semantics.

        A reaction is enabled if the marking contains at least the required
        stoichiometric coefficient for every reactant species.

        :param marking:
            Current species marking.
        :type marking: Mapping[str, int]
        :returns:
            List of enabled reaction identifiers.
        :rtype: List[str]
        """
        enabled: List[str] = []
        for eid, (tail, _) in self.edges.items():
            if all(int(marking.get(s, 0)) >= int(coeff) for s, coeff in tail.items()):
                enabled.append(eid)
        return enabled

    def _fire_reactions_once_in_batch(
        self,
        marking: Mapping[str, int],
        reaction_ids: Iterable[str],
    ) -> Dict[str, int]:
        """
        Fire a batch of reactions once each from the given marking.

        Reactants are consumed and products are produced exactly once for each
        reaction in ``reaction_ids``. The resulting marking is normalized so
        that non-positive counts are removed.

        :param marking:
            Input marking before firing.
        :type marking: Mapping[str, int]
        :param reaction_ids:
            Iterable of reaction identifiers to fire once.
        :type reaction_ids: Iterable[str]
        :returns:
            Updated normalized marking after the batch firing.
        :rtype: Dict[str, int]
        """
        nxt = dict(marking)
        for eid in reaction_ids:
            tail, head = self.edges[eid]
            for s, coeff in tail.items():
                nxt[s] = nxt.get(s, 0) - int(coeff)
            for s, coeff in head.items():
                nxt[s] = nxt.get(s, 0) + int(coeff)
        return self._normalize_marking(nxt)

    @staticmethod
    def _update_first_depth_maps(
        *,
        depth: int,
        enabled_now: Iterable[str],
        new_species: Set[str],
        seen_reactions: Set[str],
        reaction_first_depth: Dict[str, int],
        species_first_depth: Dict[str, int],
    ) -> None:
        """
        Update first-seen depth maps for reactions and species.

        Reactions are assigned a first depth only once, when first observed as
        enabled. Species are assigned a first depth when they first appear in
        the newly reached set.

        :param depth:
            Current traversal depth.
        :type depth: int
        :param enabled_now:
            Reactions enabled at the current depth.
        :type enabled_now: Iterable[str]
        :param new_species:
            Species that became newly reachable at the current depth.
        :type new_species: Set[str]
        :param seen_reactions:
            Mutable set of reactions already assigned a first depth.
        :type seen_reactions: Set[str]
        :param reaction_first_depth:
            Output mapping from reaction identifier to first enabling depth.
        :type reaction_first_depth: Dict[str, int]
        :param species_first_depth:
            Output mapping from species identifier to first reachability depth.
        :type species_first_depth: Dict[str, int]
        """
        for eid in enabled_now:
            if eid not in seen_reactions:
                reaction_first_depth[eid] = depth
                seen_reactions.add(eid)
        for s in sorted(new_species):
            species_first_depth[s] = depth

    # ------------------------------------------------------------------
    # main computations
    # ------------------------------------------------------------------


[docs]
    def compute_layers_set(
        self,
        initial_species: Iterable[str],
        max_layers: Optional[int] = None,
    ) -> ReachabilityResult:
        """
        Compute layered forward reachability in set semantics.

        In this mode, only species presence matters. Stoichiometric
        multiplicities are ignored when deciding whether a reaction is enabled.

        The algorithm proceeds layer by layer:
        1. find all reactions whose reactants are already reachable,
        2. keep only reactions not previously seen as enabled,
        3. collect all products they produce,
        4. add any newly produced species to the reachable set.

        :param initial_species:
            Initial set of reachable species.
        :type initial_species: Iterable[str]
        :param max_layers:
            Optional layer cap overriding the instance configuration.
        :type max_layers: Optional[int]
        :returns:
            Reachability result containing the full layered traversal trace.
        :rtype: ReachabilityResult

        Example
        -------
        .. code-block:: python

            rr = PathwayReachability().load_hypergraph(
                vertices=["A", "B", "C", "D"],
                edges={
                    "r1": ({"A": 1, "B": 1}, {"C": 1}),
                    "r2": ({"C": 1}, {"D": 1}),
                },
            )

            result = rr.compute_layers_set(initial_species=["A", "B"])

            for layer in result.layers:
                print(layer.depth, layer.newly_reachable_species)

            # 1 ['C']
            # 2 ['D']
        """
        max_layers = max_layers if max_layers is not None else self._config.max_layers
        reachable: Set[str] = {str(x) for x in initial_species}
        seen_reactions: Set[str] = set()

        species_first_depth: Dict[str, int] = {s: 0 for s in reachable}
        reaction_first_depth: Dict[str, int] = {}
        layers: List[ReachabilityLayer] = []

        for depth in range(1, max_layers + 1):
            newly_enabled = [
                eid
                for eid in self._enabled_reactions_for_set(reachable)
                if eid not in seen_reactions
            ]
            produced_this_round: Set[str] = set()
            for eid in newly_enabled:
                _, head = self.edges[eid]
                produced_this_round.update(head.keys())
            new_species = produced_this_round - reachable

            if not newly_enabled and not new_species:
                break

            self._update_first_depth_maps(
                depth=depth,
                enabled_now=newly_enabled,
                new_species=new_species,
                seen_reactions=seen_reactions,
                reaction_first_depth=reaction_first_depth,
                species_first_depth=species_first_depth,
            )
            reachable.update(new_species)

            layers.append(
                ReachabilityLayer(
                    depth=depth,
                    newly_enabled_reactions=sorted(newly_enabled),
                    newly_reachable_species=sorted(new_species),
                    all_reachable_species=sorted(reachable),
                )
            )

            if self._config.stop_when_no_new_species and not new_species:
                break

        return ReachabilityResult(
            initial_species=sorted(set(str(x) for x in initial_species)),
            layers=layers,
            species_first_depth=species_first_depth,
            reaction_first_depth=reaction_first_depth,
        )



[docs]
    def compute_layers_multiset(
        self,
        initial_marking: Mapping[str, int],
        max_layers: Optional[int] = None,
    ) -> ReachabilityResult:
        """
        Compute layered forward reachability in multiset semantics.

        In this mode, species counts matter. A reaction is enabled only if the
        current marking contains enough multiplicity for every reactant. At
        each layer, all currently enabled reactions are fired once in batch.

        This procedure is useful when approximate token-flow behavior is
        desired, but it should not be confused with exhaustive Petri-net
        reachability analysis over all possible firing sequences.

        :param initial_marking:
            Initial species marking.
        :type initial_marking: Mapping[str, int]
        :param max_layers:
            Optional layer cap overriding the instance configuration.
        :type max_layers: Optional[int]
        :returns:
            Reachability result containing the full layered traversal trace.
        :rtype: ReachabilityResult

        Example
        -------
        .. code-block:: python

            rr = PathwayReachability().load_hypergraph(
                vertices=["A", "B", "C"],
                edges={
                    "r1": ({"A": 2}, {"B": 1}),
                    "r2": ({"B": 1}, {"C": 1}),
                },
            )

            result = rr.compute_layers_multiset(initial_marking={"A": 2})

            print(result.species_first_depth)
            # {'A': 0, 'B': 1, 'C': 2}
        """
        max_layers = max_layers if max_layers is not None else self._config.max_layers
        marking = self._normalize_marking(initial_marking)
        support = self._support(marking)

        species_first_depth: Dict[str, int] = {s: 0 for s in support}
        reaction_first_depth: Dict[str, int] = {}
        layers: List[ReachabilityLayer] = []
        seen_reactions: Set[str] = set()

        for depth in range(1, max_layers + 1):
            enabled_now = self._enabled_reactions_for_marking(marking)
            if not enabled_now:
                break

            next_marking = self._fire_reactions_once_in_batch(marking, enabled_now)
            next_support = self._support(next_marking)
            new_species = next_support - support

            self._update_first_depth_maps(
                depth=depth,
                enabled_now=enabled_now,
                new_species=new_species,
                seen_reactions=seen_reactions,
                reaction_first_depth=reaction_first_depth,
                species_first_depth=species_first_depth,
            )

            layers.append(
                ReachabilityLayer(
                    depth=depth,
                    newly_enabled_reactions=sorted(enabled_now),
                    newly_reachable_species=sorted(new_species),
                    all_reachable_species=sorted(next_support),
                )
            )

            marking = next_marking
            support = next_support

            if self._config.stop_when_no_new_species and not new_species:
                break

        return ReachabilityResult(
            initial_species=sorted(
                self._support(self._normalize_marking(initial_marking))
            ),
            layers=layers,
            species_first_depth=species_first_depth,
            reaction_first_depth=reaction_first_depth,
        )


    # ------------------------------------------------------------------
    # export helpers
    # ------------------------------------------------------------------


[docs]
    def export_layers_json(
        self, result: ReachabilityResult, fn: str
    ) -> "PathwayReachability":
        """
        Export a reachability result to a JSON file.

        The output contains the initial species set, first-depth maps, and the
        full list of layered traversal records.

        :param result:
            Reachability result to serialize.
        :type result: ReachabilityResult
        :param fn:
            Output JSON filename.
        :type fn: str
        :returns:
            The current instance, to allow fluent chaining.
        :rtype: PathwayReachability
        :raises OSError:
            Raised if the target file cannot be written.

        Example
        -------
        .. code-block:: python

            rr.export_layers_json(result, "reachability_layers.json")
        """
        data = {
            "initial_species": result.initial_species,
            "species_first_depth": result.species_first_depth,
            "reaction_first_depth": result.reaction_first_depth,
            "layers": [
                {
                    "depth": layer.depth,
                    "newly_enabled_reactions": layer.newly_enabled_reactions,
                    "newly_reachable_species": layer.newly_reachable_species,
                    "all_reachable_species": layer.all_reachable_species,
                }
                for layer in result.layers
            ],
        }
        with open(fn, "w") as fh:
            json.dump(data, fh, indent=2)
        return self


    def __repr__(self) -> str:  # pragma: no cover - simple repr
        """
        Return a compact developer-friendly representation.

        :returns:
            String representation including the number of vertices and edges.
        :rtype: str
        """
        return f"<PathwayReachability vertices={len(self.vertices)} edges={len(self.edges)}>"




[docs]
def syncrn_to_reachability_inputs(
    crn: object,
    *,
    species: str = "label",
    reaction: str = "id",
) -> Tuple[List[str], Dict[str, Tuple[Dict[str, int], Dict[str, int]]]]:
    """
    Convert a SynCRN-like object into tokenized reachability inputs.

    This is a lightweight adapter returning only the tokenized species list and
    reaction hyperedges needed by :class:`PathwayReachability`.

    :param crn:
        SynCRN-like object to tokenize.
    :type crn: object
    :param species:
        Species attribute used during tokenization.
    :type species: str
    :param reaction:
        Reaction attribute used during tokenization.
    :type reaction: str
    :returns:
        Pair ``(vertices, edges)`` where ``vertices`` is the species token list
        and ``edges`` maps reaction token to ``(tail_multiset, head_multiset)``.
    :rtype: Tuple[List[str], Dict[str, Tuple[Dict[str, int], Dict[str, int]]]]

    Example
    -------
    .. code-block:: python

        vertices, edges = syncrn_to_reachability_inputs(
            syncrn_object,
            species="label",
            reaction="id",
        )
    """
    vertices, edges, _, _, _ = tokenize_syncrn_incidence(
        crn, species=species, reaction=reaction
    )
    return vertices, edges




[docs]
def run_reachability_from_syncrn(
    crn: object,
    initial_species: Iterable[str],
    *,
    species: str = "label",
    reaction: str = "id",
    verbose: bool = True,
) -> Tuple[PathwayReachability, ReachabilityResult]:
    """
    Run layered qualitative reachability directly from a SynCRN-like object.

    This convenience function constructs a :class:`PathwayReachability`
    instance, loads the tokenized SynCRN representation, computes set-based
    layered reachability, and optionally prints a human-readable traversal
    summary.

    :param crn:
        SynCRN-like object to analyze.
    :type crn: object
    :param initial_species:
        Initial reachable species set.
    :type initial_species: Iterable[str]
    :param species:
        Species attribute used during tokenization.
    :type species: str
    :param reaction:
        Reaction attribute used during tokenization.
    :type reaction: str
    :param verbose:
        If ``True``, print the tokenized edges and each traversal layer.
    :type verbose: bool
    :returns:
        Pair ``(reachability_engine, result)``.
    :rtype: Tuple[PathwayReachability, ReachabilityResult]

    Example
    -------
    .. code-block:: python

        rr, result = run_reachability_from_syncrn(
            syncrn_object,
            initial_species=["A", "B"],
            species="label",
            reaction="id",
            verbose=True,
        )
    """
    rr = PathwayReachability().load_syncrn(crn, species=species, reaction=reaction)
    result = rr.compute_layers_set(initial_species=initial_species)

    if verbose:
        print("Edges:")
        for eid, (t, h) in rr.edges.items():
            print(f"  {eid}: {t} >> {h}")
        print("Initial species:", sorted(set(str(x) for x in initial_species)))
        for layer in result.layers:
            print(f"Layer {layer.depth}:")
            print("  Newly enabled reactions:", layer.newly_enabled_reactions)
            print("  Newly reachable species:", layer.newly_reachable_species)
            print("  All reachable species:", layer.all_reachable_species)

    return rr, result