Source code for synkit.Graph.Matcher.orbit
from __future__ import annotations
from typing import Dict, FrozenSet, Hashable, Iterable, List, Set
[docs]
class OrbitAccuracy:
"""
Compare two orbit partitions (approximate vs exact) and compute accuracy metrics.
The class is intentionally small and OOP-styled: most methods are chainable
(return ``self``) and computed results are exposed via properties.
Parameters
----------
approx_orbits :
Iterable of frozenset-like objects (each containing node identifiers).
Each element represents one orbit (set of node ids) from the *approximate*
partition. The iterable is consumed and a *copy* (list of frozensets) is
stored internally.
exact_orbits :
Iterable of frozenset-like objects (each containing node identifiers).
Each element represents one orbit from the *exact* partition.
Raises
------
ValueError
If the union of nodes covered by the two partitions differs (i.e. they do
not refer to the same node set), a :class:`ValueError` is raised with a
short diagnostic listing nodes missing in either partition.
Attributes
----------
nodes : set
The set of all node identifiers (union of both partitions). Available
after initialization.
metrics : dict
Computed metrics (see :meth:`compute`) exposed as a dictionary via the
:pyattr:`metrics` property.
confusion_map : dict
A mapping ``approx_orbit_index -> { exact_orbit_index: overlap_count }``
exposed via the :pyattr:`confusion_map` property.
Notes
-----
- Node identifiers must be hashable (ints, str, ...).
- The input iterables are not modified; the class stores its own frozenset
copies.
- :meth:`compute` is chainable and returns ``self``; read metrics via the
:pyattr:`metrics` property.
Examples
--------
.. code-block:: python
approx = [frozenset({1}), frozenset({2, 3})]
exact = [frozenset({1}), frozenset({2, 3})]
oa = OrbitAccuracy(approx, exact).compute()
print(oa) # -> <OrbitAccuracy nodes=3 approx_orbits=2 exact_orbits=2>
print(oa.metrics) # -> {'node_exact_match_fraction': 1.0, ...}
print(oa.confusion_map) # -> {0: {0: 1}, 1: {1: 2}}
"""
def __init__(
self,
approx_orbits: Iterable[FrozenSet[Hashable]],
exact_orbits: Iterable[FrozenSet[Hashable]],
) -> None:
# store copies as frozensets to guarantee immutability semantics
self._approx_raw: List[FrozenSet[Hashable]] = [
frozenset(s) for s in approx_orbits
]
self._exact_raw: List[FrozenSet[Hashable]] = [
frozenset(s) for s in exact_orbits
]
self._validate()
self._build_mappings()
self._metrics: Dict[str, float] = {}
self._confusion: Dict[int, Dict[int, int]] = {}
def __repr__(self) -> str:
return (
f"<OrbitAccuracy nodes={len(self.nodes)} "
f"approx_orbits={len(self._approx_raw)} exact_orbits={len(self._exact_raw)}>"
)
# ---- Convenience / help ----
[docs]
def help(self) -> str:
"""
Return a short usage/help string.
Returns
-------
str
Brief one-line instructions on how to use the class.
"""
return (
"Instantiate with approx and exact orbit iterables of frozensets. "
"Call .compute() then read .metrics or .confusion_map."
)
# ---- Internal validation / mapping builders ----
def _validate(self) -> None:
"""
Ensure both partitions cover the same node set.
Raises
------
ValueError
If the union of nodes in ``approx_orbits`` differs from that in
``exact_orbits``. The raised message includes nodes missing in
either partition for quick diagnostics.
"""
approx_nodes: Set[Hashable] = (
set().union(*self._approx_raw) if self._approx_raw else set()
)
exact_nodes: Set[Hashable] = (
set().union(*self._exact_raw) if self._exact_raw else set()
)
if approx_nodes != exact_nodes:
missing_in_approx = exact_nodes - approx_nodes
missing_in_exact = approx_nodes - exact_nodes
msg_parts = []
if missing_in_approx:
msg_parts.append(
f"nodes missing in approx: {sorted(missing_in_approx)}"
)
if missing_in_exact:
msg_parts.append(f"nodes missing in exact: {sorted(missing_in_exact)}")
raise ValueError(
"Orbit partitions do not cover the same node set. "
+ " ".join(msg_parts)
)
self.nodes: Set[Hashable] = approx_nodes
def _build_mappings(self) -> None:
"""Build node -> orbit index mappings for both partitions (internal use)."""
self._node_to_approx: Dict[Hashable, int] = {}
self._node_to_exact: Dict[Hashable, int] = {}
for i, orb in enumerate(self._approx_raw):
for n in orb:
self._node_to_approx[n] = i
for j, orb in enumerate(self._exact_raw):
for n in orb:
self._node_to_exact[n] = j
# ---- Core API ----
[docs]
def compute(self, brute_force_pairs: bool = True) -> "OrbitAccuracy":
"""
Compute all metrics and build the confusion map.
This method is chainable and returns ``self``; call :pyattr:`metrics` or
:pyattr:`confusion_map` afterwards to access results.
Parameters
----------
brute_force_pairs : bool, optional
If True (default) compute pairwise accuracy by checking all unordered
node pairs (O(N^2)). For very large node sets a combinatorial
method (based on orbit sizes) may be preferred; this implementation
defaults to brute-force because typical orbit counts are moderate.
Returns
-------
OrbitAccuracy
Returns ``self`` to enable chaining.
"""
self._compute_node_exact_match_fraction()
self._compute_confusion()
self._compute_purity()
if brute_force_pairs:
self._compute_pairwise_accuracy_bruteforce()
else:
self._compute_pairwise_accuracy_combinatorial_fallback()
return self
def _compute_node_exact_match_fraction(self) -> None:
"""Compute fraction of nodes whose approx orbit exactly equals the exact orbit."""
correct = 0
for n in self.nodes:
approx_idx = self._node_to_approx[n]
exact_idx = self._node_to_exact[n]
if self._approx_raw[approx_idx] == self._exact_raw[exact_idx]:
correct += 1
total = len(self.nodes) or 1
self._metrics["node_exact_match_fraction"] = correct / total
def _compute_confusion(self) -> None:
"""Build confusion matrix as a dict: approx_idx -> { exact_idx: overlap_count }."""
conf: Dict[int, Dict[int, int]] = {}
for ai, aorb in enumerate(self._approx_raw):
row: Dict[int, int] = {}
for ej, eorb in enumerate(self._exact_raw):
inter = len(aorb & eorb)
if inter:
row[ej] = inter
conf[ai] = row
self._confusion = conf
def _compute_purity(self) -> None:
"""
Compute purity: weighted average (by approx-orbit size) of the largest
overlap between each approx-orbit and any exact-orbit.
"""
total_nodes = len(self.nodes) or 1
weight_sum = 0
for ai, aorb in enumerate(self._approx_raw):
if not aorb:
continue
overlaps = self._confusion.get(ai, {})
best_overlap = max(overlaps.values()) if overlaps else 0
weight_sum += best_overlap
self._metrics["purity"] = weight_sum / total_nodes
def _compute_pairwise_accuracy_bruteforce(self) -> None:
"""Brute-force O(N^2) pairwise agreement calculation (same-orbit vs different-orbit)."""
nodes = list(self.nodes)
n = len(nodes)
if n < 2:
self._metrics["pairwise_accuracy"] = 1.0
return
matches = 0
total = 0
for i in range(n):
ni = nodes[i]
for j in range(i + 1, n):
nj = nodes[j]
approx_same = self._node_to_approx[ni] == self._node_to_approx[nj]
exact_same = self._node_to_exact[ni] == self._node_to_exact[nj]
if approx_same == exact_same:
matches += 1
total += 1
self._metrics["pairwise_accuracy"] = matches / total if total else 1.0
def _compute_pairwise_accuracy_combinatorial_fallback(self) -> None:
"""
Placeholder for a combinatorial pairwise method (not implemented).
Currently delegates to the brute-force implementation.
"""
self._compute_pairwise_accuracy_bruteforce()
# ---- Properties / accessors ----
@property
def metrics(self) -> Dict[str, float]:
"""
Return computed metrics.
Returns
-------
dict
Copy of the metrics dictionary. Call :meth:`compute` first to populate.
"""
return dict(self._metrics)
@property
def confusion_map(self) -> Dict[int, Dict[int, int]]:
"""
Return the confusion map: approx_orbit_index -> { exact_orbit_index: count }.
Returns
-------
dict
Copy of the internal confusion mapping. Call :meth:`compute` first.
"""
# return a shallow copy to avoid accidental external mutation
return {k: dict(v) for k, v in self._confusion.items()}
@property
def approx_orbits(self) -> List[FrozenSet[Hashable]]:
"""
Return the stored approx-orbits as a list of frozensets.
Returns
-------
list
Internal copy of the approximate orbit list.
"""
return list(self._approx_raw)
@property
def exact_orbits(self) -> List[FrozenSet[Hashable]]:
"""
Return the stored exact-orbits as a list of frozensets.
Returns
-------
list
Internal copy of the exact orbit list.
"""
return list(self._exact_raw)
# ---- Human-readable report ----
[docs]
def report(self, max_rows: int = 10) -> str:
"""
Produce a short human-readable report summarising computed metrics and the
top confusion rows.
Parameters
----------
max_rows : int, optional
Maximum number of confusion rows to include in the textual report.
Default is 10.
Returns
-------
str
A multi-line string summarising the results. Call :meth:`compute`
before calling this method.
"""
lines: List[str] = []
m = self.metrics
lines.append(repr(self))
lines.append("Metrics:")
for k in ("node_exact_match_fraction", "pairwise_accuracy", "purity"):
if k in m:
lines.append(f" {k}: {m[k]:.6f}")
lines.append("Top confusion rows (approx_orbit_idx -> {exact_idx:count}):")
for ai in sorted(self._confusion)[:max_rows]:
lines.append(f" {ai} -> {self._confusion[ai]}")
return "\n".join(lines)