Source code for synkit.Rule.Compose.rule_compose
import os
import glob
import logging
import importlib.util
from typing import List, Set
from synkit.Rule.Compose.valence_constrain import ValenceConstrain
from synkit.IO.data_io import load_gml_as_text
# The 'mod' package is optional. When it cannot be located, the two names that
# the rest of this module expects from it are bound to None so that importing
# this module still succeeds; calling into them will fail later at use time.
if importlib.util.find_spec("mod") is None:
    ruleGMLString = None
    RCMatch = None
    print("Optional 'mod' package not found")
else:
    from mod import ruleGMLString, RCMatch

# Configure the root logger at import time: INFO level with a
# "timestamp - level - message" line format.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
[docs]
class RuleCompose:
    """Compose GML rules pairwise and save the valence-valid results.

    Every operation is a static method; the class holds no instance state.
    Composition uses ``RCMatch`` and ``ruleGMLString``, which this module
    binds to ``None`` when the optional ``mod`` package is not installed.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def filter_smallest_vertex(combo: List[object]) -> List[object]:
        """Return the elements of ``combo`` whose context has the fewest vertices.

        Parameters:
        - combo (List[object]): Objects exposing ``context.numVertices``.

        Returns:
        - List[object]: The elements (in input order) whose
        ``context.numVertices`` equals the minimum over ``combo``. An empty
        input yields an empty list.
        """
        num_vertices = [rule.context.numVertices for rule in combo]
        # ``min`` raises ValueError on an empty sequence; "nothing to filter"
        # is a normal outcome here, not an error.
        if not num_vertices:
            return []
        min_vertex = min(num_vertices)
        return [
            rule
            for rule, vertices in zip(combo, num_vertices)
            if vertices == min_vertex
        ]

    @staticmethod
    def rule_cluster(graphs: List) -> List:
        """Group graphs by isomorphism and return one representative per group.

        A graph joins the cluster of the earliest not-yet-clustered graph
        ``g`` for which ``g.isomorphism(other) == 1``.

        Parameters:
        - graphs: A list of objects providing ``isomorphism(other)``.

        Returns:
        - List: The first graph (lowest index) of each cluster, ordered by
        that index.
        """
        visited: Set[int] = set()
        representatives: List = []
        for i, graph_i in enumerate(graphs):
            if i in visited:
                continue
            visited.add(i)
            # Record the representative directly. Pulling "the first element"
            # out of a set of indices is unreliable because set iteration
            # order is not insertion order.
            representatives.append(graph_i)
            for j in range(i + 1, len(graphs)):
                if j in visited:
                    continue
                if graph_i.isomorphism(graphs[j]) == 1:
                    visited.add(j)
        return representatives

    @staticmethod
    def _compose(rule_1, rule_2):
        """Compose two rules and keep the valence-valid, smallest, unique results.

        Parameters:
        - rule_1: First rule object to compose.
        - rule_2: Second rule object to compose.

        Returns:
        - list: The first element returned by ``ValenceConstrain().split`` on
        all compositions, reduced by ``filter_smallest_vertex`` and then
        ``rule_cluster``. Any exception raised along the way (including
        ``RCMatch`` being ``None``) is logged and an empty list is returned.
        """
        try:
            matcher = RCMatch(rule_1, rule_2)
            composed = matcher.composeAll()
            good_rules, _ = ValenceConstrain().split(composed)
            smallest = RuleCompose.filter_smallest_vertex(good_rules)
            return RuleCompose.rule_cluster(smallest)
        except Exception as e:
            # Best-effort: one failing pair must not abort a batch run.
            logging.warning("Rule composition failed: %s", e)
            return []

    @staticmethod
    def _process_compose(rule_1_id, rule_2_id, rule_path, rule_path_compose):
        """Load two rules from GML files, compose them and optionally save them.

        Parameters:
        - rule_1_id (str): Identifier (file stem) of the first rule.
        - rule_2_id (str): Identifier (file stem) of the second rule.
        - rule_path (str): Directory containing ``<id>.gml`` for both rules.
        - rule_path_compose (str): Directory for the composed GML files. If
        falsy, nothing is written.

        Returns:
        - list: Composed rules from the two provided rules.

        Errors from loading or parsing the input files are not caught here.
        """
        rule_1 = ruleGMLString(
            load_gml_as_text(os.path.join(rule_path, f"{rule_1_id}.gml")), add=False
        )
        rule_2 = ruleGMLString(
            load_gml_as_text(os.path.join(rule_path, f"{rule_2_id}.gml")), add=False
        )
        rules_compose = RuleCompose._compose(rule_1, rule_2)
        if rule_path_compose:
            for key, value in enumerate(rules_compose):
                filepath = os.path.join(
                    rule_path_compose, f"p_{rule_1_id}_{rule_2_id}_r{key}.gml"
                )
                RuleCompose.save_gml_from_text(
                    value.getGMLString(), filepath, key, [rule_1_id, rule_2_id]
                )
        return rules_compose

    @staticmethod
    def _auto_compose(rule_path, rule_path_compose):
        """Compose every unordered pair of ``*.gml`` rules found in a directory.

        Parameters:
        - rule_path (str): Directory path where the GML files are stored.
        - rule_path_compose (str): Directory path where the composed GML
        files will be saved.

        Returns:
        - None: Composed rules are saved directly to the filesystem.
        """
        # glob returns matches in arbitrary order. Each pair is composed in
        # one direction only, so sort the ids to make the choice of first and
        # second rule reproducible across runs and machines.
        gml_ids = sorted(
            os.path.splitext(os.path.basename(path))[0]
            for path in glob.glob(os.path.join(rule_path, "*.gml"))
        )
        num_files = len(gml_ids)
        total_compositions = num_files * (num_files - 1) // 2
        current_composition = 0
        for i, first_id in enumerate(gml_ids):
            for second_id in gml_ids[i + 1:]:
                RuleCompose._process_compose(
                    first_id, second_id, rule_path, rule_path_compose
                )
                current_composition += 1
                if current_composition % 100 == 0:
                    logging.info(
                        "Progress: %d/%d compositions completed.",
                        current_composition,
                        total_compositions,
                    )

    @staticmethod
    def save_gml_from_text(
        gml_content: str, gml_file_path: str, rule_id: str, parent_ids: List[str]
    ) -> bool:
        """Write GML text to a file, rewriting its ``ruleID`` line(s).

        Every line whose stripped form starts with ``ruleID`` is replaced by
        a tab-indented ``ruleID "<new id>"`` line. The new id is
        ``p_<parent>_<parent>..._r_<rule_id>`` when ``parent_ids`` is
        non-empty and ``r_<rule_id>`` otherwise. Lines are re-joined with
        ``"\\n"`` and no trailing newline is added.

        Parameters:
        - gml_content (str): Full textual content of a GML rule.
        - gml_file_path (str): Destination file path.
        - rule_id (str): Rule identifier; converted with ``str``.
        - parent_ids (List[str]): Parent rule identifiers; each converted
        with ``str``.

        Returns:
        - bool: True if the file was written. False if the path could not be
        opened or any other exception occurred; an error message is printed
        in that case.
        """
        try:
            parent_ids = [str(i) for i in parent_ids]
            rule_id = str(rule_id)
            new_rule_id = (
                "p_" + "_".join(parent_ids) + "_r_" + rule_id
                if parent_ids
                else "r_" + rule_id
            )
            replacement = f'\truleID "{new_rule_id}"'
            modified_content = "\n".join(
                replacement if line.strip().startswith("ruleID") else line
                for line in gml_content.splitlines()
            )
            with open(gml_file_path, "w") as file:
                file.write(modified_content)
            return True
        except FileNotFoundError:
            print(f"Unable to access the file path: {gml_file_path}")
            return False
        except Exception as e:
            print(f"An error occurred while writing to the file: {e}")
            return False