Source code for synkit.Graph.Feature.hash_fps
import networkx as nx
import hashlib
from typing import Optional, Any
[docs]
class HashFPs:
def __init__(
    self, graph: nx.Graph, numBits: int = 256, hash_alg: str = "sha256"
) -> None:
    """Store the fingerprinting configuration and validate it.

    Parameters:
    - graph (nx.Graph): The graph to be fingerprinted.
    - numBits (int): Length, in bits, of the fingerprint to produce.
      Defaults to 256.
    - hash_alg (str): Name of the hashlib algorithm to use, such as
      'sha256' or 'sha512'.

    Raises:
    - ValueError: Propagated from `validate_parameters` when `numBits` is
      non-positive or `hash_alg` is rejected.
    """
    # The attributes must exist before validation, which reads them back.
    self.graph, self.numBits, self.hash_alg = graph, numBits, hash_alg
    self.validate_parameters()
[docs]
def validate_parameters(self) -> None:
    """Validate the configured bit length and hash algorithm.

    Raises:
    - ValueError: If `numBits` is not positive, or if `hash_alg` is not
      the name of one of hashlib's guaranteed hash algorithms.
    """
    if self.numBits <= 0:
        raise ValueError("Number of bits must be positive")
    # A plain hasattr() check also accepts module attributes that are not
    # hash constructors (e.g. "new", "pbkdf2_hmac", "algorithms_available");
    # those would pass validation and only blow up later when the hash
    # object is created or used. `algorithms_guaranteed` lists the
    # algorithms hashlib always exposes as named constructors.
    if self.hash_alg not in hashlib.algorithms_guaranteed:
        raise ValueError(f"Unsupported hash algorithm: {self.hash_alg}")
[docs]
def hash_fps(
    self,
    start_node: Optional[int] = None,
    end_node: Optional[int] = None,
    max_path_length: Optional[int] = None,
) -> str:
    """Produce the binary hash fingerprint for the stored graph.

    A fresh hash object is created, the feature string is obtained from
    `extract_features`, and both are handed to `finalize_hash`.

    Parameters:
    - start_node (Optional[int]): Starting node index, forwarded to
      `extract_features`.
    - end_node (Optional[int]): Ending node index, forwarded to
      `extract_features`.
    - max_path_length (Optional[int]): Maximum path length, forwarded to
      `extract_features`.

    Returns:
    - str: The binary string returned by `finalize_hash`.
    """
    hasher = self.initialize_hash()
    feature_text = self.extract_features(start_node, end_node, max_path_length)
    return self.finalize_hash(hasher, feature_text)
[docs]
def initialize_hash(self) -> Any:
    """Create a new, empty hash object for the configured algorithm.

    Returns:
    - Any: The object produced by calling the hashlib attribute named by
      `hash_alg` with no arguments.
    """
    constructor = getattr(hashlib, self.hash_alg)
    return constructor()
[docs]
def finalize_hash(self, hash_object: Any, features: str) -> str:
    """Feed the features into the hash and return the digest as bits.

    Parameters:
    - hash_object (Any): The hash object; it is updated in place.
    - features (str): Concatenated string of graph features.

    Returns:
    - str: A string of '0'/'1' characters holding the digest bits,
      truncated to `numBits`, or extended through `iterative_deepening`
      when the digest is shorter than `numBits`.
    """
    hash_object.update(features.encode())
    hex_digest = hash_object.hexdigest()
    # Zero-pad to the full digest width (4 bits per hex digit). bin() drops
    # leading zero bits, which made the string length vary from digest to
    # digest, shifted every bit position and forced the first bit to '1'.
    full_hash_binary = format(int(hex_digest, 16), f"0{len(hex_digest) * 4}b")
    missing_bits = self.numBits - len(full_hash_binary)
    if missing_bits > 0:
        full_hash_binary += self.iterative_deepening(hash_object, missing_bits)
    return full_hash_binary[: self.numBits]
[docs]
def iterative_deepening(self, hash_object: Any, remaining_bits: int) -> str:
    """Extend hash length using iterative hashing until the desired bit
    length is achieved.

    Parameters:
    - hash_object (hashlib._Hash): The hash object for iterative deepening;
      it is updated in place.
    - remaining_bits (int): Number of bits needed to reach `numBits`.

    Returns:
    - str: Exactly `remaining_bits` characters of '0'/'1' (an empty string
      when `remaining_bits` is not positive).
    """
    # Nothing to add; also avoids int("", 16) raising on an empty string.
    if remaining_bits <= 0:
        return ""
    additional_data = ""
    # Each hex digit represents 4 bits. On the first pass `additional_data`
    # is still empty, so the hash is updated with empty input before its
    # hex digest is appended.
    while len(additional_data) * 4 < remaining_bits:
        hash_object.update(additional_data.encode())
        additional_data += hash_object.hexdigest()
    # Zero-pad to the full width of the collected hex digits: bin() would
    # drop leading zero bits and could return fewer bits than requested.
    padded_bits = format(
        int(additional_data, 16), f"0{len(additional_data) * 4}b"
    )
    return padded_bits[:remaining_bits]