Source code for rgpycrumbs.geom.fragments

"""
Core fragment detection algorithms for molecular systems.

Provides geometric (scaled covalent radii) and bond-order (GFN2-xTB) based
fragment detection, plus distance-based fragment merging.

.. versionadded:: 0.0.6
"""

import logging
import sys

if sys.version_info >= (3, 11):
    from enum import StrEnum
else:
    from enum import Enum

[docs] class StrEnum(str, Enum): """Backport for Python 3.10."""
import numpy as np from ase.atoms import Atoms from ase.data import covalent_radii from ase.neighborlist import build_neighbor_list, natural_cutoffs from ase.units import Bohr from scipy.sparse import csr_matrix from scipy.sparse.csgraph import connected_components from rgpycrumbs._aux import _import_from_parent_env
[docs] tbliteinterface = _import_from_parent_env("tblite.interface")
[docs] class DetectionMethod(StrEnum): """Available detection methodologies. .. versionadded:: 0.0.6 """
[docs] GEOMETRIC = "geometric"
[docs] BOND_ORDER = "bond-order"
[docs] DEFAULT_BOND_MULTIPLIER = 1.2
[docs] DEFAULT_BOND_ORDER_THRESHOLD = 0.8
[docs] def find_fragments_geometric( atoms: Atoms, bond_multiplier: float, radius_type: str = "natural" ) -> tuple[int, np.ndarray]: """Detect molecular fragments using scaled covalent radii. .. versionadded:: 0.0.6 """ num_atoms = len(atoms) if num_atoms == 0: return 0, np.array([]) # Selection of radii generation strategy if radius_type == "covalent": # Direct usage of ASE standard covalent radii # We apply the multiplier directly to these radii cutoffs = covalent_radii[atoms.get_atomic_numbers()] * bond_multiplier else: # Default to ASE 'natural' cutoffs (Cordero parameters) # natural_cutoffs handles the multiplier internally cutoffs = natural_cutoffs(atoms, mult=bond_multiplier) nl = build_neighbor_list(atoms, cutoffs=cutoffs, self_interaction=False) row_indices, col_indices = [], [] for i in range(num_atoms): indices, _ = nl.get_neighbors(i) for j in indices: if i < j: row_indices.append(i) col_indices.append(j) return build_graph_and_find_components(num_atoms, row_indices, col_indices)
[docs] def find_fragments_bond_order( atoms: Atoms, threshold: float, charge: int, multiplicity: int, method: str = "GFN2-xTB", ) -> tuple[int, np.ndarray, np.ndarray, np.ndarray]: """ Analyze connectivity via the Wiberg Bond Order (WBO) matrix. Calculate electronic structure using the specified xTB level. .. versionadded:: 0.0.6 """ num_atoms = len(atoms) if num_atoms == 0: return 0, np.array([]), np.array([]), np.array([]) logging.info(f"Running {method} for {atoms.get_chemical_formula(mode='hill')}...") # Initialize the calculator with the chosen xTB method calc = tbliteinterface.Calculator( method=method, numbers=atoms.get_atomic_numbers(), positions=atoms.get_positions() / Bohr, charge=float(charge), uhf=int(multiplicity - 1), ) results = calc.singlepoint() bond_order_matrix = results.get("bond-orders") if bond_order_matrix is None: rerr = f"The method {method} did not return bond orders." raise ValueError(rerr) # WBO matrix analysis # k=1 excludes the diagonal (self-interactions/valency) indices = np.argwhere(np.triu(bond_order_matrix, k=1) > threshold) row_indices, col_indices = indices[:, 0], indices[:, 1] n_components, labels = build_graph_and_find_components( num_atoms, row_indices.tolist(), col_indices.tolist() ) return n_components, labels, indices, bond_order_matrix
[docs] def build_graph_and_find_components( num_atoms: int, row_indices: np.ndarray | list[int], col_indices: np.ndarray | list[int], ) -> tuple[int, np.ndarray]: """ Identify connected components using direct CSR sparse matrix construction. .. versionadded:: 0.0.6 This function avoids Python list overhead by passing interaction indices directly to the SciPy sparse engine. """ # Convert inputs to numpy arrays to ensure efficient slicing and memory access rows = np.asarray(row_indices) cols = np.asarray(col_indices) if rows.size == 0: return num_atoms, np.arange(num_atoms) # Define bond weights as a simple integer array # Using int8 saves memory for large systems data = np.ones(rows.size, dtype=np.int8) # Construct the Compressed Sparse Row matrix # SciPy handles the undirected nature when directed=False adj = csr_matrix((data, (rows, cols)), shape=(num_atoms, num_atoms)) # Calculate connected components using the Laplacian-based graph traversal return connected_components(csgraph=adj, directed=False, return_labels=True)
[docs] def merge_fragments_by_distance( atoms: Atoms, n_components: int, labels: np.ndarray, min_dist: float ) -> tuple[int, np.ndarray]: """Merges fragments with geometric centers closer than the specified distance. .. versionadded:: 0.0.6 """ if n_components <= 1: return n_components, labels centers = np.array( [atoms.positions[labels == i].mean(axis=0) for i in range(n_components)] ) row_indices, col_indices = [], [] for i in range(n_components): for j in range(i + 1, n_components): if np.linalg.norm(centers[i] - centers[j]) < min_dist: row_indices.append(i) col_indices.append(j) if not row_indices: return n_components, labels fragment_adj = csr_matrix( ( np.ones(len(row_indices) * 2), ( np.concatenate([row_indices, col_indices]), np.concatenate([col_indices, row_indices]), ), ), shape=(n_components, n_components), ) new_n, merge_labels = connected_components( fragment_adj, directed=False, return_labels=True ) final_labels = -np.ones_like(labels) for i in range(n_components): final_labels[np.where(labels == i)[0]] = merge_labels[i] return new_n, final_labels