Source code for rgpycrumbs.plumed.direct_reconstruction

# /// script
# requires-python = "<3.10"
# dependencies = [
#  "pandas", "numpy", "matplotlib", "cmcrameri"
# ]
# ///

import string
import sys
import warnings

import numpy as np
import pandas as pd

# This does direct summation and assumes convergence



[docs]
def _calculate_fes_2d(hills_data, x, y, per, npoints):
    """
    Helper function to calculate the 2D Free Energy Surface.

    The FES is calculated as the negative sum of Gaussian kernels.
    FES(x, y) = - sum_{i} [ H_i * exp( - (dx_i^2 / (2*sx_i^2)) - (dy_i^2 / (2*sy_i^2)) ) ]
    """
    # Create a 2D grid of coordinates
    # Note: The 'ij' indexing ensures the output grid has shape (len(x), len(y))
    grid_x, grid_y = np.meshgrid(x, y, indexing="ij")

    # Initialize FES to zero
    fes = np.zeros((npoints, npoints))

    # Get periodicity and ranges
    per_x, per_y = per
    range_x = x[-1] - x[0]
    range_y = y[-1] - y[0]

    # Unpack hills data for clarity
    centers_x = hills_data[:, 1]
    centers_y = hills_data[:, 2]
    sigmas_x = hills_data[:, 3]
    sigmas_y = hills_data[:, 4]
    heights = hills_data[:, 5]

    # Loop over each Gaussian hill and add it to the FES
    for i in range(len(heights)):
        # Calculate distance from grid points to the center of the current hill
        dx = grid_x - centers_x[i]
        dy = grid_y - centers_y[i]

        # Apply periodic boundary conditions if necessary
        if per_x:
            dx = dx - range_x * np.round(dx / range_x)
        if per_y:
            dy = dy - range_y * np.round(dy / range_y)

        # Calculate the Gaussian kernel for the current hill
        arg_x = (dx / sigmas_x[i]) ** 2
        arg_y = (dy / sigmas_y[i]) ** 2

        # Add the contribution of this hill to the total bias potential
        fes += heights[i] * np.exp(-0.5 * (arg_x + arg_y))

    # The Free Energy Surface is the negative of the summed bias potential
    return -fes




[docs]
def _calculate_fes_1d(hills_data, x, per, npoints):
    """
    Helper function to calculate the 1D Free Energy Surface.

    FES(x) = - sum_{i} [ H_i * exp( - (dx_i^2 / (2*sx_i^2)) ) ]
    """
    # Grid is just the 1D vector x
    grid_x = x

    # Initialize FES to zero
    fes = np.zeros(npoints)

    # Get periodicity and range
    per_x = per[0]
    range_x = x[-1] - x[0]

    # Unpack hills data
    centers_x = hills_data[:, 1]
    sigmas_x = hills_data[:, 2]
    heights = hills_data[:, 3]

    # Loop over each Gaussian hill
    for i in range(len(heights)):
        # Calculate distance
        dx = grid_x - centers_x[i]

        # Apply periodic boundary conditions if necessary
        if per_x:
            dx = dx - range_x * np.round(dx / range_x)

        # Calculate Gaussian kernel
        arg_x = (dx / sigmas_x[i]) ** 2

        # Add to bias potential
        fes += heights[i] * np.exp(-0.5 * arg_x)

    # The FES is the negative of the bias potential
    return -fes




[docs]
def calculate_fes_from_hills(hills, imin=1, imax=None, xlim=None, ylim=None, npoints=256):
    """
    Calculates a 1D or 2D Free Energy Surface (FES) from a metadynamics hills file.

    This is a Python/NumPy translation of the R function fes2.hillsfile.

    Args:
        hills (dict): A dictionary containing the metadynamics data. Expected keys:
            'hillsfile' (np.ndarray): The hills data. For 1D FES, shape is (N, 5).
                                      For 2D FES, shape is (N, 7).
                                      Columns are typically:
                                      time, cv1, (cv2), sigma_cv1, (sigma_cv2), height, ...
            'per' (list or tuple): A boolean list indicating periodicity for each CV.
                                   e.g., [False, False].
            'pcv1' (list or tuple): Periodic boundary limits for CV1. e.g., [0, 2*pi].
            'pcv2' (list or tuple): Periodic boundary limits for CV2.
        imin (int, optional): The starting hill index (1-based) to include in the FES.
                              Defaults to 1.
        imax (int, optional): The final hill index (1-based) to include. If None, all
                              hills from imin to the end are used. Defaults to None.
        xlim (list or tuple, optional): Manual limits for the x-axis (CV1).
                                        Defaults to None, which auto-detects limits.
        ylim (list or tuple, optional): Manual limits for the y-axis (CV2).
                                        Defaults to None, which auto-detects limits.
        npoints (int, optional): The number of grid points for each dimension.
                                 Defaults to 256.

    Returns:
        dict: A dictionary containing the FES and associated metadata, with keys:
              'fes': The calculated FES as a 1D or 2D NumPy array.
              'hills': The original hills data used.
              'rows': Number of grid points (npoints).
              'dimension': 1 or 2.
              'per': Periodicity flags.
              'x': The grid coordinates for the first dimension (CV1).
              'y': The grid coordinates for the second dimension (CV2, if applicable).
              'pcv1', 'pcv2': Periodic boundary values.
    """
    hills_data = hills["hillsfile"]
    num_hills, num_cols = hills_data.shape

    # --- Parameter Validation and Setup ---
    if imax is not None and num_hills < imax:
        warnings.warn(
            f"Warning: You requested imax={imax}, but only {num_hills} hills are available. Using all hills."
        )
        imax = num_hills

    if imax is None:
        imax = num_hills

    if imax > 0 and imin > imax:
        raise ValueError("Error: imax cannot be lower than imin.")

    # Convert 1-based R-style indexing to 0-based Python slicing
    # Note: The `imax` in a Python slice [start:end] is exclusive, so it's correct.
    start_index = imin - 1
    end_index = imax

    # --- Main Logic: Branch based on dimension (number of columns) ---

    # --- Case 1: 2D FES (CV1, CV2, sigma1, sigma2, height) ---
    if num_cols >= 7:  # Usually 7 for Plumed output
        dimension = 2

        if imax == 0:
            # Create an empty grid if no hills are requested
            min_cv1, max_cv1 = (0, 1) if xlim is None else xlim
            min_cv2, max_cv2 = (0, 1) if ylim is None else ylim

            # Add 5% padding
            dx = max_cv1 - min_cv1
            dy = max_cv2 - min_cv2
            xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]
            ylims = [min_cv2 - 0.05 * dy, max_cv2 + 0.05 * dy]

            x = np.linspace(xlims[0], xlims[1], npoints)
            y = np.linspace(ylims[0], ylims[1], npoints)
            fesm = np.zeros((npoints, npoints))
        else:
            # Determine grid boundaries
            min_cv1, max_cv1 = np.min(hills_data[:, 1]), np.max(hills_data[:, 1])
            min_cv2, max_cv2 = np.min(hills_data[:, 2]), np.max(hills_data[:, 2])

            dx = max_cv1 - min_cv1
            dy = max_cv2 - min_cv2
            xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]
            ylims = [min_cv2 - 0.05 * dy, max_cv2 + 0.05 * dy]

            # Override with user-defined or periodic limits
            if xlim is not None:
                xlims = xlim
            elif hills["per"][0] and "pcv1" in hills:
                xlims = hills["pcv1"]

            if ylim is not None:
                ylims = ylim
            elif hills["per"][1] and "pcv2" in hills:
                ylims = hills["pcv2"]

            # Create grid coordinates
            x = np.linspace(xlims[0], xlims[1], npoints)
            y = np.linspace(ylims[0], ylims[1], npoints)

            # Select hills and calculate FES
            selected_hills = hills_data[start_index:end_index]
            fesm = _calculate_fes_2d(selected_hills, x, y, hills["per"], npoints)

        # Prepare results
        result = {
            "fes": fesm,
            "hills": hills_data,
            "rows": npoints,
            "dimension": dimension,
            "per": hills["per"],
            "x": x,
            "y": y,
            "pcv1": hills.get("pcv1"),
            "pcv2": hills.get("pcv2"),
        }

    # --- Case 2: 1D FES (CV1, sigma1, height) ---
    elif num_cols >= 5:  # Usually 5 for Plumed output
        dimension = 1

        if imax == 0:
            # Create an empty line
            min_cv1, max_cv1 = (0, 1) if xlim is None else xlim
            dx = max_cv1 - min_cv1
            xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]

            x = np.linspace(xlims[0], xlims[1], npoints)
            fesm = np.zeros(npoints)
        else:
            # Determine grid boundaries
            min_cv1, max_cv1 = np.min(hills_data[:, 1]), np.max(hills_data[:, 1])
            dx = max_cv1 - min_cv1
            xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]

            # Override with user-defined or periodic limits
            if xlim is not None:
                xlims = xlim
            elif hills["per"][0] and "pcv1" in hills:
                xlims = hills["pcv1"]

            # Create grid coordinates
            x = np.linspace(xlims[0], xlims[1], npoints)

            # Select hills and calculate FES
            selected_hills = hills_data[start_index:end_index]
            fesm = _calculate_fes_1d(selected_hills, x, hills["per"], npoints)

        # Prepare results
        result = {
            "fes": fesm,
            "hills": hills_data,
            "rows": npoints,
            "dimension": dimension,
            "per": hills["per"],
            "x": x,
            "pcv1": hills.get("pcv1"),
            "pcv2": hills.get("pcv2"),
        }

    else:
        raise ValueError(
            f"Unsupported number of columns in hillsfile: {num_cols}. "
            "Expected >= 5 for 1D or >= 7 for 2D."
        )

    return result




[docs]
def find_fes_minima(fes_result, nbins=8):
    """
    Finds local minima on a Free Energy Surface by dividing it into bins.

    This is a Python/Pandas translation of the R function fesminima.fes.

    Args:
        fes_result (dict): The output dictionary from calculate_fes_from_hills.
        nbins (int): The number of bins to divide each dimension into for the search.

    Returns:
        dict: A dictionary containing a pandas DataFrame of the minima and the original
              FES data. Returns None if no minima are found.
    """
    fes = fes_result["fes"]
    rows = fes_result["rows"]
    dimension = fes_result["dimension"]
    per = fes_result["per"]

    if rows % nbins != 0:
        raise ValueError(
            "Error: npoints (rows) in FES must be an integer multiple of nbins."
        )

    rb = rows // nbins
    if rb < 2:
        raise ValueError("Error: nbins is too high for the grid size, try reducing it.")

    minima_info = []

    if dimension == 2:
        for i in range(nbins):
            for j in range(nbins):
                # Define the search window for this bin, including a 1-point border
                # This is the key to checking if a minimum is truly local
                # and not on the edge of the search window.
                i_start, i_end = i * rb - 1, (i + 1) * rb + 1
                j_start, j_end = j * rb - 1, (j + 1) * rb + 1

                # Create index arrays for slicing, handling periodicity and boundaries
                indices_i = np.arange(i_start, i_end)
                indices_j = np.arange(j_start, j_end)

                # Handle non-periodic boundaries by clipping
                if not per[0]:
                    indices_i = np.clip(indices_i, 0, rows - 1)
                if not per[1]:
                    indices_j = np.clip(indices_j, 0, rows - 1)

                # Create the sub-FES view, using np.take for periodic wrapping
                sub_fes = np.take(
                    np.take(fes, indices_i, axis=0, mode="wrap" if per[0] else "clip"),
                    indices_j,
                    axis=1,
                    mode="wrap" if per[1] else "clip",
                )

                # Find the location of the minimum within this sub-grid
                min_loc_flat = np.argmin(sub_fes)
                min_loc_i, min_loc_j = np.unravel_index(min_loc_flat, sub_fes.shape)

                # CRUCIAL CHECK: Is the minimum on the border of the search window?
                # If so, it's not a true local minimum for this bin, so we discard it.
                is_on_border = (
                    min_loc_i == 0
                    or min_loc_i == len(indices_i) - 1
                    or min_loc_j == 0
                    or min_loc_j == len(indices_j) - 1
                )

                if not is_on_border:
                    # Convert local sub-grid index to global FES index
                    global_i = indices_i[min_loc_i] % rows
                    global_j = indices_j[min_loc_j] % rows
                    minima_info.append(
                        {
                            "CV1bin": global_i,
                            "CV2bin": global_j,
                            "CV1": fes_result["x"][global_i],
                            "CV2": fes_result["y"][global_j],
                            "free_energy": fes[global_i, global_j],
                        }
                    )

    elif dimension == 1:
        for i in range(nbins):
            i_start, i_end = i * rb - 1, (i + 1) * rb + 1
            indices_i = np.arange(i_start, i_end)

            if not per[0]:
                indices_i = np.clip(indices_i, 0, rows - 1)

            sub_fes = np.take(fes, indices_i, mode="wrap" if per[0] else "clip")
            min_loc_i = np.argmin(sub_fes)

            is_on_border = min_loc_i == 0 or min_loc_i == len(indices_i) - 1

            if not is_on_border:
                global_i = indices_i[min_loc_i] % rows
                minima_info.append(
                    {
                        "CV1bin": global_i,
                        "CV1": fes_result["x"][global_i],
                        "free_energy": fes[global_i],
                    }
                )

    if not minima_info:
        return None  # No minima found

    # Convert to a pandas DataFrame and remove duplicates
    minima_df = pd.DataFrame(minima_info)
    if dimension == 2:
        minima_df = minima_df.drop_duplicates(subset=["CV1bin", "CV2bin"]).reset_index(
            drop=True
        )
    else:
        minima_df = minima_df.drop_duplicates(subset=["CV1bin"]).reset_index(drop=True)

    # Sort by free energy and add letter labels
    minima_df = minima_df.sort_values(by="free_energy").reset_index(drop=True)

    # Generate labels (A, B, ..., Z, AA, AB, ...)
    labels = list(string.ascii_uppercase)
    if len(minima_df) > 26:
        extra_labels = [
            f"{c1}{c2}" for c1 in string.ascii_uppercase for c2 in string.ascii_uppercase
        ]
        labels.extend(extra_labels)
    minima_df.insert(0, "letter", labels[: len(minima_df)])

    # Create a copy of the input dictionary and add the minima DataFrame
    minima_result = fes_result.copy()
    minima_result["minima"] = minima_df
    return minima_result



# --- Example Usage ---
if __name__ == "__main__":

[docs]
    HILLS_FILENAME = "HILLS"


    # [CV1_is_periodic, CV2_is_periodic]
    # For a 1D FES, only the first value is used.
    IS_PERIODIC = [False, False]

    # If periodic, what are the boundaries? E.g., [-np.pi, np.pi] or [0, 360]
    # Set to None if not periodic.
    PERIODIC_BOUNDS_CV1 = None
    PERIODIC_BOUNDS_CV2 = None

    # How many points should the FES grid have in each dimension?
    N_POINTS_GRID = 128

    # To reconstruct the FES using only the first N hills, set a number.
    # Set to None to use all hills in the file.
    MAX_HILLS_TO_USE = None

    # --- 2. LOAD DATA AND RUN CALCULATION ---
    try:
        print(f"Loading data from '{HILLS_FILENAME}'...")
        # Plumed HILLS files often start with comment lines like "#! FIELDS ..."
        # np.loadtxt handles these automatically.
        hills_data = np.loadtxt(HILLS_FILENAME)
        print(f"Successfully loaded {hills_data.shape[0]} hills.")
    except FileNotFoundError:
        print(f"\nError: The file '{HILLS_FILENAME}' was not found.")
        print("Please make sure the file is in the same directory as the script,")
        print("or change the HILLS_FILENAME variable.")
        sys.exit(1)  # Exit the script
    except Exception as e:
        print(f"\nAn error occurred while loading the file: {e}")
        sys.exit(1)

    num_hills, num_cols = hills_data.shape

    # Automatically detect dimension and prepare the calculation
    if num_cols >= 7:  # 2D Case
        print("Detected 2D data (>= 7 columns).")
        hills_dict = {
            "hillsfile": hills_data,
            "per": IS_PERIODIC,
            "pcv1": PERIODIC_BOUNDS_CV1,
            "pcv2": PERIODIC_BOUNDS_CV2,
        }
    elif num_cols >= 5:  # 1D Case
        print("Detected 1D data (>= 5 columns).")
        hills_dict = {
            "hillsfile": hills_data,
            "per": [IS_PERIODIC[0]],  # Use only the first periodicity flag
            "pcv1": PERIODIC_BOUNDS_CV1,
            "pcv2": None,  # Not used for 1D
        }
    else:
        print(f"\nError: Unsupported number of columns ({num_cols}) in the HILLS file.")
        print("Expected >= 5 for 1D or >= 7 for 2D.")
        sys.exit(1)

    # Calculate the FES
    print("Calculating Free Energy Surface...")
    fes_result = calculate_fes_from_hills(
        hills_dict, imax=MAX_HILLS_TO_USE, npoints=N_POINTS_GRID
    )
    print("Calculation complete.")

    # --- 3. FIND AND PRINT MINIMA ---
    print("\nSearching for FES minima...")
    try:
        # We need to shift the FES to have a minimum of 0 before finding minima for consistency
        fes_result["fes"] -= np.min(fes_result["fes"])
        minima_result = find_fes_minima(fes_result, nbins=8)
        if minima_result:
            print("Found minima:")
            # Use to_string() to ensure the full DataFrame is printed
            print(minima_result["minima"].to_string())
        else:
            print("No local minima found with the current nbins setting.")
    except ValueError as e:
        print(f"Could not find minima: {e}")
        minima_result = None

    # --- 4. PLOT THE RESULTS ---
    try:
        import cmcrameri.cm as cmc
        import matplotlib.pyplot as plt

        print("\nPlotting results...")

        fes_data = fes_result["fes"]

        if fes_result["dimension"] == 2:
            fig, ax = plt.subplots(figsize=(8, 6))
            x = fes_result["x"]
            y = fes_result["y"]
            contour = ax.contourf(x, y, fes_data.T, levels=25, cmap=cmc.batlow)
            ax.contour(
                x, y, fes_data.T, levels=contour.levels, colors="black", linewidths=0.5
            )
            fig.colorbar(contour, ax=ax, label="Free Energy (kJ/mol)")
            ax.set_title("2D Free Energy Surface")
            ax.set_xlabel("Collective Variable 1")
            ax.set_ylabel("Collective Variable 2")

            # Add markers for minima if found
            if minima_result:
                minima_df = minima_result["minima"]
                ax.scatter(
                    minima_df["CV1"],
                    minima_df["CV2"],
                    s=100,
                    c="red",
                    marker="x",
                    label="Minima",
                )
                for i, row in minima_df.iterrows():
                    ax.text(
                        row["CV1"],
                        row["CV2"],
                        f"  {row['letter']}",
                        color="white",
                        fontsize=12,
                        fontweight="bold",
                    )
                ax.legend()

        elif fes_result["dimension"] == 1:
            fig, ax = plt.subplots(figsize=(8, 5))
            x = fes_result["x"]
            ax.plot(x, fes_data)
            title = "1D Free Energy Surface"
            if MAX_HILLS_TO_USE is not None:
                title += f" (first {MAX_HILLS_TO_USE} hills)"
            ax.set_title(title)
            ax.set_xlabel("Collective Variable 1")
            ax.set_ylabel("Free Energy (kJ/mol)")
            ax.grid(True)

            # Add markers for minima if found
            if minima_result:
                minima_df = minima_result["minima"]
                ax.scatter(
                    minima_df["CV1"],
                    minima_df["free_energy"],
                    s=100,
                    c="red",
                    marker="x",
                    zorder=5,
                    label="Minima",
                )
                for i, row in minima_df.iterrows():
                    ax.text(
                        row["CV1"],
                        row["free_energy"],
                        f"  {row['letter']}",
                        color="black",
                        fontsize=12,
                    )
                ax.legend()

        plt.tight_layout()
        plt.savefig("fes_plot_with_minima.png", dpi=300)
        print("Saved plot to fes_plot_with_minima.png")
        plt.show()

    except ImportError:
        print("\nMatplotlib not found. Skipping plots.")
    except Exception as e:
        print(f"\nAn error occurred during plotting: {e}")