# /// script
# requires-python = "<3.10"
# dependencies = [
# "pandas", "numpy", "matplotlib", "cmcrameri"
# ]
# ///
import string
import sys
import warnings
import numpy as np
import pandas as pd
# This does direct summation and assumes convergence
[docs]
def _calculate_fes_2d(hills_data, x, y, per, npoints):
"""
Helper function to calculate the 2D Free Energy Surface.
The FES is calculated as the negative sum of Gaussian kernels.
FES(x, y) = - sum_{i} [ H_i * exp( - (dx_i^2 / (2*sx_i^2)) - (dy_i^2 / (2*sy_i^2)) ) ]
"""
# Create a 2D grid of coordinates
# Note: The 'ij' indexing ensures the output grid has shape (len(x), len(y))
grid_x, grid_y = np.meshgrid(x, y, indexing="ij")
# Initialize FES to zero
fes = np.zeros((npoints, npoints))
# Get periodicity and ranges
per_x, per_y = per
range_x = x[-1] - x[0]
range_y = y[-1] - y[0]
# Unpack hills data for clarity
centers_x = hills_data[:, 1]
centers_y = hills_data[:, 2]
sigmas_x = hills_data[:, 3]
sigmas_y = hills_data[:, 4]
heights = hills_data[:, 5]
# Loop over each Gaussian hill and add it to the FES
for i in range(len(heights)):
# Calculate distance from grid points to the center of the current hill
dx = grid_x - centers_x[i]
dy = grid_y - centers_y[i]
# Apply periodic boundary conditions if necessary
if per_x:
dx = dx - range_x * np.round(dx / range_x)
if per_y:
dy = dy - range_y * np.round(dy / range_y)
# Calculate the Gaussian kernel for the current hill
arg_x = (dx / sigmas_x[i]) ** 2
arg_y = (dy / sigmas_y[i]) ** 2
# Add the contribution of this hill to the total bias potential
fes += heights[i] * np.exp(-0.5 * (arg_x + arg_y))
# The Free Energy Surface is the negative of the summed bias potential
return -fes
[docs]
def _calculate_fes_1d(hills_data, x, per, npoints):
"""
Helper function to calculate the 1D Free Energy Surface.
FES(x) = - sum_{i} [ H_i * exp( - (dx_i^2 / (2*sx_i^2)) ) ]
"""
# Grid is just the 1D vector x
grid_x = x
# Initialize FES to zero
fes = np.zeros(npoints)
# Get periodicity and range
per_x = per[0]
range_x = x[-1] - x[0]
# Unpack hills data
centers_x = hills_data[:, 1]
sigmas_x = hills_data[:, 2]
heights = hills_data[:, 3]
# Loop over each Gaussian hill
for i in range(len(heights)):
# Calculate distance
dx = grid_x - centers_x[i]
# Apply periodic boundary conditions if necessary
if per_x:
dx = dx - range_x * np.round(dx / range_x)
# Calculate Gaussian kernel
arg_x = (dx / sigmas_x[i]) ** 2
# Add to bias potential
fes += heights[i] * np.exp(-0.5 * arg_x)
# The FES is the negative of the bias potential
return -fes
[docs]
def calculate_fes_from_hills(hills, imin=1, imax=None, xlim=None, ylim=None, npoints=256):
"""
Calculates a 1D or 2D Free Energy Surface (FES) from a metadynamics hills file.
This is a Python/NumPy translation of the R function fes2.hillsfile.
Args:
hills (dict): A dictionary containing the metadynamics data. Expected keys:
'hillsfile' (np.ndarray): The hills data. For 1D FES, shape is (N, 5).
For 2D FES, shape is (N, 7).
Columns are typically:
time, cv1, (cv2), sigma_cv1, (sigma_cv2), height, ...
'per' (list or tuple): A boolean list indicating periodicity for each CV.
e.g., [False, False].
'pcv1' (list or tuple): Periodic boundary limits for CV1. e.g., [0, 2*pi].
'pcv2' (list or tuple): Periodic boundary limits for CV2.
imin (int, optional): The starting hill index (1-based) to include in the FES.
Defaults to 1.
imax (int, optional): The final hill index (1-based) to include. If None, all
hills from imin to the end are used. Defaults to None.
xlim (list or tuple, optional): Manual limits for the x-axis (CV1).
Defaults to None, which auto-detects limits.
ylim (list or tuple, optional): Manual limits for the y-axis (CV2).
Defaults to None, which auto-detects limits.
npoints (int, optional): The number of grid points for each dimension.
Defaults to 256.
Returns:
dict: A dictionary containing the FES and associated metadata, with keys:
'fes': The calculated FES as a 1D or 2D NumPy array.
'hills': The original hills data used.
'rows': Number of grid points (npoints).
'dimension': 1 or 2.
'per': Periodicity flags.
'x': The grid coordinates for the first dimension (CV1).
'y': The grid coordinates for the second dimension (CV2, if applicable).
'pcv1', 'pcv2': Periodic boundary values.
"""
hills_data = hills["hillsfile"]
num_hills, num_cols = hills_data.shape
# --- Parameter Validation and Setup ---
if imax is not None and num_hills < imax:
warnings.warn(
f"Warning: You requested imax={imax}, but only {num_hills} hills are available. Using all hills."
)
imax = num_hills
if imax is None:
imax = num_hills
if imax > 0 and imin > imax:
raise ValueError("Error: imax cannot be lower than imin.")
# Convert 1-based R-style indexing to 0-based Python slicing
# Note: The `imax` in a Python slice [start:end] is exclusive, so it's correct.
start_index = imin - 1
end_index = imax
# --- Main Logic: Branch based on dimension (number of columns) ---
# --- Case 1: 2D FES (CV1, CV2, sigma1, sigma2, height) ---
if num_cols >= 7: # Usually 7 for Plumed output
dimension = 2
if imax == 0:
# Create an empty grid if no hills are requested
min_cv1, max_cv1 = (0, 1) if xlim is None else xlim
min_cv2, max_cv2 = (0, 1) if ylim is None else ylim
# Add 5% padding
dx = max_cv1 - min_cv1
dy = max_cv2 - min_cv2
xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]
ylims = [min_cv2 - 0.05 * dy, max_cv2 + 0.05 * dy]
x = np.linspace(xlims[0], xlims[1], npoints)
y = np.linspace(ylims[0], ylims[1], npoints)
fesm = np.zeros((npoints, npoints))
else:
# Determine grid boundaries
min_cv1, max_cv1 = np.min(hills_data[:, 1]), np.max(hills_data[:, 1])
min_cv2, max_cv2 = np.min(hills_data[:, 2]), np.max(hills_data[:, 2])
dx = max_cv1 - min_cv1
dy = max_cv2 - min_cv2
xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]
ylims = [min_cv2 - 0.05 * dy, max_cv2 + 0.05 * dy]
# Override with user-defined or periodic limits
if xlim is not None:
xlims = xlim
elif hills["per"][0] and "pcv1" in hills:
xlims = hills["pcv1"]
if ylim is not None:
ylims = ylim
elif hills["per"][1] and "pcv2" in hills:
ylims = hills["pcv2"]
# Create grid coordinates
x = np.linspace(xlims[0], xlims[1], npoints)
y = np.linspace(ylims[0], ylims[1], npoints)
# Select hills and calculate FES
selected_hills = hills_data[start_index:end_index]
fesm = _calculate_fes_2d(selected_hills, x, y, hills["per"], npoints)
# Prepare results
result = {
"fes": fesm,
"hills": hills_data,
"rows": npoints,
"dimension": dimension,
"per": hills["per"],
"x": x,
"y": y,
"pcv1": hills.get("pcv1"),
"pcv2": hills.get("pcv2"),
}
# --- Case 2: 1D FES (CV1, sigma1, height) ---
elif num_cols >= 5: # Usually 5 for Plumed output
dimension = 1
if imax == 0:
# Create an empty line
min_cv1, max_cv1 = (0, 1) if xlim is None else xlim
dx = max_cv1 - min_cv1
xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]
x = np.linspace(xlims[0], xlims[1], npoints)
fesm = np.zeros(npoints)
else:
# Determine grid boundaries
min_cv1, max_cv1 = np.min(hills_data[:, 1]), np.max(hills_data[:, 1])
dx = max_cv1 - min_cv1
xlims = [min_cv1 - 0.05 * dx, max_cv1 + 0.05 * dx]
# Override with user-defined or periodic limits
if xlim is not None:
xlims = xlim
elif hills["per"][0] and "pcv1" in hills:
xlims = hills["pcv1"]
# Create grid coordinates
x = np.linspace(xlims[0], xlims[1], npoints)
# Select hills and calculate FES
selected_hills = hills_data[start_index:end_index]
fesm = _calculate_fes_1d(selected_hills, x, hills["per"], npoints)
# Prepare results
result = {
"fes": fesm,
"hills": hills_data,
"rows": npoints,
"dimension": dimension,
"per": hills["per"],
"x": x,
"pcv1": hills.get("pcv1"),
"pcv2": hills.get("pcv2"),
}
else:
raise ValueError(
f"Unsupported number of columns in hillsfile: {num_cols}. "
"Expected >= 5 for 1D or >= 7 for 2D."
)
return result
[docs]
def find_fes_minima(fes_result, nbins=8):
"""
Finds local minima on a Free Energy Surface by dividing it into bins.
This is a Python/Pandas translation of the R function fesminima.fes.
Args:
fes_result (dict): The output dictionary from calculate_fes_from_hills.
nbins (int): The number of bins to divide each dimension into for the search.
Returns:
dict: A dictionary containing a pandas DataFrame of the minima and the original
FES data. Returns None if no minima are found.
"""
fes = fes_result["fes"]
rows = fes_result["rows"]
dimension = fes_result["dimension"]
per = fes_result["per"]
if rows % nbins != 0:
raise ValueError(
"Error: npoints (rows) in FES must be an integer multiple of nbins."
)
rb = rows // nbins
if rb < 2:
raise ValueError("Error: nbins is too high for the grid size, try reducing it.")
minima_info = []
if dimension == 2:
for i in range(nbins):
for j in range(nbins):
# Define the search window for this bin, including a 1-point border
# This is the key to checking if a minimum is truly local
# and not on the edge of the search window.
i_start, i_end = i * rb - 1, (i + 1) * rb + 1
j_start, j_end = j * rb - 1, (j + 1) * rb + 1
# Create index arrays for slicing, handling periodicity and boundaries
indices_i = np.arange(i_start, i_end)
indices_j = np.arange(j_start, j_end)
# Handle non-periodic boundaries by clipping
if not per[0]:
indices_i = np.clip(indices_i, 0, rows - 1)
if not per[1]:
indices_j = np.clip(indices_j, 0, rows - 1)
# Create the sub-FES view, using np.take for periodic wrapping
sub_fes = np.take(
np.take(fes, indices_i, axis=0, mode="wrap" if per[0] else "clip"),
indices_j,
axis=1,
mode="wrap" if per[1] else "clip",
)
# Find the location of the minimum within this sub-grid
min_loc_flat = np.argmin(sub_fes)
min_loc_i, min_loc_j = np.unravel_index(min_loc_flat, sub_fes.shape)
# CRUCIAL CHECK: Is the minimum on the border of the search window?
# If so, it's not a true local minimum for this bin, so we discard it.
is_on_border = (
min_loc_i == 0
or min_loc_i == len(indices_i) - 1
or min_loc_j == 0
or min_loc_j == len(indices_j) - 1
)
if not is_on_border:
# Convert local sub-grid index to global FES index
global_i = indices_i[min_loc_i] % rows
global_j = indices_j[min_loc_j] % rows
minima_info.append(
{
"CV1bin": global_i,
"CV2bin": global_j,
"CV1": fes_result["x"][global_i],
"CV2": fes_result["y"][global_j],
"free_energy": fes[global_i, global_j],
}
)
elif dimension == 1:
for i in range(nbins):
i_start, i_end = i * rb - 1, (i + 1) * rb + 1
indices_i = np.arange(i_start, i_end)
if not per[0]:
indices_i = np.clip(indices_i, 0, rows - 1)
sub_fes = np.take(fes, indices_i, mode="wrap" if per[0] else "clip")
min_loc_i = np.argmin(sub_fes)
is_on_border = min_loc_i == 0 or min_loc_i == len(indices_i) - 1
if not is_on_border:
global_i = indices_i[min_loc_i] % rows
minima_info.append(
{
"CV1bin": global_i,
"CV1": fes_result["x"][global_i],
"free_energy": fes[global_i],
}
)
if not minima_info:
return None # No minima found
# Convert to a pandas DataFrame and remove duplicates
minima_df = pd.DataFrame(minima_info)
if dimension == 2:
minima_df = minima_df.drop_duplicates(subset=["CV1bin", "CV2bin"]).reset_index(
drop=True
)
else:
minima_df = minima_df.drop_duplicates(subset=["CV1bin"]).reset_index(drop=True)
# Sort by free energy and add letter labels
minima_df = minima_df.sort_values(by="free_energy").reset_index(drop=True)
# Generate labels (A, B, ..., Z, AA, AB, ...)
labels = list(string.ascii_uppercase)
if len(minima_df) > 26:
extra_labels = [
f"{c1}{c2}" for c1 in string.ascii_uppercase for c2 in string.ascii_uppercase
]
labels.extend(extra_labels)
minima_df.insert(0, "letter", labels[: len(minima_df)])
# Create a copy of the input dictionary and add the minima DataFrame
minima_result = fes_result.copy()
minima_result["minima"] = minima_df
return minima_result
# --- Example Usage ---
if __name__ == "__main__":
[docs]
HILLS_FILENAME = "HILLS"
# [CV1_is_periodic, CV2_is_periodic]
# For a 1D FES, only the first value is used.
IS_PERIODIC = [False, False]
# If periodic, what are the boundaries? E.g., [-np.pi, np.pi] or [0, 360]
# Set to None if not periodic.
PERIODIC_BOUNDS_CV1 = None
PERIODIC_BOUNDS_CV2 = None
# How many points should the FES grid have in each dimension?
N_POINTS_GRID = 128
# To reconstruct the FES using only the first N hills, set a number.
# Set to None to use all hills in the file.
MAX_HILLS_TO_USE = None
# --- 2. LOAD DATA AND RUN CALCULATION ---
try:
print(f"Loading data from '{HILLS_FILENAME}'...")
# Plumed HILLS files often start with comment lines like "#! FIELDS ..."
# np.loadtxt handles these automatically.
hills_data = np.loadtxt(HILLS_FILENAME)
print(f"Successfully loaded {hills_data.shape[0]} hills.")
except FileNotFoundError:
print(f"\nError: The file '{HILLS_FILENAME}' was not found.")
print("Please make sure the file is in the same directory as the script,")
print("or change the HILLS_FILENAME variable.")
sys.exit(1) # Exit the script
except Exception as e:
print(f"\nAn error occurred while loading the file: {e}")
sys.exit(1)
num_hills, num_cols = hills_data.shape
# Automatically detect dimension and prepare the calculation
if num_cols >= 7: # 2D Case
print("Detected 2D data (>= 7 columns).")
hills_dict = {
"hillsfile": hills_data,
"per": IS_PERIODIC,
"pcv1": PERIODIC_BOUNDS_CV1,
"pcv2": PERIODIC_BOUNDS_CV2,
}
elif num_cols >= 5: # 1D Case
print("Detected 1D data (>= 5 columns).")
hills_dict = {
"hillsfile": hills_data,
"per": [IS_PERIODIC[0]], # Use only the first periodicity flag
"pcv1": PERIODIC_BOUNDS_CV1,
"pcv2": None, # Not used for 1D
}
else:
print(f"\nError: Unsupported number of columns ({num_cols}) in the HILLS file.")
print("Expected >= 5 for 1D or >= 7 for 2D.")
sys.exit(1)
# Calculate the FES
print("Calculating Free Energy Surface...")
fes_result = calculate_fes_from_hills(
hills_dict, imax=MAX_HILLS_TO_USE, npoints=N_POINTS_GRID
)
print("Calculation complete.")
# --- 3. FIND AND PRINT MINIMA ---
print("\nSearching for FES minima...")
try:
# We need to shift the FES to have a minimum of 0 before finding minima for consistency
fes_result["fes"] -= np.min(fes_result["fes"])
minima_result = find_fes_minima(fes_result, nbins=8)
if minima_result:
print("Found minima:")
# Use to_string() to ensure the full DataFrame is printed
print(minima_result["minima"].to_string())
else:
print("No local minima found with the current nbins setting.")
except ValueError as e:
print(f"Could not find minima: {e}")
minima_result = None
# --- 4. PLOT THE RESULTS ---
try:
import cmcrameri.cm as cmc
import matplotlib.pyplot as plt
print("\nPlotting results...")
fes_data = fes_result["fes"]
if fes_result["dimension"] == 2:
fig, ax = plt.subplots(figsize=(8, 6))
x = fes_result["x"]
y = fes_result["y"]
contour = ax.contourf(x, y, fes_data.T, levels=25, cmap=cmc.batlow)
ax.contour(
x, y, fes_data.T, levels=contour.levels, colors="black", linewidths=0.5
)
fig.colorbar(contour, ax=ax, label="Free Energy (kJ/mol)")
ax.set_title("2D Free Energy Surface")
ax.set_xlabel("Collective Variable 1")
ax.set_ylabel("Collective Variable 2")
# Add markers for minima if found
if minima_result:
minima_df = minima_result["minima"]
ax.scatter(
minima_df["CV1"],
minima_df["CV2"],
s=100,
c="red",
marker="x",
label="Minima",
)
for i, row in minima_df.iterrows():
ax.text(
row["CV1"],
row["CV2"],
f" {row['letter']}",
color="white",
fontsize=12,
fontweight="bold",
)
ax.legend()
elif fes_result["dimension"] == 1:
fig, ax = plt.subplots(figsize=(8, 5))
x = fes_result["x"]
ax.plot(x, fes_data)
title = "1D Free Energy Surface"
if MAX_HILLS_TO_USE is not None:
title += f" (first {MAX_HILLS_TO_USE} hills)"
ax.set_title(title)
ax.set_xlabel("Collective Variable 1")
ax.set_ylabel("Free Energy (kJ/mol)")
ax.grid(True)
# Add markers for minima if found
if minima_result:
minima_df = minima_result["minima"]
ax.scatter(
minima_df["CV1"],
minima_df["free_energy"],
s=100,
c="red",
marker="x",
zorder=5,
label="Minima",
)
for i, row in minima_df.iterrows():
ax.text(
row["CV1"],
row["free_energy"],
f" {row['letter']}",
color="black",
fontsize=12,
)
ax.legend()
plt.tight_layout()
plt.savefig("fes_plot_with_minima.png", dpi=300)
print("Saved plot to fes_plot_with_minima.png")
plt.show()
except ImportError:
print("\nMatplotlib not found. Skipping plots.")
except Exception as e:
print(f"\nAn error occurred during plotting: {e}")