|
| 1 | +""" |
| 2 | +Compare pyMBE's polyprotic Henderson-Hasselbalch implementation against: |
| 3 | + 1. The analytical calculate_Z formula from pbs_simulations |
| 4 | + 2. Ideal constant-pH simulation data from pbs_simulations sample_data |
| 5 | +
|
| 6 | +Produces comparison plots for monoprotic, diprotic, and triprotic acids. |
| 7 | +""" |
| 8 | +import sys |
| 9 | +import os |
| 10 | +import numpy as np |
| 11 | +import pandas as pd |
| 12 | +import matplotlib.pyplot as plt |
| 13 | + |
| 14 | +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) |
| 15 | +import pyMBE |
| 16 | + |
| 17 | +# Add pbs_simulations analysis to path |
| 18 | +PBS_ROOT = os.path.expanduser("~/ntnu/pbs_simulations") |
| 19 | +sys.path.insert(0, os.path.join(PBS_ROOT, "scripts")) |
| 20 | +from analysis import analyze_time_series |
| 21 | + |
| 22 | +# ---------- Reference analytical formula from pbs_simulations ---------- |
| 23 | + |
| 24 | +def calculate_Z_reference(pK, pH_array): |
| 25 | + """Exact polyprotic HH formula from pbs_simulations/scripts/post_processing.py""" |
| 26 | + pK = np.asarray(pK) |
| 27 | + pH_array = np.atleast_1d(pH_array).astype(float) |
| 28 | + y = pH_array[None, :] # (1, N_pH) |
| 29 | + i_range = np.arange(1, len(pK) + 1)[:, None] # (n, 1) |
| 30 | + cumsum_pK = np.cumsum(pK)[:, None] # (n, 1) |
| 31 | + fractions = 10.0 ** (-cumsum_pK + i_range * y) |
| 32 | + numerator = np.sum(i_range * fractions, axis=0) |
| 33 | + denominator = 1.0 + np.sum(fractions, axis=0) |
| 34 | + return -numerator / denominator |
| 35 | + |
| 36 | + |
| 37 | +def compute_sim_Z(sample_dir, n): |
| 38 | + """Compute average charge Z from simulation sample data using block binning.""" |
| 39 | + analyzed = analyze_time_series(path_to_datafolder=sample_dir, |
| 40 | + ignore_files=["analyzed_data.csv"]) |
| 41 | + pH_vals = [] |
| 42 | + Z_vals = [] |
| 43 | + Z_err = [] |
| 44 | + for idx in range(len(analyzed)): |
| 45 | + row = analyzed.iloc[idx] |
| 46 | + pH_val = float(row[("pH", "value")]) |
| 47 | + n_ha = float(row[("mean", "n_ha")]) |
| 48 | + Z_num = -sum(float(row[("mean", f"n_a{i+1}")]) * (i + 1) for i in range(n)) |
| 49 | + Z_den = n_ha + sum(float(row[("mean", f"n_a{i+1}")]) for i in range(n)) |
| 50 | + pH_vals.append(pH_val) |
| 51 | + Z_vals.append(Z_num / Z_den if Z_den > 0 else 0.0) |
| 52 | + order = np.argsort(pH_vals) |
| 53 | + return np.array(pH_vals)[order], np.array(Z_vals)[order] |
| 54 | + |
| 55 | + |
| 56 | +def compute_pyMBE_Z(pka_list, pH_array): |
| 57 | + """Compute charge using pyMBE's calculate_HH.""" |
| 58 | + pmb = pyMBE.pymbe_library(seed=42) |
| 59 | + n = len(pka_list) |
| 60 | + if n == 1: |
| 61 | + pmb.define_particle(name="acid", |
| 62 | + sigma=0.35 * pmb.units.nm, |
| 63 | + epsilon=1 * pmb.units("reduced_energy"), |
| 64 | + acidity="acidic", |
| 65 | + pka=pka_list[0]) |
| 66 | + else: |
| 67 | + pmb.define_polyprotic_particle(name="acid", |
| 68 | + sigma=0.35 * pmb.units.nm, |
| 69 | + epsilon=1 * pmb.units("reduced_energy"), |
| 70 | + n=n, |
| 71 | + acidity="acidic", |
| 72 | + pka_list=pka_list) |
| 73 | + pmb.define_residue(name="res", central_bead="acid", side_chains=[]) |
| 74 | + pmb.define_molecule(name="mol", residue_list=["res"]) |
| 75 | + return pmb.calculate_HH(template_name="mol", pH_list=list(pH_array)) |
| 76 | + |
| 77 | + |
| 78 | +# ---------- Configuration ---------- |
| 79 | + |
| 80 | +pK_values = { |
| 81 | + "monoprotic": [2.16], |
| 82 | + "diprotic": [2.16, 7.21], |
| 83 | + "triprotic": [2.16, 7.21, 12.32], |
| 84 | +} |
| 85 | + |
| 86 | +sample_dirs = { |
| 87 | + "monoprotic": os.path.join(PBS_ROOT, "sample_data", "ideal-monoprotic"), |
| 88 | + "diprotic": os.path.join(PBS_ROOT, "sample_data", "ideal-diprotic"), |
| 89 | + "triprotic": os.path.join(PBS_ROOT, "sample_data", "ideal-triprotic"), |
| 90 | +} |
| 91 | + |
| 92 | +# ---------- Plot ---------- |
| 93 | + |
| 94 | +fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) |
| 95 | + |
| 96 | +for ax, label in zip(axes, ["monoprotic", "diprotic", "triprotic"]): |
| 97 | + pK = pK_values[label] |
| 98 | + n = len(pK) |
| 99 | + pH_fine = np.linspace(0.5, 14.5, 200) |
| 100 | + |
| 101 | + # 1. Reference analytical formula |
| 102 | + Z_ref = calculate_Z_reference(pK, pH_fine) |
| 103 | + |
| 104 | + # 2. pyMBE calculate_HH |
| 105 | + Z_pyMBE = compute_pyMBE_Z(pK, pH_fine) |
| 106 | + |
| 107 | + # 3. Simulation data |
| 108 | + pH_sim, Z_sim = compute_sim_Z(sample_dirs[label], n) |
| 109 | + |
| 110 | + # Plot |
| 111 | + ax.plot(pH_fine, Z_ref, "k-", lw=2, label="Analytical (pbs_simulations)") |
| 112 | + ax.plot(pH_fine, Z_pyMBE, "r--", lw=2, label="pyMBE calculate_HH") |
| 113 | + ax.scatter(pH_sim, Z_sim, c="blue", s=30, zorder=5, label="Simulation data (ideal)") |
| 114 | + |
| 115 | + # pKa markers |
| 116 | + for i, pk in enumerate(pK): |
| 117 | + ax.axvline(x=pk, color="gray", ls=":", alpha=0.6) |
| 118 | + ax.text(pk + 0.2, -n + 0.3, f"pK{i+1}={pk}", fontsize=8, color="gray") |
| 119 | + |
| 120 | + ax.set_xlabel("pH") |
| 121 | + ax.set_title(f"{n}-protic acid") |
| 122 | + ax.grid(alpha=0.3) |
| 123 | + |
| 124 | + # MSE between pyMBE and reference |
| 125 | + mse = np.mean((np.array(Z_pyMBE) - Z_ref) ** 2) |
| 126 | + ax.text(0.05, 0.05, f"MSE(pyMBE vs analytical) = {mse:.2e}", |
| 127 | + transform=ax.transAxes, fontsize=8, |
| 128 | + bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.5)) |
| 129 | + |
| 130 | +axes[0].set_ylabel("Average charge Z") |
| 131 | +axes[0].legend(loc="lower left", fontsize=8) |
| 132 | + |
| 133 | +plt.suptitle("Polyprotic HH: pyMBE vs Analytical vs Simulation", fontsize=14) |
| 134 | +plt.tight_layout() |
| 135 | +plt.savefig("tests/polyprotic_HH_comparison.png", dpi=150, bbox_inches="tight") |
| 136 | +print("Plot saved to tests/polyprotic_HH_comparison.png") |
| 137 | +plt.show() |
0 commit comments