import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.signal import savgol_filter
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
from collections import defaultdict
import os
import csv
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'sans-serif',
'font.sans-serif': ['Arial', 'DejaVu Sans'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 300,
'savefig.dpi': 300,
'figure.figsize': (10, 7),
'lines.linewidth': 2.5,
'legend.fontsize': 11,
'legend.framealpha': 0.9,
'mathtext.default': 'regular' # 启用数学文本支持
})
# 1. 增强的原子类型识别函数 - 明确区分所有原子类型
def identify_atom_types(struct):
"""识别所有关键原子类型并排除自身化学键"""
# 磷酸氧分类
p_oxygens = {"P=O": [], "P-O": [], "P-OH": []}
phosphate_hydrogens = [] # 仅P-OH基团中的H原子
# 水合氢离子识别
hydronium_oxygens = []
hydronium_hydrogens = [] # H₃O⁺中的H原子
# 普通水分子
water_oxygens = []
water_hydrogens = [] # 普通水中的H原子
# 氟离子
fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"]
# 铝离子
aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"]
# 创建快速邻居查找表
neighbor_cache = defaultdict(list)
for i, site in enumerate(struct):
if site.species_string == "O":
neighbors = struct.get_neighbors(site, r=1.3)
h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"]
neighbor_cache[i] = h_neighbors
# 识别水合氢离子 (H₃O⁺)
if len(h_neighbors) == 3:
hydronium_oxygens.append(i)
for h_site in h_neighbors:
hydronium_hydrogens.append(h_site.index)
# 识别磷酸基团
for site in struct:
if site.species_string == "P":
neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径
# 筛选氧原子邻居
o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"]
if len(o_neighbors) < 4:
# 如果找不到4个氧原子,使用旧方法
for neighbor in o_neighbors:
nn_site = neighbor[0]
if neighbor[1] < 1.55:
p_oxygens["P=O"].append(nn_site.index)
else:
if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)):
p_oxygens["P-OH"].append(nn_site.index)
else:
p_oxygens["P-O"].append(nn_site.index)
continue
# 按距离排序
o_neighbors.sort(key=lambda x: x[1])
# 最近的氧原子为P=O
p_double_o = o_neighbors[0][0]
p_oxygens["P=O"].append(p_double_o.index)
# 其他三个氧原子
for i in range(1, 4):
o_site = o_neighbors[i][0]
# 检查氧原子上是否有氢
if neighbor_cache.get(o_site.index, []):
p_oxygens["P-OH"].append(o_site.index)
else:
p_oxygens["P-O"].append(o_site.index)
# 识别P-OH基团中的H原子 (磷酸中的H)
for o_idx in p_oxygens["P-OH"]:
# 获取与P-OH氧相连的H原子
h_neighbors = neighbor_cache.get(o_idx, [])
for h_site in h_neighbors:
if h_site.species_string == "H":
phosphate_hydrogens.append(h_site.index)
# 识别普通水分子 (排除磷酸氧和水合氢离子)
for i, site in enumerate(struct):
if site.species_string == "O" and i not in hydronium_oxygens:
is_phosphate_oxygen = False
for cat in p_oxygens.values():
if i in cat:
is_phosphate_oxygen = True
break
if not is_phosphate_oxygen:
water_oxygens.append(i)
# 识别普通水分子中的H原子 (水中的H)
for o_idx in water_oxygens:
h_neighbors = neighbor_cache.get(o_idx, [])
for h_site in h_neighbors:
if h_site.species_string == "H":
water_hydrogens.append(h_site.index)
return {
"phosphate_oxygens": p_oxygens,
"phosphate_hydrogens": phosphate_hydrogens,
"water_oxygens": water_oxygens,
"water_hydrogens": water_hydrogens,
"hydronium_oxygens": hydronium_oxygens,
"hydronium_hydrogens": hydronium_hydrogens,
"fluoride_atoms": fluoride_atoms,
"aluminum_atoms": aluminum_atoms
}
# 2. RDF计算函数 - 完全排除自身化学键(动态原子类型识别)
def calculate_rdf(structures, center_selector, target_selector,
r_max=8.0, bin_width=0.05, progress=True,
exclude_bonds=True, bond_threshold=1.3):
"""
计算径向分布函数(完全排除自身化学键)
:param center_selector: 函数,用于从原子类型字典中选择中心原子
:param target_selector: 函数,用于从原子类型字典中选择目标原子
:param r_max: 最大距离
:param bin_width: 分箱宽度
:param exclude_bonds: 是否排除化学键
:param bond_threshold: 化学键距离阈值
:return: (bins, rdf, peak_info)
"""
bins = np.arange(0, r_max, bin_width)
hist = np.zeros(len(bins) - 1)
total_centers = 0
total_targets = 0
total_volume = 0
iterator = tqdm(structures, desc="Calculating RDF") if progress else structures
for struct_idx, struct in enumerate(iterator):
# 关键修改:对每一帧进行原子类型识别
atom_types = identify_atom_types(struct)
# 识别中心原子
centers = center_selector(atom_types)
# 识别目标原子
targets = target_selector(atom_types)
# 检查是否有足够的原子
if len(centers) == 0 or len(targets) == 0:
continue
total_centers += len(centers)
total_targets += len(targets)
total_volume += struct.volume
# 获取坐标
center_coords = np.array([struct[i].coords for i in centers])
target_coords = np.array([struct[i].coords for i in targets])
# 使用KDTree高效查询
lattice = struct.lattice
kdtree = cKDTree(target_coords, boxsize=lattice.abc)
# 计算所有中心原子的距离分布
distances, indices = kdtree.query(center_coords, k=min(50, len(targets)),
distance_upper_bound=r_max)
# 过滤有效距离并排除自身化学键
valid_distances = []
for i, dist_list in enumerate(distances):
center_idx = centers[i]
for j, dist in enumerate(dist_list):
if dist > r_max:
continue
target_idx = targets[indices[i][j]]
# 排除自身化学键
if exclude_bonds:
# 计算中心原子和目标原子的实际距离
actual_dist = struct.get_distance(center_idx, target_idx)
if actual_dist < bond_threshold:
continue
valid_distances.append(dist)
# 统计距离分布
if len(valid_distances) > 0:
hist += np.histogram(valid_distances, bins=bins)[0]
# 修正后的归一化处理
n_frames = len(structures)
avg_density = total_targets / total_volume # 移除 n_frames 因子
r = bins[:-1] + bin_width/2
rdf = np.zeros_like(r)
for i in range(len(hist)):
r_lower = bins[i]
r_upper = bins[i+1]
shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3)
expected_count = shell_vol * avg_density * total_centers # 移除 /n_frames
if expected_count > 0:
rdf[i] = hist[i] / expected_count
else:
rdf[i] = 0
# 平滑处理
if len(rdf) > 5:
window_length = min(11, len(rdf)//2*2+1) # 确保为奇数
rdf_smoothed = savgol_filter(rdf, window_length=window_length, polyorder=3)
else:
rdf_smoothed = rdf
# 计算主要峰值位置 (1.5-3.0Å范围内)
peak_info = {}
mask = (r >= 1.5) & (r <= 3.0)
if np.any(mask):
peak_idx = np.argmax(rdf_smoothed[mask])
peak_pos = r[mask][peak_idx]
peak_val = rdf_smoothed[mask][peak_idx]
peak_info = {"position": peak_pos, "value": peak_val}
else:
peak_info = {"position": None, "value": None}
return r, rdf_smoothed, peak_info
# 3. 定义RDF分组(使用正确的LaTeX格式)
rdf_groups = {
"Phosphate_H_Bonds": [
# 磷酸作为受体
(lambda s: s["phosphate_oxygens"]["P=O"],
lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"],
"P=O···H", "#1f77b4"),
(lambda s: s["phosphate_oxygens"]["P-OH"],
lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"],
"P-OH···H", "#ff7f0e"),
(lambda s: s["phosphate_oxygens"]["P-O"],
lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"],
"P-O···H", "#17becf"),
# 磷酸作为供体
(lambda s: s["phosphate_hydrogens"],
lambda s: s["water_oxygens"] + s["hydronium_oxygens"],
"P-OH···O", "#d62728"),
],
"Hydronium_H_Bonds": [
# 水合氢离子作为受体
(lambda s: s["hydronium_oxygens"],
lambda s: s["water_hydrogens"] + s["phosphate_hydrogens"],
r"H$_3$O$^+$ O···H", "#9467bd"),
# 水合氢离子作为供体
(lambda s: s["hydronium_hydrogens"],
lambda s: s["water_oxygens"],
r"H$_3$O$^+$ H···O$_w$", "#8c564b"),
(lambda s: s["hydronium_hydrogens"],
lambda s: s["phosphate_oxygens"]["P=O"] +
s["phosphate_oxygens"]["P-O"] +
s["phosphate_oxygens"]["P-OH"],
r"H$_3$O$^+$ H···O$_p$", "#e377c2"),
],
"Water_Network": [
# 水分子之间的氢键
(lambda s: s["water_oxygens"],
lambda s: s["water_hydrogens"],
r"O$_w$···H$_w$", "#2ca02c"),
# 水作为受体与水合氢离子供体
(lambda s: s["water_oxygens"],
lambda s: s["hydronium_hydrogens"],
r"O$_w$···H$_h$", "#d62728"),
],
"Fluoride_H_Bonds": [
# 氟离子作为受体
(lambda s: s["fluoride_atoms"],
lambda s: s["water_hydrogens"],
r"F···H$_w$", "#2ca02c"),
(lambda s: s["fluoride_atoms"],
lambda s: s["phosphate_hydrogens"],
r"F···H$_p$", "#d62728"),
(lambda s: s["fluoride_atoms"],
lambda s: s["hydronium_hydrogens"],
r"F···H$_h$", "#9467bd"),
],
"Aluminum_Coordination": [
# 铝与水中的氧
(lambda s: s["aluminum_atoms"],
lambda s: s["water_oxygens"],
r"Al···O$_w$", "#1f77b4"),
# 铝与磷酸中的氧
(lambda s: s["aluminum_atoms"],
lambda s: s["phosphate_oxygens"]["P=O"] +
s["phosphate_oxygens"]["P-O"] +
s["phosphate_oxygens"]["P-OH"],
r"Al···O$_p$", "#ff7f0e"),
# 铝与氟的配位
(lambda s: s["aluminum_atoms"],
lambda s: s["fluoride_atoms"],
r"Al···F", "#17becf"),
],
"Phosphate_Phosphate_H_Bonds": [
# 磷酸基团内部的氢键作用
(lambda s: s["phosphate_hydrogens"],
lambda s: s["phosphate_oxygens"]["P=O"],
r"P-OH···P=O", "#1f77b4"),
(lambda s: s["phosphate_hydrogens"],
lambda s: s["phosphate_oxygens"]["P-O"],
r"P-OH···P-O", "#ff7f0e"),
(lambda s: s["phosphate_hydrogens"],
lambda s: s["phosphate_oxygens"]["P-OH"],
r"P-OH···P-OH", "#d62728"),
],
"Phosphate_Phosphate_Interactions": [
# 1. 所有磷酸氧之间的整体聚集
(lambda s: s["phosphate_oxygens"]["P=O"] +
s["phosphate_oxygens"]["P-O"] +
s["phosphate_oxygens"]["P-OH"],
lambda s: s["phosphate_oxygens"]["P=O"] +
s["phosphate_oxygens"]["P-O"] +
s["phosphate_oxygens"]["P-OH"],
"All P-Oxygens", "#1f77b4"),
# 2. 不同类型磷酸氧之间的特定相互作用
(lambda s: s["phosphate_oxygens"]["P=O"],
lambda s: s["phosphate_oxygens"]["P=O"],
"P=O···P=O", "#ff7f0e"),
(lambda s: s["phosphate_oxygens"]["P=O"],
lambda s: s["phosphate_oxygens"]["P-O"],
"P=O···P-O", "#2ca02c"),
(lambda s: s["phosphate_oxygens"]["P=O"],
lambda s: s["phosphate_oxygens"]["P-OH"],
"P=O···P-OH", "#d62728"),
(lambda s: s["phosphate_oxygens"]["P-O"],
lambda s: s["phosphate_oxygens"]["P-OH"],
"P-O···P-OH", "#9467bd"),
(lambda s: s["phosphate_oxygens"]["P-OH"],
lambda s: s["phosphate_oxygens"]["P-OH"],
"P-OH···P-OH", "#8c564b"),
# 3. 氢键供体-受体关系 (P-OH中的H与其他磷酸氧)
(lambda s: s["phosphate_hydrogens"],
lambda s: s["phosphate_oxygens"]["P=O"],
"P-OH···P=O (H-bond)", "#e377c2"),
(lambda s: s["phosphate_hydrogens"],
lambda s: s["phosphate_oxygens"]["P-O"],
"P-OH···P-O (H-bond)", "#7f7f7f"),
(lambda s: s["phosphate_hydrogens"],
lambda s: s["phosphate_oxygens"]["P-OH"],
"P-OH···P-OH (H-bond)", "#bcbd22")
]
}
# 4. 主程序 - 优化版:相同类型图表使用统一y轴范围
if __name__ == "__main__":
# 定义要处理的体系
vasprun_files = {
"System1": "vasprun1.xml",
"System2": "vasprun2.xml",
"System3": "vasprun3.xml",
"System4": "vasprun4.xml"
}
# 存储所有数据
all_system_data = {}
group_y_max = {group_name: 0 for group_name in list(rdf_groups.keys()) + ["Phosphate_Phosphate_H_Bonds"]} # 每个分组的最大y值
global_x_max = 6.0
# 创建输出目录
os.makedirs("RDF_Plots", exist_ok=True)
# 第一步:计算所有体系的所有RDF数据,并确定每个分组的最大y值
for system_name, vasprun_file in vasprun_files.items():
print(f"\n{'='*50}")
print(f"Processing {system_name}: {vasprun_file}")
print(f"{'='*50}")
try:
# 加载VASP结果
vr = Vasprun(vasprun_file, ionic_step_skip=5)
structures = vr.structures
print(f"Loaded {len(structures)} frames")
# 测试原子识别
test_struct = structures[0]
atom_types = identify_atom_types(test_struct)
print("\nAtom identification test:")
print(f"P=O atoms: {len(atom_types['phosphate_oxygens']['P=O'])}")
print(f"P-O atoms: {len(atom_types['phosphate_oxygens']['P-O'])}")
print(f"P-OH atoms: {len(atom_types['phosphate_oxygens']['P-OH'])}")
print(f"磷酸中的H (P-OH): {len(atom_types['phosphate_hydrogens'])}")
print(f"水中的O: {len(atom_types['water_oxygens'])}")
print(f"水中的H: {len(atom_types['water_hydrogens'])}")
print(f"H₃O⁺中的O: {len(atom_types['hydronium_oxygens'])}")
print(f"H₃O⁺中的H: {len(atom_types['hydronium_hydrogens'])}")
print(f"氟离子: {len(atom_types['fluoride_atoms'])}")
print(f"铝离子: {len(atom_types['aluminum_atoms'])}")
# 存储体系数据
system_data = {
"atom_types": atom_types,
"rdf_results": {},
"peak_infos": {}
}
# 计算所有RDF分组
for group_name, pairs in rdf_groups.items():
system_data["rdf_results"][group_name] = {}
system_data["peak_infos"][group_name] = {}
# 当前分组在当前体系中的最大y值
group_y_max_current = 0
for center_sel, target_sel, label, color in pairs:
print(f"\nCalculating RDF for: {label}")
try:
# 准备选择器函数
r, rdf, peak_info = calculate_rdf(
structures,
lambda s: center_sel(atom_types),
lambda s: target_sel(atom_types),
r_max=global_x_max,
exclude_bonds=True,
bond_threshold=1.3
)
system_data["rdf_results"][group_name][label] = (r, rdf, color)
system_data["peak_infos"][group_name][label] = peak_info
# 更新当前分组在当前体系中的最大y值
if len(rdf) > 0:
current_max = np.max(rdf)
if current_max > group_y_max_current:
group_y_max_current = current_max
# 打印峰值信息
if peak_info["position"] is not None:
print(f" Peak for {label}: {peak_info['position']:.3f} Å (g(r) = {peak_info['value']:.2f})")
else:
print(f" No significant peak found for {label} in 1.5-3.0 Å range")
except Exception as e:
print(f"Error calculating RDF for {label}: {str(e)}")
system_data["rdf_results"][group_name][label] = (np.array([]), np.array([]), color)
system_data["peak_infos"][group_name][label] = {"position": None, "value": None}
# 更新该分组的全局最大y值(所有体系中该分组的最大y值)
if group_y_max_current > group_y_max[group_name]:
group_y_max[group_name] = group_y_max_current
# 保存体系数据
all_system_data[system_name] = system_data
print(f"\nCompleted processing for {system_name}")
except Exception as e:
print(f"Error processing {system_name}: {str(e)}")
# 为每个分组添加15%的余量
for group_name in group_y_max:
group_y_max[group_name] = group_y_max[group_name] * 1.15
print(f"\n{'='*50}")
print("Group-wise y-axis maximum values:")
for group_name, y_max in group_y_max.items():
print(f"{group_name}: {y_max:.2f}")
print(f"{'='*50}")
# 第二步:为所有体系的所有分组生成图表(相同类型图表使用统一y轴范围)
for system_name, system_data in all_system_data.items():
print(f"\n\n{'='*50}")
print(f"Generating plots for {system_name}")
print(f"{'='*50}")
# 为每组创建单独的图表
for group_name, group_data in system_data["rdf_results"].items():
print(f"\nGenerating plot for {system_name} - {group_name}")
fig, ax = plt.subplots(figsize=(10, 7))
# 绘制所有RDF曲线
for label, (r, rdf, color) in group_data.items():
if len(r) > 0 and len(rdf) > 0:
ax.plot(r, rdf, color=color, label=label, alpha=0.9, linewidth=2.5)
# 应用统一坐标尺度
ax.set_xlim(0, global_x_max)
# 为相同类型的图表设置统一的y轴范围
if group_y_max[group_name] > 0:
ax.set_ylim(0, group_y_max[group_name])
else:
# 如果该分组没有有效数据,自动确定y轴范围
ax.set_ylim(0, 5)
# 图表装饰
ax.set_xlabel('Radial Distance (Å)', fontweight='bold')
ax.set_ylabel('g(r)', fontweight='bold')
# 根据组名设置标题
title_map = {
"Phosphate_H_Bonds": "Phosphate Hydrogen Bonding",
"Hydronium_H_Bonds": "Hydronium Ion Hydrogen Bonding",
"Water_Network": "Water Network Hydrogen Bonding",
"Fluoride_H_Bonds": "Fluoride Ion Hydrogen Bonding",
"Aluminum_Coordination": "Aluminum Coordination Environment",
"Phosphate_Phosphate_H_Bonds": "Phosphate-Phosphate Hydrogen Bonding",
"Phosphate_Phosphate_Interactions": "Phosphate-Phosphate Interactions"
}
# 添加体系名称到标题
ax.set_title(f"{system_name}: {title_map[group_name]}", fontsize=16, pad=15)
# 图例布局
ax.legend(ncol=1, loc='best', framealpha=0.95)
# 添加网格
ax.grid(True, linestyle='--', alpha=0.7)
# 添加氢键区域标记(1.3-2.5Å)
ax.axvspan(1.5, 2.5, alpha=0.1, color='green')
plt.tight_layout()
filename = os.path.join("RDF_Plots", f"RDF_{system_name}_{group_name}.png")
plt.savefig(filename, bbox_inches='tight', dpi=300)
print(f"Saved plot: {filename}")
plt.close() # 关闭图形以释放内存
# 打印该体系的峰值信息汇总
print(f"\n\n===== Peak Position Summary for {system_name} =====")
for group_name, peaks in system_data["peak_infos"].items():
print(f"\n--- {group_name.replace('_', ' ')} ---")
for label, info in peaks.items():
if info["position"] is not None:
print(f"{label}: {info['position']:.3f} Å")
else:
print(f"{label}: No significant peak")
print("\nAll RDF plots generated successfully for all systems!")
# 创建数据保存目录
os.makedirs("RDF_Data", exist_ok=True)
print("\n\nSaving all RDF data to TXT files...")
# 保存所有RDF数据
for system_name, system_data in all_system_data.items():
print(f"\nSaving RDF data for {system_name}")
# 为每个体系创建子目录
system_dir = os.path.join("RDF_Data", system_name)
os.makedirs(system_dir, exist_ok=True)
# 保存峰值信息为TXT格式
peak_info_path = os.path.join(system_dir, f"Peak_Positions_{system_name}.txt")
with open(peak_info_path, 'w') as f:
# 写入表头
f.write("{:<40} {:<30} {:<15} {:<15}\n".format(
"Group", "Interaction", "Peak Position (A)", "g(r) Value"))
f.write("-" * 100 + "\n")
for group_name, peaks in system_data["peak_infos"].items():
for label, info in peaks.items():
if info["position"] is not None:
f.write("{:<40} {:<30} {:<15.3f} {:<15.3f}\n".format(
group_name, label, info["position"], info["value"]))
else:
f.write("{:<40} {:<30} {:<15} {:<15}\n".format(
group_name, label, "N/A", "N/A"))
print(f"Saved peak positions: {peak_info_path}")
# 保存所有RDF曲线数据为TXT格式
for group_name, group_results in system_data["rdf_results"].items():
# 为每个分组创建子目录
group_dir = os.path.join(system_dir, group_name)
os.makedirs(group_dir, exist_ok=True)
for label, (r, rdf, color) in group_results.items():
# 确保有有效数据
if len(r) > 0 and len(rdf) > 0:
# 创建安全的文件名
safe_label = label.replace(" ", "_").replace("/", "_").replace("=", "_")
safe_label = safe_label.replace("(", "").replace(")", "").replace("$", "")
filename = f"RDF_{system_name}_{group_name}_{safe_label}.txt"
filepath = os.path.join(group_dir, filename)
# 写入TXT文件 - 便于复制粘贴的格式
with open(filepath, 'w') as f:
# 写入标题和描述
f.write(f"Radial Distribution Function (RDF) Data\n")
f.write(f"System: {system_name}\n")
f.write(f"Group: {group_name}\n")
f.write(f"Interaction: {label}\n")
f.write(f"Distance (A) g(r)\n")
f.write("=" * 40 + "\n")
# 写入数据,每行一个数据点
for i in range(len(r)):
f.write(f"{r[i]:10.6f} {rdf[i]:10.6f}\n")
print(f"Saved RDF data: {filename}")
else:
print(f"No valid RDF data for {label} in {system_name} - {group_name}")
print("\nAll RDF data saved successfully in TXT format!")
以上是湿法磷酸体系,磷酸 水 氟以及可能存在的水合氢离子之间的不同氢氧来源的RDF计算,结果讨论发现似乎该代码并没有实现逐帧重新识别原子类型,只是在第一帧识别的基础上进行后续的RDF计算,所以在这里我需要你修改这部分内容,保证该代码能够实现逐帧重新识别原子类型,给出包含质子转移变化过程的RDF计算。其次,结果讨论的时候还发现第一波峰周围出现不合理的负值,所以该代码可能还 存在其他的问题需要修正。最后这图是需要符合The Journal of Chemical Physics的论文图表要求,同时修改txt为origin可读可修改的文件形式,方便后续修改。以上代码修改完后可能会增加很多计算量,所以将上诉代码改为可在anaconda promt中可执行的py文件,代码文件内容中仍旧包含四个体系,在promt的控制台只需要输入执行代码文件以及--workers 58,如果有需要可以进行分块或并行来加速该运算效率。
最新发布