由于毕业设计需要一些热电材料的数据集,我手头已有包含结构化学式(formula)的 csv 文件,但其中的 icsd 号对不上,因此希望通过 formula + spacegroup + natoms + volume + band_gap 来确定想要的结构。
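代码如下(运行前需安装 mp-api、pymatgen、pandas 和 tqdm,并补充导入:`from mp_api.client import MPRester`、`from pymatgen.io.vasp import Poscar`):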
import json
import os
from tqdm import tqdm
import pandas as pd
# Log in at https://next-gen.materialsproject.org/api to obtain the new-style
# 32-character API key (the old 16-character keys do not work here).
# NOTE(review): MPRester is not imported anywhere in the visible source; it is
# normally provided by `from mp_api.client import MPRester` -- confirm and add it.
# The key is read from the MP_API_KEY environment variable when set, so a real
# key does not have to be committed with the code; the literal is a placeholder.
API_KEY = os.environ.get("MP_API_KEY", "xxx32位,而不是16位")  # key issued at registration; shown under "My Dashboard" (top right of the website)
mpr = MPRester(api_key=API_KEY)  # create the authenticated MPRester client
def _icsd_ids_of(doc):
    """Return the ICSD IDs recorded on a search result, or None if unavailable."""
    try:
        return doc.database_IDs["icsd"]
    except (KeyError, TypeError, AttributeError):
        return None


def _is_target_structure(doc, icsd_ids, icsd_tag, sg, natoms, eg, vol):
    """Decide whether search result *doc* is the structure described by a CSV row."""
    # Space-group number and atom count must agree in every case.
    if not (sg == doc.symmetry.number and natoms == doc.nsites):
        return False
    # Preferred criterion: the row's ICSD tag is listed on the result.
    if icsd_ids is not None and icsd_tag in icsd_ids:
        return True
    # Fallback when the ICSD IDs are missing or do not match: compare band gap
    # and cell volume within fixed tolerances.
    band_gap, volume = doc.band_gap, doc.volume
    if band_gap is None or volume is None:
        # Cannot compare against a missing value; treat as "no match" rather
        # than aborting the whole run with a TypeError.
        return False
    return abs(band_gap - eg) <= 1 and abs(volume - vol) <= 10


def download_te_data(root_path, csv_name, ltc_dataset):
    """Write POSCAR files for CSV rows whose compound is absent from *ltc_dataset*.

    Reads the JSON file *ltc_dataset* and the CSV ``root_path/csv_name``. For
    every CSV row whose ``compound`` is not a key of the JSON data, the module
    level ``mpr`` client is searched by formula and the first result accepted
    by ``_is_target_structure`` is written as a POSCAR file into *root_path*.
    One ``poscar_name,tc`` line per written structure is appended to
    ``TEDesignLab_dataset.csv`` in the current working directory. Formulas for
    which a search result carried no ICSD IDs are printed at the end.

    Args:
        root_path: Directory holding the CSV; POSCAR files are written here.
        csv_name: CSV file name; the columns ``compound``, ``icsd``, ``sg``,
            ``natoms``, ``Eg (eV)`` and ``volume`` are read.
        ltc_dataset: Path of a JSON file whose keys are compared with the
            ``compound`` values.

    NOTE(review): the indentation of the original source was lost, so the
    nesting of the band-gap/volume fallback was reconstructed: it is applied
    whenever the ICSD check does not accept a candidate. Confirm this matches
    the intended behaviour.
    NOTE(review): ``Poscar`` is not imported in the visible source (normally
    ``from pymatgen.io.vasp import Poscar``) -- confirm and add the import.
    """
    with open(ltc_dataset, "r") as f:
        ltc_data = json.load(f)
    df = pd.read_csv(os.path.join(root_path, csv_name))
    seen_formulas = set()
    error_list = []
    with open("TEDesignLab_dataset.csv", "w") as f:
        f.write("poscar_name,tc\n")
        for _, tgt_crys in tqdm(df.iterrows()):
            formula = tgt_crys["compound"]
            if formula in ltc_data:
                continue
            icsd_tag = "icsd-" + str(tgt_crys["icsd"])
            sg = tgt_crys["sg"]
            natoms = tgt_crys["natoms"]
            eg = tgt_crys["Eg (eV)"]
            vol = tgt_crys["volume"]
            docs = mpr.summary.search(formula=formula)  # search by formula only
            for crys in docs:
                icsd_ids = _icsd_ids_of(crys)
                if icsd_ids is None:
                    # Keep a record of formulas whose candidates lack ICSD IDs.
                    error_list.append(formula)
                if not _is_target_structure(
                    crys, icsd_ids, icsd_tag, sg, natoms, eg, vol
                ):
                    continue
                # The first structure of a formula gets the plain name; later
                # ones are disambiguated by the space-group number.
                if formula in seen_formulas:
                    crys_name = formula + "-" + str(sg) + "-POSCAR"
                else:
                    crys_name = formula + "-POSCAR"
                Poscar(crys.structure).write_file(os.path.join(root_path, crys_name))
                seen_formulas.add(formula)
                # NOTE(review): `K` is not defined in the visible source;
                # presumably the row's thermal-conductivity value -- confirm
                # and replace with the correct column lookup.
                f.write(f"{crys_name},{K}\n")
                break
    print(error_list)
def convert_csv_to_json(root_path, csv_file1, csv_file2):
    """Merge the ``poscar_name`` -> ``tc`` pairs of two CSV files into one JSON file.

    Both CSV files are read from *root_path*. Rows of *csv_file2* are applied
    after those of *csv_file1*, so on a duplicate ``poscar_name`` the value
    from *csv_file2* wins. The result is written to ``total_dataset.json`` in
    the current working directory.
    """
    first_table = pd.read_csv(os.path.join(root_path, csv_file1))
    second_table = pd.read_csv(os.path.join(root_path, csv_file2))

    # Only the first table is iterated under a progress bar.
    merged = {
        record["poscar_name"]: record["tc"]
        for _, record in tqdm(first_table.iterrows())
    }
    merged.update(
        {record["poscar_name"]: record["tc"] for _, record in second_table.iterrows()}
    )

    # Serialise before opening so a failure leaves any existing file untouched.
    payload = json.dumps(merged)
    with open("total_dataset.json", "w") as out:
        out.write(payload)
# for name in column_names:
# print(name)
# for id, row in df.iterrows():
# el_name = row["Symbol"]
if __name__ == "__main__":
    # Inputs for the two entry points; both calls are left disabled here.
    root_path, csv_name, ltc_dataset = (
        "new_tc_dataset",
        "example_dataset.csv",
        "total_dataset.json",
    )
    # convert_csv_to_json(root_path, "datasets.csv", "TE-dataset.csv")
    # download_te_data(root_path, csv_name, ltc_dataset)
代码是临时写的,没有补充注释,也没有优化逻辑,但可以运行;如有不清楚的地方,借助 GPT 阅读一下应该就能理解。