使用Materials Project的API接口下载材料数据集(索引formula)

由于毕设需要找一些热电材料的数据集,我手头有一份包含结构formula的csv文件,但是其中的icsd号对不上,因此希望通过formula+spacegroup+natoms+volume+band_gap来确定想要的结构。
代码如下:
 

import json
import os
from tqdm import tqdm
import pandas as pd
# Log in at https://next-gen.materialsproject.org/api to obtain the new-style 32-character API key.
API_KEY = "xxx32位,而不是16位"  # Key issued at account registration; it is shown under "My Dashboard" at the top right of the website.
# NOTE(review): MPRester is not imported in the visible import lines; presumably
# `from mp_api.client import MPRester` is required -- confirm before running.
mpr = MPRester(api_key=API_KEY)  # Create the MPRester object used for authentication

def _icsd_ids_or_none(crys):
    """Return the ICSD id list of a summary document, or None if it has none."""
    try:
        return crys.database_IDs["icsd"]
    except (KeyError, TypeError, AttributeError):
        # No "icsd" entry (or no usable database_IDs mapping) on this document.
        return None


def _close_in_gap_and_volume(crys, eg, vol):
    """Return True if band gap is within 1 and volume within 10 of the targets."""
    if crys.band_gap is None or crys.volume is None:
        return False
    return abs(crys.band_gap - eg) <= 1 and abs(crys.volume - vol) <= 10


def _write_matched_poscar(crys, formula, sg, root_path, formula_list):
    """Write ``crys.structure`` as a POSCAR file in root_path; return its name.

    The first structure saved for a formula is named ``<formula>-POSCAR``.
    Later structures for the same formula are named
    ``<formula>-<sg>-POSCAR`` so that they do not overwrite the first file.
    ``formula_list`` is updated in place with newly seen formulas.
    """
    if formula not in formula_list:
        crys_name = formula + "-POSCAR"
        formula_list.append(formula)
    else:
        crys_name = formula + "-" + str(sg) + "-POSCAR"
    # NOTE(review): Poscar is not imported in the visible import lines;
    # presumably `from pymatgen.io.vasp import Poscar` -- confirm.
    Poscar(crys.structure).write_file(os.path.join(root_path, crys_name))
    return crys_name


def download_te_data(root_path, csv_name, ltc_dataset, tc_column="K"):
    """Download matching structures for each CSV row and record them.

    For every row of ``<root_path>/<csv_name>`` whose ``compound`` is not
    already a key of the JSON file ``ltc_dataset``, the module-level ``mpr``
    client is queried by formula. The first returned document that has the
    same space-group number and site count as the row, and that either

    * lists the row's ``icsd-<icsd>`` tag among its ICSD ids, or
    * has no ICSD ids at all but a band gap within 1 and a volume within 10
      of the row's ``Eg (eV)`` / ``volume`` values,

    is written as a POSCAR file into ``root_path``, and a
    ``poscar_name,tc`` line is appended to ``TEDesignLab_dataset.csv`` in the
    current working directory.

    Formulas for which at least one candidate lacked ICSD ids are printed
    once each at the end.

    Args:
        root_path: Directory containing the CSV; POSCAR files are written here.
        csv_name: CSV file name. The columns read are ``compound``, ``icsd``,
            ``sg``, ``natoms``, ``Eg (eV)``, ``volume`` and ``tc_column``.
        ltc_dataset: Path of a JSON file whose keys are formulas to skip.
        tc_column: Name of the CSV column whose value is written to the
            ``tc`` field. NOTE(review): the original code wrote an undefined
            name ``K`` here; the default mirrors that name and is presumably
            the column header -- confirm against the real CSV.

    Raises:
        ValueError: If ``tc_column`` is not a column of the CSV.
    """
    with open(ltc_dataset, "r") as f:
        ltc_data = json.load(f)
    df = pd.read_csv(os.path.join(root_path, csv_name))
    if tc_column not in df.columns:
        # Fail before any network request instead of on the first match.
        raise ValueError(
            f"column {tc_column!r} not found in {csv_name}; "
            "pass tc_column=<name of the thermal-conductivity column>"
        )

    formula_list = []
    error_list = []
    with open("TEDesignLab_dataset.csv", "w") as f:
        f.write("poscar_name,tc\n")
        for _, tgt_crys in tqdm(df.iterrows(), total=len(df)):
            formula = tgt_crys["compound"]
            if formula in ltc_data:
                continue
            icsd_tag = "icsd-" + str(tgt_crys["icsd"])
            sg = tgt_crys["sg"]
            natoms = tgt_crys["natoms"]
            eg = tgt_crys["Eg (eV)"]
            vol = tgt_crys["volume"]
            tc = tgt_crys[tc_column]
            docs = mpr.summary.search(formula=formula)  # search by formula
            for crys in docs:
                same_cell = (
                    sg == crys.symmetry.number and natoms == crys.nsites
                )
                icsd_ids = _icsd_ids_or_none(crys)
                if icsd_ids is None:
                    # No ICSD ids to compare: fall back to band gap + volume.
                    if formula not in error_list:
                        error_list.append(formula)
                    matched = same_cell and _close_in_gap_and_volume(
                        crys, eg, vol
                    )
                else:
                    matched = same_cell and icsd_tag in icsd_ids
                if not matched:
                    continue
                crys_name = _write_matched_poscar(
                    crys, formula, sg, root_path, formula_list
                )
                f.write(f"{crys_name},{tc}\n")
                break
        print(error_list)

def convert_csv_to_json(root_path, csv_file1, csv_file2):
    """Merge two ``poscar_name``/``tc`` CSV files into ``total_dataset.json``.

    Both CSV files are read from ``root_path``. Each row contributes one
    ``poscar_name -> tc`` entry; rows of ``csv_file2`` are applied after those
    of ``csv_file1``, so they win when a name occurs in both. The resulting
    mapping is written as JSON to ``total_dataset.json`` in the current
    working directory.
    """
    first = pd.read_csv(os.path.join(root_path, csv_file1))
    second = pd.read_csv(os.path.join(root_path, csv_file2))

    name_to_tc = {}
    # Only the first file is wrapped in a progress bar.
    for rows in (tqdm(first.iterrows()), second.iterrows()):
        for _, record in rows:
            name_to_tc[record["poscar_name"]] = record["tc"]

    # Serialize before opening the output so a failure leaves the file alone.
    serialized = json.dumps(name_to_tc)
    with open("total_dataset.json", "w") as out:
        out.write(serialized)

    # for name in column_names:
    #     print(name)
    # for id, row in df.iterrows():
    #     el_name = row["Symbol"]

if __name__ == "__main__":
    # Inputs for the two steps below; both calls are left commented out, so
    # running the script only binds these names.
    root_path, csv_name, ltc_dataset = (
        "new_tc_dataset",
        "example_dataset.csv",
        "total_dataset.json",
    )
    #convert_csv_to_json(root_path, "datasets.csv", "TE-dataset.csv")
    #download_te_data(root_path, csv_name, ltc_dataset)

代码是临时写的,借助GPT读一下应该就能看懂;这里就不再补充注释和优化代码逻辑了,能跑就行。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

成贤往事

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值