import pandas as pd
import jieba
from nltk.translate.meteor_score import meteor_score
def preprocess_text(text):
    """Segment *text* into a list of word tokens using jieba.

    Args:
        text: The text to segment. Non-string values (for example numeric
            cells loaded from a spreadsheet) are converted with ``str()``
            first, since jieba only segments strings.

    Returns:
        The list of tokens produced by ``jieba.lcut``.
    """
    # Spreadsheet cells may arrive as int/float; jieba expects a string.
    if not isinstance(text, str):
        text = str(text)
    # Chinese has no whitespace word boundaries, so segment with jieba.
    return jieba.lcut(text)
def calculate_meteor(reference, candidate):
    """Compute the METEOR score of *candidate* against a single *reference*.

    Both texts are tokenized with ``preprocess_text`` and the token lists are
    printed to stdout (debug output) before scoring.

    Args:
        reference: The reference (ground-truth) text.
        candidate: The candidate (hypothesis) text to evaluate.

    Returns:
        The value returned by ``nltk.translate.meteor_score.meteor_score``
        for the tokenized pair.
    """
    # Tokenize both sides; meteor_score is given token lists, not raw text.
    reference_tokens = preprocess_text(reference)
    candidate_tokens = preprocess_text(candidate)

    # Debug output: show the token lists that are about to be compared.
    print("-------------------")
    print(reference_tokens)
    print("-------------------")
    print(candidate_tokens)
    print("-------------------")

    # The first argument is a list of references; only one is supplied here.
    return meteor_score([reference_tokens], candidate_tokens)
def main():
    """Score each candidate column of a spreadsheet against the reference column.

    Reads the workbook at ``file_path``. For every candidate column
    ('one' .. 'five') that is present, a row-by-row METEOR score against the
    '参考答案' column is computed and stored in a new ``METEOR_<column>``
    column. The augmented table is then written to ``output_path``.

    Raises:
        KeyError: If the workbook has no '参考答案' column.
    """
    # Placeholder: replace with the path of the input .xlsx file.
    file_path = '路径'
    df = pd.read_excel(file_path)

    # Print the column names to help with debugging.
    print("Columns in the dataframe:", df.columns)

    # The sheet is expected to hold the reference text in '参考答案' and the
    # candidate texts in the columns 'one' .. 'five'.
    references = df['参考答案']
    candidate_columns = ['one', 'two', 'three', 'four', 'five']

    # Score every reference against every available candidate column.
    for candidate in candidate_columns:
        if candidate not in df.columns:
            print(f"Column {candidate} not found in the dataframe.")
            continue

        # Rows with a missing reference or candidate get a score of 0 so the
        # new column stays aligned with the dataframe's rows.
        df[f'METEOR_{candidate}'] = [
            0 if pd.isna(ref) or pd.isna(cand) else calculate_meteor(ref, cand)
            for ref, cand in zip(references, df[candidate])
        ]

    # Placeholder: replace with the path of the output .xlsx file.
    # NOTE: if this is the same path as file_path, the input is overwritten.
    output_path = '路径'
    df.to_excel(output_path, index=False)
    print("Saved METEOR scores to:", output_path)
if __name__ == "__main__":
    # Run the scoring pipeline only when executed as a script, not on import.
    main()
# 【python】读取xlsx文件，并测METEOR指标
# 最新推荐文章于 2024-07-16 16:07:16 发布