import pandas as pd
import matplotlib.pyplot as plt
# Location of the transcript workbook that the script analyses.
path = r"C:\Users\k\Desktop\2011score.xlsx"

# Course names as they appear in the workbook's 'kcm' column, mapped to the
# label used for that course's score column in the merged table.
ENGLISH_COURSES = {
    '大学英语Ⅰ': '英语I',
    '大学英语Ⅱ': '英语II',
    '大学英语Ⅲ': '英语III',
    '大学英语Ⅳ': '英语IV',
}
CET4_COURSE = '大学英语四级考试(CET-4)'
CET4_LABEL = '四级成绩'
AVERAGE_LABEL = '平均成绩'
# Student id and name together identify a student across the per-course tables.
KEY_COLUMNS = ['学号', '姓名']


def _course_scores(df, course_name, score_label, dedupe=True):
    """Return the name / student-id / score rows of a single course.

    Args:
        df: Transcript table with the columns 'xm' (name), 'xh' (student id),
            'kcm' (course name) and 'kccj' (course score).
        course_name: Value of 'kcm' selecting the course.
        score_label: Column label given to the course's score in the result.
        dedupe: When true, keep only the first row per student id.

    Returns:
        A new DataFrame with the columns '姓名', '学号' and ``score_label``.
    """
    rows = df.loc[df['kcm'] == course_name, ['xm', 'xh', 'kccj']]
    if dedupe:
        rows = rows.drop_duplicates('xh')
    # rename() keeps the original dtypes; rebuilding the frame from ``.values``
    # would turn every column (scores included) into ``object``.
    renamed = rows.rename(
        columns={'xm': '姓名', 'xh': '学号', 'kccj': score_label}
    )
    return renamed.reset_index(drop=True)


def build_score_table(df):
    """Pair each student's CET-4 score with their mean English course grade.

    Only students that have a row for all four English courses and for the
    CET-4 exam are kept, because the per-course tables are inner-joined on
    student id and name.

    Args:
        df: Transcript table, see ``_course_scores`` for the expected columns.

    Returns:
        DataFrame with the columns '学号', '姓名', '四级成绩' and '平均成绩',
        where '平均成绩' is the float mean of the four English course scores.

    Raises:
        ValueError: If a course score cannot be converted to a float.
    """
    merged = None
    for course_name, label in ENGLISH_COURSES.items():
        scores = _course_scores(df, course_name, label)
        if merged is None:
            merged = scores
        else:
            merged = pd.merge(merged, scores, on=KEY_COLUMNS)

    # NOTE(review): CET-4 rows are not de-duplicated, so a student with
    # several CET-4 rows contributes one output row per CET-4 row. This
    # matches the previous behaviour; confirm it is intended.
    cet = _course_scores(df, CET4_COURSE, CET4_LABEL, dedupe=False)
    merged = pd.merge(merged, cet, on=KEY_COLUMNS)

    # Average of the English I-IV course grades next to the CET-4 score.
    final = merged[KEY_COLUMNS + [CET4_LABEL]].copy()
    course_labels = list(ENGLISH_COURSES.values())
    # A NumPy mean is used so a missing score yields NaN for that student
    # instead of being silently skipped.
    final[AVERAGE_LABEL] = (
        merged[course_labels].to_numpy(dtype=float).mean(axis=1)
    )
    return final


def plot_scores(final):
    """Show a scatter plot of CET-4 score (x) against mean course grade (y).

    Args:
        final: Table returned by ``build_score_table``.
    """
    # Use SimHei for sans-serif text; the axis labels below are Chinese.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    # One marker per student: joining the unsorted points with a line would
    # produce a meaningless zig-zag.
    plt.scatter(final[CET4_LABEL], final[AVERAGE_LABEL])
    plt.xlabel('四级成绩')
    plt.ylabel('平均成绩')
    plt.show()


if __name__ == "__main__":
    df = pd.read_excel(path)
    final = build_score_table(df)
    plot_scores(final)
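# pandas merge(): correlation analysis between CET-4 scores and College English course grades
# (Scraped page footer: latest recommended article published 2022-10-29 19:13:38)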