import pandas as pd
import os
import re
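# moveDocLocation: dossier-transfer (admission cutoff) rank position, averaged over the three years before `year` (2015, 2016 and 2017 for the default year=2018).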
def avg_moveDocLocation(historydata,year=2018):
historydata=historydata.fillna(-1)
historydata["matricDiffYear"]=historydata["matricDiffYear"].astype('int64')
historydata=historydata.sort_values(by=["collegeCode","order","matricDiffYear"])
collegecode=historydata["collegeCode"]
unique_collegecode = collegecode.unique()
result=pd.DataFrame()
for code in unique_collegecode:
temp=historydata.loc[historydata.collegeCode==code,["collegeCode","collegeName","order","matricDiffYear","moveDocLocation","averageLocation","moveDocGrade"]]
name= temp.loc[0,"collegeName"]
tt_rst=pd.DataFrame()
for order in temp["order"].unique():
temp_order=temp.loc[temp.order==order,:]
temp_year1 = temp_order.loc[temp_order.matricDiffYear>=(year-3),:]
temp_year2=temp_year1.loc[temp_year1.matricDiffYear<=(year-1),:]
temp_year3=temp_year2.loc[temp_year2.moveDocLocation>0,:]
temp_year4=temp_year3.loc[temp_year3.averageLocation>0,:]
tmp_value=temp_order.loc[temp_order.matricDiffYear==year,"moveDocGrade"]
if len(tmp_value)==0:
tmp_moveDocGrade=0
else:
tmp_moveDocGrade=int(tmp_value)
mean_mov_avg=list(temp_year4[["moveDocLocation","averageLocation"]].mean())
temp_result = pd.DataFrame([code,name,order,mean_mov_avg[0],mean_mov_avg[1],tmp_moveDocGrade],
index = ["collegecode","collegeName","order","moveDocLocation","averageLocation","moveDocGrade"])
te