# 用了 1 个星期,终于算是把 penalty 分析在 Python 里实现了,下面是代码:
# encoding: utf-8
import pandas as pd
import numpy as np
from pandas import DataFrame as df
# Load wdata.csv.  header=None means the file's first row is treated as data,
# so every column name is supplied explicitly here.
data = pd.read_csv(
    'wdata.csv',
    header=None,
    names=[
        'ID', 'Leg', 'A0', 'A1', 'A1a', 'A1b', 'A2a', 'A2b', 'A3', 'A4',
        'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'A11', 'A12', 'A13', 'A14',
        'A15', 'A16', 'A17_1a', 'A17_1b', 'A17_2a', 'A17_2b', 'A17_4a', 'A17_4b',
        'A17_5a', 'A17_5b', 'A17_6', 'A20', 'A21', 'A22', 'A23', 'A24a',
        'A25', 'A26', 'A27', 'A28', 'A29', 'A30', 'A31', 'A32', 'A33', 'A34',
        'A35', 'A35b', 'A35c', 'A36', 'A37', 'A38', 'A39', 'A40',
        'A41', 'A42', 'A43', 'A44', 'A45', 'A46',
    ],
)

# Rating columns that are later recoded into three-level labels.
de_data = ['A5', 'A7', 'A9', 'A11']
"""
def def_jar(de_data):
data_list=data.ix[:,de_data]
for var_i in de_data:
for var_id in range(0,data_list.shape[0]+1):
if data_list.ix[var_id:var_id,var_i] is 5:
data_list.ix[var_id:var_id,'new_%s'%var_i]='lev1'
elif data_list.ix[var_id:var_id,var_i] is 4:
data_list.ix[var_id:var_id,'new_%s'%var_i]='lev2'
else :
data_list.ix[var_id:var_id,'new_%s'%var_i]='lev3'
print(data_list.head())
"""
def def_var(de_data, frame=None):
    """Add a three-level label column for each listed rating column.

    For every column name ``c`` in *de_data* a new column ``N_<c>`` is
    written, holding ``'JAR'`` where the rating equals 3, ``'Too Much'``
    where it is greater than 3 and ``'Not Enough'`` otherwise.  Missing
    values fail both comparisons and therefore also become
    ``'Not Enough'``.

    Args:
        de_data: Iterable of rating column names to recode.
        frame: DataFrame to modify in place.  When omitted, the
            module-level ``data`` frame is used.

    Returns:
        None.  The target frame is modified in place.
    """
    # Look up the global only when no frame is passed, so the function can
    # be used (and tested) without the module-level state.
    target = data if frame is None else frame
    for column in de_data:
        ratings = target[column]
        # Vectorized recoding.  The previous version built a list row by
        # row and finished with `new_data = new_data.clear`, which never
        # called clear() and only rebound the name to a method object.
        target['N_%s' % column] = np.where(
            ratings == 3,
            'JAR',
            np.where(ratings > 3, 'Too Much', 'Not Enough'),
        )
#def_var(de_data)
# Names of the label columns created by def_var2, in creation order.
def_varlist = []


def def_var2(de_data, frame=None):
    """Recode rating columns into 'JAR' / 'much' / 'not' label columns.

    For every column name ``c`` in *de_data* a new column ``N_<c>`` is
    written, holding ``'JAR'`` where the rating equals 3, ``'much'`` where
    it is greater than 3 and ``'not'`` otherwise.  Missing values fail both
    comparisons and therefore also become ``'not'``.  Each new column name
    is recorded in the module-level ``def_varlist``.

    Args:
        de_data: Iterable of rating column names to recode.
        frame: DataFrame to modify in place.  When omitted, the
            module-level ``data`` frame is used.

    Returns:
        None.  The target frame and ``def_varlist`` are modified in place.
    """
    target = data if frame is None else frame
    for column in de_data:
        label_column = 'N_%s' % column
        ratings = target[column]
        target[label_column] = np.where(
            ratings == 3,
            'JAR',
            np.where(ratings > 3, 'much', 'not'),
        )
        # Register each label column once, so calling the function again
        # does not duplicate rows in tables built from def_varlist.
        if label_column not in def_varlist:
            def_varlist.append(label_column)
# Recode the rating columns listed in de_data; def_var2 also records the
# names of the new label columns in def_varlist.
def_var2(de_data)
# Quick visual check of the first rows, including the new N_* columns.
print(data.head())
# table_mean2=pd.DataFrame(data.pivot_table('A10',columns='N_A5',aggfunc='mean'),columns='A10_A5')
# de_data2 is the same list object as def_varlist (an alias, not a copy).
de_data2=def_varlist
def creat_meantable(inde_var, de_data2, frame=None):
    """Tabulate the mean of one column within each label group.

    The result has one row per label column in *de_data2* and one column
    per label value found (for example 'JAR', 'much', 'not').  Each cell is
    the mean of *inde_var* over the rows carrying that label.  A label that
    never occurs in a given column yields NaN in that row.

    Args:
        inde_var: Name of the numeric column to average (e.g. ``'A10'``).
        de_data2: Sequence of label column names to group by.
        frame: DataFrame to read from.  When omitted, the module-level
            ``data`` frame is used.

    Returns:
        A DataFrame indexed by the names in *de_data2*.
    """
    source = data if frame is None else frame
    # One Series of group means per label column.  This replaces the
    # pivot_table(...).T + DataFrame.append loop: DataFrame.append was
    # removed in pandas 2.0, and groupby gives the same per-label means as
    # a Series regardless of pandas version.
    group_means = [source.groupby(var)[inde_var].mean() for var in de_data2]
    # Each Series becomes one row; label values are aligned into columns.
    table_mean = pd.DataFrame(group_means)
    table_mean.index = list(de_data2)
    return table_mean
def creat_penc(inde_var, de_data2, frame=None):
    """Tabulate the share of rows carrying each label, per label column.

    The result has one row per label column in *de_data2* and one column
    per label value found.  Each cell is the fraction of rows with that
    label, as computed by ``value_counts(normalize=True)``.  A label that
    never occurs in a given column yields NaN in that row.

    Args:
        inde_var: Unused; kept so the signature matches creat_meantable
            and existing callers keep working.
        de_data2: Sequence of label column names to summarize.
        frame: DataFrame to read from.  When omitted, the module-level
            ``data`` frame is used.

    Returns:
        A DataFrame indexed by the names in *de_data2*.
    """
    source = data if frame is None else frame
    # Collect the rows first and build the table once; the previous
    # row-by-row DataFrame.append was removed in pandas 2.0.
    shares = [source[var].value_counts(normalize=True) for var in de_data2]
    table_penc = pd.DataFrame(shares)
    table_penc.index = list(de_data2)
    return table_penc
# Penalty table for the column 'A10' against every label column in de_data2.
# table_meanall holds the mean of 'A10' per label group and creat_penc gives
# the share of rows per label; both are indexed by the label column names.
table_meanall = creat_meantable('A10', de_data2)

# Mean drop: group mean minus the JAR-group mean.
table_penall = creat_penc('A10', de_data2).assign(
    m_drop=table_meanall['much'].sub(table_meanall['JAR']),
    n_drop=table_meanall['not'].sub(table_meanall['JAR']),
)

# Penalty: mean drop weighted by the share of rows in that group.
# NOTE(review): m_penalty negates its drop while n_penalty does not, so the
# two columns use opposite sign conventions -- confirm this is intended.
table_penall = table_penall.assign(
    m_penalty=(-table_penall['m_drop']).mul(table_penall['much']),
    n_penalty=table_penall['n_drop'].mul(table_penall['not']),
)

print(table_penall.loc[:, ['m_penalty', 'n_penalty']])