1、新增一列
# Create (or overwrite) the column 'fold' and set it to the constant 1;
# the scalar is broadcast to every row.
# NOTE(review): assumes trainset1 is a pandas DataFrame — confirm.
trainset1['fold'] = 1
2、导入导出CSV
# Load a CSV file from disk into a DataFrame.
trainset1=pd.read_csv("E:/anaconda3/DATA/trainset1.csv")
# Write a DataFrame to CSV; index=False leaves the row index out of the file.
# Note that this exports `trainset`, not the `trainset1` read above.
trainset.to_csv('C:/Users/Lenovo/Desktop/trainset.csv',index=False)
3、数据框合并(默认axis=0)
# Stack the five frames on top of each other (pd.concat defaults to axis=0).
# Each frame keeps its own index labels because ignore_index is left at its
# default of False, so labels may repeat in the result.
validationset=pd.concat([validationset1,validationset2,validationset3,validationset4,validationset5])
4、找到最大值的索引
# fitness is a list (1-D sequence of scores).
# np.argmax returns the position of the first occurrence of the maximum, which
# is the same element np.where(x == np.max(x))[0][0] selects for 1-D input.
# Difference: if the data contains NaN, argmax returns the position of the
# first NaN, whereas the where/max form raises IndexError.
bestFitnessId = np.argmax(np.asarray(fitness))
# Converting to a plain array first guarantees a 0-based position is returned
# rather than an index label when s1 is a pandas Series.
bestFitnessIndex = np.argmax(np.asarray(s1))
5、.loc提取数据框某一行或某一列或特定位置元素
# Select the whole 'fitness' column of History (all rows) as a Series.
s1 = History.loc[:, 'fitness']
# Select the row labelled bestFitnessIndex, then take its first element by
# position. .iloc[0] makes the positional access explicit; a bare [0] on a
# label-indexed Series relies on the integer-as-position fallback that pandas
# has deprecated.
# NOTE(review): .loc is label-based, while bestFitnessIndex is computed as a
# 0-based position; the two agree only when History has the default 0..n-1
# index — confirm.
History.loc[bestFitnessIndex, :].iloc[0]
6、%d格式化字符串(用整数变量a替换字符串中的%d)
# %-formatting: the integer `a` replaces %d in the path, so e.g. a = 3 reads
# ".../trainset3.csv". %d requires a number; a string value raises TypeError.
trainset=pd.read_csv("E:/anaconda3/DATA/trainset%d.csv" %a)
7、两个列表合并为数据框
# Convert each array to a list first (e.g. list(arr)); the empty lists below
# are placeholders for that data.
a = []
b = []
# Map each output column name to the list that supplies its values.
# Insertion order of the keys becomes the column order of the DataFrame.
c = {}
c["a的变量名"] = a
c["b的变量名"] = b
# Build the DataFrame: one column per dict key, one row per list element.
result = pd.DataFrame(data=c)
8、数组ifelse条件赋值(threshold须为numpy数组或pandas Series,Python列表不支持布尔索引)
# Binarise in place with boolean masks: values above 0.5 become 1 ...
# NOTE(review): this needs an object that supports boolean-mask indexing
# (e.g. a NumPy array or pandas Series); a plain Python list raises TypeError
# on `threshold > 0.5` — confirm the type of threshold.
threshold[threshold>0.5]=1
# ... and everything at or below 0.5 becomes 0. The entries just set to 1 are
# still > 0.5, so this second assignment does not touch them.
threshold[threshold<=0.5]=0
9、筛选违约样本(PD==1)与非违约样本(PD==0)
# Boolean-mask row filter: keep only the rows whose 'PD' column equals 1.
# The result is not assigned here; it is a new object and trainset1 is unchanged.
# NOTE(review): presumably PD == 1 marks a defaulted sample — confirm.
trainset1[trainset1['PD']==1]
# Same filter for the rows whose 'PD' column equals 0.
trainset1[trainset1['PD']==0]