训练集读入:
def _load_headerless_csv(path, columns):
    """Read a CSV file that has no header row, labelling columns with *columns*."""
    return pd.read_csv(path, header=None, names=columns)


# Raw training tables. None of the files carries a header row, so the
# column labels are supplied here. English glosses of the labels are given
# in the comments; the labels themselves are used as-is by later code.

# user id, gender, occupation, education level, marital status,
# household registration type
user_info_train = _load_headerless_csv(
    "./train/user_info_train.txt",
    ['用户id', '性别', '职业', '教育程度', '婚姻状态', '户口类型'],
)

# user id, transaction timestamp, transaction type, transaction amount,
# salary-income flag
bank_detail_train = _load_headerless_csv(
    "./train/bank_detail_train.txt",
    ['用户id', '流水时间戳', '交易类型', '交易金额', '工资收入标记'],
)

# user id, browse timestamp, browse behaviour data, browse sub-behaviour number
browse_history_train = _load_headerless_csv(
    "./train/browse_history_train.txt",
    ['用户id', '浏览时间戳', '浏览行为数据', '浏览子行为编号'],
)

# user id, bill timestamp, bank id, previous bill amount, previous repayment
# amount, credit card limit, current bill balance, current minimum repayment,
# number of purchases, current bill amount, adjustment amount, revolving
# interest, available balance, cash advance limit, repayment status
bill_detail_train = _load_headerless_csv(
    "./train/bill_detail_train.txt",
    [
        '用户id', '账单时间戳', '银行id', '上期账单金额', '上期还款金额',
        '信用卡额度', '本期账单余额', '本期账单最低还款额', '消费笔数',
        '本期账单金额', '调整金额', '循环利息', '可用余额', '预借现金额度',
        '还款状态',
    ],
)

# user id, loan disbursement time
loan_time_train = _load_headerless_csv(
    "./train/loan_time_train.txt",
    ['用户id', '放款时间'],
)

# user id, sample label
overdue_train = _load_headerless_csv(
    "./train/overdue_train.txt",
    ['用户id', '样本标签'],
)
将loan_time_train按用户id合并到bank_detail_train、browse_history_train和bill_detail_train中，并只保留时间戳已知(时间戳>0)的记录:
# Attach each user's loan time to the three timestamped tables by joining on
# the user-id column (pandas' default inner join, so only user ids present in
# both frames survive), then keep only the rows whose timestamp is > 0.
# NOTE(review): presumably a timestamp of 0 marks an unknown time and the
# `_yizhi` suffix is pinyin for "known" — confirm against the data description.

# Bank transaction records.
bank_detail_train_merge = bank_detail_train.merge(loan_time_train, on="用户id")
bank_detail_train_yizhi = bank_detail_train_merge.loc[
    bank_detail_train_merge['流水时间戳'].gt(0)
]

# Browsing history records.
browse_history_train_merge = browse_history_train.merge(loan_time_train, on="用户id")
browse_history_train_yizhi = browse_history_train_merge.loc[
    browse_history_train_merge['浏览时间戳'].gt(0)
]

# Credit-card bill records.
bill_detail_train_merge = bill_detail_train.merge(loan_time_train, on="用户id")
bill_detail_train_yizhi = bill_detail_train_merge.loc[
    bill_detail_train_merge['账单时间戳'].gt(0)
]