# Classification algorithms: LR / RF / GBDT / AdaBoost
# Python package: sklearn
# -*- coding: utf-8 -*-
"""
Created on Wed May 9 10:37:12 2018
比较不同分类算法效果
分类算法:LR/RF/GBDT/ADABOOST
@author: DELL
"""
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
import pandas as pd
import numpy as np
# Load the data; per the original note each CSV has the columns
# user, feature, pv, label.
data = pd.read_csv('E:\\creditCard\\rf_data\\train_500.csv')  # positive samples
data1 = pd.read_csv('E:\\creditCard\\rf_data\\train_1w.csv')  # negative samples
# Stack both sample sets into one frame. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat performs the same row-wise merge
# and, like append's default, keeps each frame's original row index.
data = pd.concat([data, data1])
# Reshape the data towards a user-by-feature matrix.
grp = data.groupby(['user', 'feature'])
# Sum the non-key columns (pv accumulation) for every (user, feature) pair.
# The string alias 'sum' is what pandas already dispatched np.sum to, and it
# avoids the FutureWarning raised for passing the NumPy callable.
grp_1 = grp.agg('sum')
# Move the (user, feature) group keys from the index back into columns.
grp_2 = grp_1.reset_index()
grp_3 = grp_2.pivot_table(index='user',columns='