XGBoost 实现鸢尾花与红酒数据集的多分类问题

np.split()的用法

在这里插入图片描述

np.split(m , (3,) ,axis = 1)

鸢尾花数据集概览:

数据文件中的各字段以逗号隔开。
在这里插入图片描述


import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split   # cross_validation


def iris_type(s):
    """Map an iris species name to its integer class label.

    Intended for use as a ``numpy.loadtxt`` converter. Depending on the
    NumPy version and the ``encoding`` argument, converters receive the raw
    field either as ``bytes`` or as ``str``, so both are accepted here.

    Args:
        s: Species name, e.g. ``b'Iris-setosa'`` or ``'Iris-setosa'``.

    Returns:
        0 for Iris-setosa, 1 for Iris-versicolor, 2 for Iris-virginica.

    Raises:
        KeyError: If the name is not one of the three known species.
    """
    if isinstance(s, bytes):
        # Normalise to str so a single lookup table serves both input types.
        s = s.decode('utf-8')
    labels = {'Iris-setosa': 0,
              'Iris-versicolor': 1,
              'Iris-virginica': 2}
    return labels[s]


if __name__ == "__main__":
    # Load the iris CSV; column 4 holds the species name, which iris_type
    # converts to a numeric class label so the whole table can be float.
    path = 'C:/Users/lb/Desktop/test/iris.data'  # path to the data file
    data = np.loadtxt(path, dtype=float, delimiter=',', converters={4: iris_type})
    # Split column-wise at index 4: columns 0-3 are the features,
    # column 4 is the label.
    x, y = np.split(data, (4,), axis=1)

    # An integer test_size holds out exactly 50 samples for evaluation.
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, test_size=50)

    data_train = xgb.DMatrix(x_train, label=y_train)
    data_test = xgb.DMatrix(x_test, label=y_test)
    # Datasets whose evaluation metric is reported after each boosting round.
    watch_list = [(data_test, 'eval'), (data_train, 'train')]
    # 'verbosity': 0 keeps XGBoost's own logging quiet; the older 'silent'
    # flag was removed in favour of 'verbosity'. 'num_class' must match the
    # three label values produced by iris_type.
    param = {'max_depth': 2, 'eta': 0.3, 'verbosity': 0, 'objective': 'multi:softmax', 'num_class': 3}
    bst = xgb.train(param, data_train, num_boost_round=6, evals=watch_list)
    # With the multi:softmax objective, predict returns the class index.
    y_hat = bst.predict(data_test)
    # y_test is a single-column 2-D array; flatten it so the comparison with
    # the 1-D predictions is element-wise.
    result = y_test.ravel() == y_hat
    print('正确率:\t', float(np.sum(result)) / len(y_hat))
    print('END.....\n')

参数 num_class 的值 3 表示共有三个分类,因为数据集中一共有三个类别。
在这里插入图片描述

在这里插入图片描述
在这里插入图片描述

红酒数据概览:

在这里插入图片描述

逻辑回归和XGB对比

#!/usr/bin/python
# -*- encoding:utf-8 -*-

import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split   # cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")

def show_accuracy(a, b, tip):
    """Print the element-wise match mask of two arrays and their accuracy.

    Both arrays are flattened before comparison, so a column vector can be
    compared against a 1-D array.

    Args:
        a: Array of predicted labels; its size is the accuracy denominator.
        b: Array of reference labels with the same number of elements.
        tip: Text printed in front of the accuracy figure.
    """
    matches = (a.ravel() == b.ravel())
    print(matches)
    n_correct = float(matches.sum())
    print(tip + '正确率:\t', n_correct / a.size)


if __name__ == "__main__":
    data = np.loadtxt('./data/wine.data', dtype=float, delimiter=',')
    # Split column-wise at index 1: the first column is the class label and
    # the remaining columns (13 per the original author's note) are features.
    y, x = np.split(data, (1,), axis=1)

    # Optional feature standardisation, left disabled as in the original:
    # x = StandardScaler().fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, test_size=0.5)

    # Logistic regression baseline with L2 regularisation.
    lr = LogisticRegression(penalty='l2')
    lr.fit(x_train, y_train.ravel())
    y_hat = lr.predict(x_test)
    show_accuracy(y_hat, y_test, 'Logistic回归 ')

    # XGBoost requires class labels in [0, num_class), so relabel class 3
    # as 0 (presumably the labels are 1, 2, 3 -- confirm against the data).
    # This is done in place, after the logistic-regression evaluation above.
    y_train[y_train == 3] = 0
    y_test[y_test == 3] = 0
    data_train = xgb.DMatrix(x_train, label=y_train)
    data_test = xgb.DMatrix(x_test, label=y_test)
    # Datasets whose evaluation metric is reported after each boosting round.
    watch_list = [(data_test, 'eval'), (data_train, 'train')]
    # Multi-class objective. 'verbosity': 1 is XGBoost's default logging
    # level; the older 'silent' flag was removed in favour of 'verbosity'.
    param = {'max_depth': 3, 'eta': 1, 'verbosity': 1, 'objective': 'multi:softmax', 'num_class': 3}
    bst = xgb.train(param, data_train, num_boost_round=4, evals=watch_list)
    # With the multi:softmax objective, predict returns the class index.
    y_hat = bst.predict(data_test)
    show_accuracy(y_hat, y_test, 'XGBoost ')

在这里插入图片描述
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值