Ensemble Learning, Otto Case Study: Product Classification

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss

Data Acquisition

data = pd.read_csv("./data/otto/train.csv")
data.head()
   id  feat_1  feat_2  feat_3  feat_4  feat_5  feat_6  feat_7  feat_8  feat_9  ...  feat_85  feat_86  feat_87  feat_88  feat_89  feat_90  feat_91  feat_92  feat_93   target
0   1       1       0       0       0       0       0       0       0       0  ...        1        0        0        0        0        0        0        0        0  Class_1
1   2       0       0       0       0       0       0       0       1       0  ...        0        0        0        0        0        0        0        0        0  Class_1
2   3       0       0       0       0       0       0       0       1       0  ...        0        0        0        0        0        0        0        0        0  Class_1
3   4       1       0       0       1       6       1       5       0       0  ...        0        1        2        0        0        0        0        0        0  Class_1
4   5       0       0       0       0       0       0       0       0       0  ...        1        0        0        0        0        1        0        0        0  Class_1

5 rows × 95 columns

  • id - the product id
  • feat_1, feat_2, …, feat_93 - the product's features
  • target - the class the product belongs to
data.shape

(61878, 95)

data.describe()
[describe() output: count, mean, std, min, 25%, 50%, 75%, and max for all 94 numeric columns. Every count is 61878; the 25th and 50th percentiles of the features are 0 (and the 75th mostly so), confirming sparse count data, while per-feature maxima range from about 10 (feat_6) to 130 (feat_90).]

8 rows × 94 columns

# Visualize the class distribution
import seaborn as sns

sns.countplot(x=data["target"])  # pass x= explicitly (required by newer seaborn versions)

plt.show()

[Figure: countplot of the target classes]

The plot shows that the classes are imbalanced, which will need to be handled before modeling.
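
The imbalance can also be checked numerically; a minimal sketch using only the data frame loaded above:

# Count rows per class; the smallest class drives the undersampling used later
counts = data["target"].value_counts()
print(counts)
print("imbalance ratio:", counts.max() / counts.min())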

Basic Data Preprocessing

The data has already been anonymized (masked), so no special cleaning is required.

  • Data masking (anonymization): transforming or modifying sensitive values such as phone numbers or bank card numbers so that the raw data cannot be used directly in an untrusted environment. A toy illustration follows.
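
As a toy illustration only (this helper is hypothetical and not part of this dataset's pipeline):

def mask_phone(phone: str) -> str:
    """Replace the middle digits of a phone number with asterisks."""
    return phone[:3] + "****" + phone[-4:]

print(mask_phone("13812345678"))  # 138****5678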

Taking a Slice of the Data

new1_data = data[:10000]
new1_data.shape

(10000, 95)

# Visualize the class distribution of the slice
import seaborn as sns

sns.countplot(x=new1_data["target"])

plt.show()

[Figure: countplot of target for the first 10,000 rows]

Slicing the data this way is not workable (the file is ordered by class, so a head slice does not preserve the class mix); instead, use random undersampling to obtain the corresponding data.

# Random undersampling to obtain balanced data
# First separate the feature matrix and the labels

y = data["target"]
x = data.drop(["id", "target"], axis=1)
x.head()
   feat_1  feat_2  feat_3  feat_4  feat_5  feat_6  feat_7  feat_8  feat_9  feat_10  ...  feat_84  feat_85  feat_86  feat_87  feat_88  feat_89  feat_90  feat_91  feat_92  feat_93
0       1       0       0       0       0       0       0       0       0        0  ...        0        1        0        0        0        0        0        0        0        0
1       0       0       0       0       0       0       0       1       0        0  ...        0        0        0        0        0        0        0        0        0        0
2       0       0       0       0       0       0       0       1       0        0  ...        0        0        0        0        0        0        0        0        0        0
3       1       0       0       1       6       1       5       0       0        1  ...       22        0        1        2        0        0        0        0        0        0
4       0       0       0       0       0       0       0       0       0        0  ...        0        1        0        0        0        0        1        0        0        0

5 rows × 93 columns

y.head()
0    Class_1
1    Class_1
2    Class_1
3    Class_1
4    Class_1
Name: target, dtype: object
# Undersample the data
from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=0)

X_resampled, y_resampled = rus.fit_resample(x, y)
x.shape, y.shape

((61878, 93), (61878,))

X_resampled.shape, y_resampled.shape

((17361, 93), (17361,))

# Visualize the resampled class distribution
import seaborn as sns

sns.countplot(x=y_resampled)

plt.show()

[Figure: countplot after undersampling; all nine classes now have equal counts]

Converting the Labels to Numbers

y_resampled.head()
0    Class_1
1    Class_1
2    Class_1
3    Class_1
4    Class_1
Name: target, dtype: object
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_resampled = le.fit_transform(y_resampled)

y_resampled

array([0, 0, 0, …, 8, 8, 8])
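
The fitted encoder keeps the class mapping, which is useful later when the submission file needs the original names; a small sketch using the le object above:

print(le.classes_)                   # the nine class names in sorted order
print(le.inverse_transform([0, 8]))  # ['Class_1' 'Class_9']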

Splitting the Data

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2)
x_train.shape, y_train.shape

((13888, 93), (13888,))

x_test.shape, y_test.shape

((3473, 93), (3473,))

Model Training

Baseline Model

from sklearn.ensemble import RandomForestClassifier  # random forest


rf = RandomForestClassifier(oob_score=True)  # enable out-of-bag (OOB) accuracy estimation
rf.fit(x_train, y_train)

RandomForestClassifier(oob_score=True)

y_pre = rf.predict(x_test)
y_pre

array([3, 7, 8, …, 3, 2, 5])

rf.score(x_test, y_test)

0.7854880506766484

rf.oob_score_

0.7633208525345622

# Visualize the distribution of the predicted classes
import seaborn as sns

sns.countplot(x=y_pre)

plt.show()

[Figure: countplot of the predicted class labels]

# log_loss evaluation; this raises an error, see below
# from sklearn.metrics import log_loss

# log_loss(y_test, y_pre, eps=1e-15, normalize=True)

# y_test, y_pre

The call above fails because log_loss expects the predictions in probability form: for a multi-class problem, hard label predictions must first be one-hot encoded (one column per class).

Convert the outputs of this multi-class problem with OneHotEncoder as follows:

from sklearn.preprocessing import OneHotEncoder

one_hot = OneHotEncoder(sparse=False)  # dense output; in scikit-learn >= 1.2 this is sparse_output=False

y_test1 = one_hot.fit_transform(y_test.reshape(-1, 1))
y_pre1 = one_hot.fit_transform(y_pre.reshape(-1, 1))

y_test1
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
y_pre1
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
# log_loss evaluation; eps clips probabilities into [1e-15, 1 - 1e-15]

log_loss(y_test1, y_pre1, eps=1e-15, normalize=True)

7.40898025171566

# Output class probabilities instead of hard labels to lower the log loss
y_pre_proba = rf.predict_proba(x_test)
y_pre_proba
array([[0.01, 0.16, 0.16, ..., 0.06, 0.  , 0.  ],
       [0.14, 0.  , 0.  , ..., 0.02, 0.81, 0.02],
       [0.1 , 0.01, 0.03, ..., 0.1 , 0.03, 0.5 ],
       ...,
       [0.02, 0.23, 0.3 , ..., 0.09, 0.  , 0.05],
       [0.  , 0.27, 0.48, ..., 0.01, 0.01, 0.08],
       [0.02, 0.01, 0.  , ..., 0.02, 0.02, 0.  ]])
rf.oob_score_

0.7633208525345622

# log_loss evaluation with probability outputs

log_loss(y_test1, y_pre_proba, eps=1e-15, normalize=True)

0.7800586622785408

Model Tuning

Hyperparameters to tune: n_estimators, max_features, max_depth, min_samples_leaf (tuned one at a time below; see the grid-search sketch that follows for a combined alternative).
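
For reference, scikit-learn's GridSearchCV can search a combined grid with cross-validation. A sketch only, with illustrative grid values, not what this notebook actually runs:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Hypothetical grid mirroring the ranges explored below
param_grid = {
    "n_estimators": [50, 100, 175],
    "max_features": [10, 15, 20],
    "max_depth": [10, 30, 50],
    "min_samples_leaf": [1, 5, 10],
}
search = GridSearchCV(
    RandomForestClassifier(random_state=0, n_jobs=-1),
    param_grid,
    scoring="neg_log_loss",  # matches the competition metric
    cv=3,
)
search.fit(x_train, y_train)
print(search.best_params_)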

Scoring Metric

The exact formula:

$$\text{logloss} = -\frac{1}{N}\sum_{i=1}^{N}\sum_{j=1}^{M} y_{ij}\,\log(p_{ij})$$

In this formula,

  • i indexes samples and j indexes classes; p_ij is the predicted probability that sample i belongs to class j.
  • y_ij equals 1 if sample i truly belongs to class j, and 0 otherwise.
  • If every test sample is classified correctly, every p_ij for the true class is 1, every log(p_ij) is 0, and the total logloss is 0.
  • Suppose sample 1 actually belongs to class 1 but the model assigns it probability p_ij = 0.1; then logloss accumulates the term log(0.1). This term is negative, and the smaller p_ij is, the more negative it becomes; at p_ij = 0 it is infinite. A single confidently wrong prediction would then make the whole score infinite, which is clearly unreasonable, so very small values are clipped as follows:

    $$\max\!\left(\min\!\left(p_{ij},\, 1 - 10^{-15}\right),\, 10^{-15}\right)$$

In other words, no probability is ever taken smaller than 10^-15; this is exactly the clipping the eps=1e-15 argument to log_loss performs.
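
A minimal NumPy version of the formula, for intuition only; it assumes one-hot y_true and per-class probabilities y_prob, like the arrays used in this notebook:

import numpy as np

def logloss(y_true, y_prob, eps=1e-15):
    """Multi-class log loss with the clipping described above."""
    p = np.clip(y_prob, eps, 1 - eps)  # keep every p_ij inside [eps, 1 - eps]
    return -np.mean(np.sum(y_true * np.log(p), axis=1))

# Sanity check on tiny made-up data
y_true = np.array([[1, 0, 0], [0, 1, 0]])
y_prob = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
print(logloss(y_true, y_prob))  # ~0.2899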

Finding the Best n_estimators

# Range of n_estimators values to try
tuned_parameters = range(10, 200, 10)

# Array to record accuracy
accuracy_t = np.zeros(len(tuned_parameters))

# Array to record log loss
error_t = np.zeros(len(tuned_parameters))

# Tuning loop
for j, one_parameter in enumerate(tuned_parameters):
    rf2 = RandomForestClassifier(n_estimators=one_parameter, 
                                 max_depth=10, 
                                 max_features=10, 
                                 min_samples_leaf=10, 
                                 oob_score=True, 
                                 random_state=0, 
                                 n_jobs=-1)
    
    rf2.fit(x_train, y_train)
    
    # Record OOB accuracy
    accuracy_t[j] = rf2.oob_score_
    
    # Record log loss on the test split
    y_pre = rf2.predict_proba(x_test)
    error_t[j] = log_loss(y_test, y_pre, eps=1e-15, normalize=True)
    
    print(error_t)

[error_t is printed after each fit; only the final array is shown]
[1.11969649 1.11497311 1.11608578 1.11172577 1.11057536 1.11115966
 1.11000822 1.11048257 1.11173118 1.10990752 1.10799403 1.10837138
 1.10756294 1.10880934 1.10678094 1.1060839  1.10634137 1.10661559
 1.1074386 ]
# Visualize the tuning results
fig,axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 4), dpi=100)

axes[0].plot(tuned_parameters, error_t)
axes[1].plot(tuned_parameters, accuracy_t)

axes[0].set_xlabel("n_estimators")
axes[0].set_ylabel("error_t")
axes[1].set_xlabel("n_estimators")
axes[1].set_ylabel("accuracy_t")

axes[0].grid(True)
axes[1].grid(True)


plt.show()

[Figure: log loss (left) and OOB accuracy (right) vs. n_estimators]

Based on the plots, n_estimators=175 performs well.

Finding the Best max_features

# Range of max_features values to try
tuned_parameters = range(5, 40, 5)

# Array to record accuracy
accuracy_t = np.zeros(len(tuned_parameters))

# Array to record log loss
error_t = np.zeros(len(tuned_parameters))

# Tuning loop
for j, one_parameter in enumerate(tuned_parameters):
    rf2 = RandomForestClassifier(n_estimators=175, 
                                 max_depth=10, 
                                 max_features=one_parameter, 
                                 min_samples_leaf=10, 
                                 oob_score=True, 
                                 random_state=0, 
                                 n_jobs=-1)
    
    rf2.fit(x_train, y_train)
    
    # Record OOB accuracy
    accuracy_t[j] = rf2.oob_score_
    
    # Record log loss on the test split
    y_pre = rf2.predict_proba(x_test)
    error_t[j] = log_loss(y_test, y_pre, eps=1e-15, normalize=True)
    
    print(error_t)

[error_t is printed after each fit; only the final array is shown]
[1.20308122 1.1069064  1.07297852 1.06020463 1.05062457 1.05232421
 1.05392152]
# Visualize the tuning results
fig,axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 4), dpi=100)

axes[0].plot(tuned_parameters, error_t)
axes[1].plot(tuned_parameters, accuracy_t)

axes[0].set_xlabel("max_features")
axes[0].set_ylabel("error_t")
axes[1].set_xlabel("max_features")
axes[1].set_ylabel("accuracy_t")

axes[0].grid(True)
axes[1].grid(True)


plt.show()

[Figure: log loss (left) and OOB accuracy (right) vs. max_features]

Based on the plots, max_features=15 performs well.

Finding the Best max_depth

# Range of max_depth values to try
tuned_parameters = range(10, 100, 10)

# Array to record accuracy
accuracy_t = np.zeros(len(tuned_parameters))

# Array to record log loss
error_t = np.zeros(len(tuned_parameters))

# Tuning loop
for j, one_parameter in enumerate(tuned_parameters):
    rf2 = RandomForestClassifier(n_estimators=175, 
                                 max_depth=one_parameter, 
                                 max_features=15, 
                                 min_samples_leaf=10, 
                                 oob_score=True, 
                                 random_state=0, 
                                 n_jobs=-1)
    
    rf2.fit(x_train, y_train)
    
    # Record OOB accuracy
    accuracy_t[j] = rf2.oob_score_
    
    # Record log loss on the test split
    y_pre = rf2.predict_proba(x_test)
    error_t[j] = log_loss(y_test, y_pre, eps=1e-15, normalize=True)
    
    print(error_t)

[error_t is printed after each fit; only the final array is shown]
[1.07297852 0.84671097 0.82480963 0.8257728  0.8256875  0.8256875
 0.8256875  0.8256875  0.8256875 ]
# Visualize the tuning results
fig,axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 4), dpi=100)

axes[0].plot(tuned_parameters, error_t)
axes[1].plot(tuned_parameters, accuracy_t)

axes[0].set_xlabel("max_depth")
axes[0].set_ylabel("error_t")
axes[1].set_xlabel("max_depth")
axes[1].set_ylabel("accuracy_t")

axes[0].grid(True)
axes[1].grid(True)


plt.show()

[Figure: log loss (left) and OOB accuracy (right) vs. max_depth]

Based on the plots, max_depth=30 performs well (the loss is lowest there and flat beyond it).

Finding the Best min_samples_leaf

# Range of min_samples_leaf values to try
tuned_parameters = range(1, 10, 2)

# Array to record accuracy
accuracy_t = np.zeros(len(tuned_parameters))

# Array to record log loss
error_t = np.zeros(len(tuned_parameters))

# Tuning loop
for j, one_parameter in enumerate(tuned_parameters):
    rf2 = RandomForestClassifier(n_estimators=175, 
                                 max_depth=30, 
                                 max_features=15, 
                                 min_samples_leaf=one_parameter, 
                                 oob_score=True, 
                                 random_state=0, 
                                 n_jobs=-1)
    
    rf2.fit(x_train, y_train)
    
    # Record OOB accuracy
    accuracy_t[j] = rf2.oob_score_
    
    # Record log loss on the test split
    y_pre = rf2.predict_proba(x_test)
    error_t[j] = log_loss(y_test, y_pre, eps=1e-15, normalize=True)
    
    print(error_t)

[error_t is printed after each fit; only the final array is shown]
[0.70599298 0.74117382 0.77111756 0.79627966 0.81900835]
# Visualize the tuning results
fig,axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 4), dpi=100)

axes[0].plot(tuned_parameters, error_t)
axes[1].plot(tuned_parameters, accuracy_t)

axes[0].set_xlabel("min_samples_leaf")
axes[0].set_ylabel("error_t")
axes[1].set_xlabel("min_samples_leaf")
axes[1].set_ylabel("accuracy_t")

axes[0].grid(True)
axes[1].grid(True)


plt.show()

[Figure: log loss (left) and OOB accuracy (right) vs. min_samples_leaf]

Based on the plots, min_samples_leaf=1 performs well.

The Final Model

  • n_estimators=175
  • max_depth=30
  • max_features=15
  • min_samples_leaf=1

rf3 = RandomForestClassifier(n_estimators=175, max_depth=30, max_features=15, min_samples_leaf=1, 
                             oob_score=True, random_state=40, n_jobs=-1)

rf3.fit(x_train, y_train)
RandomForestClassifier(max_depth=30, max_features=15, n_estimators=175,
                       n_jobs=-1, oob_score=True, random_state=40)
rf3.score(x_test, y_test)

0.7782896631154621

rf3.oob_score_

0.7710973502304147

y_pre_proba1 = rf3.predict_proba(x_test)

log_loss(y_test, y_pre_proba1)

0.7013205968285336

Generating the Submission File

test_data = pd.read_csv("./data/otto/test.csv")
test_data.head()
[First 5 rows of test.csv: columns id, feat_1 … feat_93]

5 rows × 94 columns

test_data_drop_id = test_data.drop(["id"], axis=1)
test_data_drop_id.head()
[The same 5 rows with the id column dropped: feat_1 … feat_93]

5 rows × 93 columns

y_pre_test = rf3.predict_proba(test_data_drop_id)
y_pre_test
array([[1.71428571e-02, 5.32533417e-02, 8.00000000e-02, ...,
        5.14285714e-02, 0.00000000e+00, 1.14285714e-02],
       [1.08571429e-01, 7.42857143e-02, 1.14285714e-02, ...,
        2.28571429e-02, 2.45714286e-01, 1.71428571e-02],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        1.14285714e-02, 5.71428571e-03, 0.00000000e+00],
       ...,
       [1.14285714e-02, 3.23500847e-01, 3.48548842e-01, ...,
        4.57142857e-02, 5.71428571e-03, 0.00000000e+00],
       [1.14560440e-02, 2.08943705e-01, 1.44017533e-01, ...,
        1.17582418e-02, 2.74725275e-05, 1.71703297e-02],
       [9.10536850e-03, 1.95349907e-01, 2.94229924e-01, ...,
        2.11763951e-01, 3.03030303e-04, 8.00014117e-03]])
result_data = pd.DataFrame(y_pre_test, columns=["Class_"+str(i) for i in range(1, 10)])
result_data.head()
    Class_1   Class_2   Class_3   Class_4   Class_5   Class_6   Class_7   Class_8   Class_9
0  0.017143  0.053253  0.080000  0.781032  0.000000  0.005714  0.051429  0.000000  0.011429
1  0.108571  0.074286  0.011429  0.085714  0.017143  0.417143  0.022857  0.245714  0.017143
2  0.000000  0.000000  0.000000  0.000000  0.000000  0.982857  0.011429  0.005714  0.000000
3  0.045714  0.339048  0.280000  0.163810  0.000000  0.005714  0.017143  0.017143  0.131429
4  0.223810  0.000000  0.000000  0.000000  0.000000  0.011429  0.028571  0.253333  0.482857
result_data.insert(loc=0, column="id", value=test_data.id)
result_data.head()
   id   Class_1   Class_2   Class_3   Class_4   Class_5   Class_6   Class_7   Class_8   Class_9
0   1  0.017143  0.053253  0.080000  0.781032  0.000000  0.005714  0.051429  0.000000  0.011429
1   2  0.108571  0.074286  0.011429  0.085714  0.017143  0.417143  0.022857  0.245714  0.017143
2   3  0.000000  0.000000  0.000000  0.000000  0.000000  0.982857  0.011429  0.005714  0.000000
3   4  0.045714  0.339048  0.280000  0.163810  0.000000  0.005714  0.017143  0.017143  0.131429
4   5  0.223810  0.000000  0.000000  0.000000  0.000000  0.011429  0.028571  0.253333  0.482857
result_data.to_csv("./data/otto/submission.csv", index=False)