心跳信号分类预测_baseline_v3:每个标签参数进行了均分
# 2021.05.08
# 在v2基础上 让样本均匀分布,对小样本更有利
# 本文原创 望赞鼓励,转载请说明出处.
末尾有惊喜
总结为什么?度怎么把握
Datawhile三月选题:心跳信号分类
import os
import gc
import math
import pandas as pd
import numpy as np
import lightgbm as lgb
#import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings('ignore')
#reading dataset
train = pd.read_csv("train.csv")
test = pd.read_csv("testA.csv")
train
id | heartbeat_signals | label | |
---|---|---|---|
0 | 0 | 0.9912297987616655,0.9435330436439665,0.764677... | 0.0 |
1 | 1 | 0.9714822034884503,0.9289687459588268,0.572932... | 0.0 |
2 | 2 | 1.0,0.9591487564065292,0.7013782792997189,0.23... | 2.0 |
3 | 3 | 0.9757952826275774,0.9340884687738161,0.659636... | 0.0 |
4 | 4 | 0.0,0.055816398940721094,0.26129357194994196,0... | 2.0 |
... | ... | ... | ... |
99995 | 99995 | 1.0,0.677705342021188,0.22239242747868546,0.25... | 0.0 |
99996 | 99996 | 0.9268571578157265,0.9063471198026871,0.636993... | 2.0 |
99997 | 99997 | 0.9258351628306013,0.5873839035878395,0.633226... | 3.0 |
99998 | 99998 | 1.0,0.9947621698382489,0.8297017704865509,0.45... | 2.0 |
99999 | 99999 | 0.9259994004527861,0.916476635326053,0.4042900... | 0.0 |
100000 rows × 3 columns
def reduce_mem_usage(df):
start_mem = df.memory_usage().sum() / 1024**2
print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
for col in df.columns:
col_type = df[col].dtype
if col_type != object:
c_min = df[col].min()
c_max = df[col].max()
if str(col_type)[:3] == 'int':
if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
df[col] = df[col].astype(np.int8)
elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
df[col] = df[col].astype(np.int16)
elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
df[col] = df[col].astype(np.int32)
elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
df[col] = df[col].astype(np.int64)
else:
if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
df[col] = df[col].astype(np.float16)
elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
df[col] = df[col].astype(np.float32)
else:
df[col] = df[col].astype(np.float64)
else:
df[col] = df[col].astype('category')
end_mem = df.memory_usage().sum() / 1024**2
print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
return df
# 简单预处理
train_list = []
for items in train.values:
train_list.append([items[0]] + [float(i) for i in items[1].split(',')] + [items[2]])
train = pd.DataFrame(np.array(train_list))
train.columns = ['id'] + ['s_'+str(i) for i in range(len(train_list[0])-2)] + ['label']
train = reduce_mem_usage(train)
test_list=[]
for items in test.values:
test_list.append([items[0]] + [float(i) for i in items[1].split(',')])
test = pd.DataFrame(np.array(test_list))
test.columns = ['id'] + ['s_'+str(i) for i in range(len(test_list[0])-1)]
test = reduce_mem_usage(test)
Memory usage of dataframe is 157.93 MB
Memory usage after optimization is: 39.67 MB
Decreased by 74.9%
Memory usage of dataframe is 31.43 MB
Memory usage after optimization is: 7.90 MB
Decreased by 74.9%
test
id | s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | ... | s_195 | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100000.0 | 0.991699 | 1.000000 | 0.631836 | 0.136230 | 0.041412 | 0.102722 | 0.120850 | 0.123413 | 0.107910 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
1 | 100001.0 | 0.607422 | 0.541504 | 0.340576 | 0.000000 | 0.090698 | 0.164917 | 0.195068 | 0.168823 | 0.198853 | ... | 0.389893 | 0.386963 | 0.367188 | 0.364014 | 0.360596 | 0.357178 | 0.350586 | 0.350586 | 0.350586 | 0.36377 |
2 | 100002.0 | 0.975098 | 0.670898 | 0.686523 | 0.708496 | 0.718750 | 0.716797 | 0.720703 | 0.701660 | 0.596680 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
3 | 100003.0 | 0.995605 | 0.916992 | 0.520996 | 0.000000 | 0.221802 | 0.404053 | 0.490479 | 0.527344 | 0.518066 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
4 | 100004.0 | 1.000000 | 0.888184 | 0.745605 | 0.531738 | 0.380371 | 0.224609 | 0.091125 | 0.057648 | 0.003914 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19995 | 119995.0 | 1.000000 | 0.833008 | 0.634277 | 0.639160 | 0.624023 | 0.598145 | 0.613770 | 0.624023 | 0.628906 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19996 | 119996.0 | 1.000000 | 0.826172 | 0.452148 | 0.082214 | 0.000000 | 0.137085 | 0.201050 | 0.165649 | 0.158081 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19997 | 119997.0 | 0.951660 | 0.916504 | 0.667480 | 0.352051 | 0.255371 | 0.197388 | 0.173584 | 0.141968 | 0.134521 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19998 | 119998.0 | 0.927734 | 0.677246 | 0.242920 | 0.055359 | 0.102112 | 0.072266 | 0.021011 | 0.038300 | 0.048553 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19999 | 119999.0 | 0.665527 | 0.526855 | 0.516602 | 0.376465 | 0.489258 | 0.480713 | 0.459229 | 0.482910 | 0.469971 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
20000 rows × 206 columns
train
id | s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | ... | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204 | label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 0.991211 | 0.943359 | 0.764648 | 0.618652 | 0.379639 | 0.190796 | 0.040222 | 0.026001 | 0.031708 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | 1.0 | 0.971680 | 0.929199 | 0.572754 | 0.178467 | 0.122986 | 0.132324 | 0.094421 | 0.089600 | 0.030487 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 2.0 | 1.000000 | 0.958984 | 0.701172 | 0.231812 | 0.000000 | 0.080688 | 0.128418 | 0.187500 | 0.280762 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
3 | 3.0 | 0.975586 | 0.934082 | 0.659668 | 0.249878 | 0.237061 | 0.281494 | 0.249878 | 0.249878 | 0.241455 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | 4.0 | 0.000000 | 0.055817 | 0.261230 | 0.359863 | 0.433105 | 0.453613 | 0.499023 | 0.542969 | 0.616699 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99995 | 99995.0 | 1.000000 | 0.677734 | 0.222412 | 0.257080 | 0.204712 | 0.054657 | 0.026154 | 0.118164 | 0.244873 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
99996 | 99996.0 | 0.926758 | 0.906250 | 0.637207 | 0.415039 | 0.374756 | 0.382568 | 0.358887 | 0.341309 | 0.336426 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
99997 | 99997.0 | 0.925781 | 0.587402 | 0.633301 | 0.632324 | 0.639160 | 0.614258 | 0.599121 | 0.517578 | 0.403809 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
99998 | 99998.0 | 1.000000 | 0.994629 | 0.829590 | 0.458252 | 0.264160 | 0.240234 | 0.213745 | 0.189331 | 0.203857 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
99999 | 99999.0 | 0.925781 | 0.916504 | 0.404297 | 0.000000 | 0.262939 | 0.385498 | 0.361084 | 0.332764 | 0.339844 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
100000 rows × 207 columns
train['label'].value_counts()
0.0 64327
3.0 17912
2.0 14199
1.0 3562
Name: label, dtype: int64
#存在严重的数据不均衡
# 为均等数据处理做准备
df_1 = train[train['label'] == 1]
df_2 = train[train['label'] == 2]
df_3 = train[train['label'] == 3]
# df_1
# # 经检查没问题,取得是相应label
# 对数据量不足的填充 replace=ture意思是可以重复,随机种子要一致 不然每次随出来的都不一样 样本填充到20.000
from sklearn.utils import resample
df_1_upsample = resample(df_1, n_samples = 20000, replace = True, random_state = 123)
df_2_upsample = resample(df_2, n_samples = 20000, replace = True, random_state = 123)
df_3_upsample = resample(df_3, n_samples = 20000, replace = True, random_state = 123)
#0号lebal因为样本量大 所以随机出来20,000
df_0 = train[train['label']==0].sample(n =20000, random_state=123)
#把这四个样本合并成一个新的样本
#!!!!!ignore_index=True!!!一定要加 这个不加序号是乱的 后面再运行函数cv时候会报错
train_df = pd.concat([df_0, df_1_upsample, df_2_upsample, df_3_upsample],ignore_index=True)
#新样本 四个lebal这时候均分了,余下地方和V2一致
train_df['label'].value_counts()
0.0 20000
3.0 20000
1.0 20000
2.0 20000
Name: label, dtype: int64
train_df
id | s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | ... | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204 | label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 12037.0 | 0.981445 | 0.986816 | 0.558105 | 0.000000 | 0.295898 | 0.424805 | 0.431152 | 0.455322 | 0.448975 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | 1389.0 | 0.986328 | 0.864746 | 0.645996 | 0.352783 | 0.087463 | 0.023849 | 0.078979 | 0.109924 | 0.112671 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 11045.0 | 0.000000 | 0.093140 | 0.274414 | 0.442139 | 0.564941 | 0.613770 | 0.726074 | 0.769531 | 0.796387 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 4849.0 | 1.000000 | 0.999023 | 0.883789 | 0.583984 | 0.535156 | 0.607910 | 0.593750 | 0.596680 | 0.628418 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | 40943.0 | 0.991211 | 0.941895 | 0.336914 | 0.037476 | 0.223389 | 0.366699 | 0.380127 | 0.390625 | 0.366699 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | 16396.0 | 0.904297 | 0.866699 | 0.812988 | 0.765137 | 0.686035 | 0.593262 | 0.460449 | 0.328125 | 0.197754 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
79996 | 48610.0 | 0.914551 | 0.879883 | 0.831543 | 0.785645 | 0.727539 | 0.648926 | 0.561035 | 0.456055 | 0.327881 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
79997 | 31521.0 | 0.753906 | 0.655762 | 0.537109 | 0.412842 | 0.242432 | 0.144897 | 0.051422 | 0.024811 | 0.044800 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
79998 | 69413.0 | 0.664062 | 0.611328 | 0.588379 | 0.540527 | 0.516113 | 0.466064 | 0.428711 | 0.343994 | 0.286865 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
79999 | 25387.0 | 0.857910 | 0.773926 | 0.676758 | 0.550293 | 0.393066 | 0.213257 | 0.102844 | 0.001230 | 0.000549 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
80000 rows × 207 columns
# #这里做相应的改变,是从新样本中取
# x_train = train.drop(['id','label'], axis=1)
# y_train = train['label']
# x_test=test.drop(['id'], axis=1)
#这里做相应的改变,是从新样本中取
x_train = train_df.drop(['id','label'], axis=1)
y_train = train_df['label']
x_test=test.drop(['id'], axis=1)
x_train
s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | s_9 | ... | s_195 | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.981445 | 0.986816 | 0.558105 | 0.000000 | 0.295898 | 0.424805 | 0.431152 | 0.455322 | 0.448975 | 0.447266 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | 0.986328 | 0.864746 | 0.645996 | 0.352783 | 0.087463 | 0.023849 | 0.078979 | 0.109924 | 0.112671 | 0.098755 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 0.000000 | 0.093140 | 0.274414 | 0.442139 | 0.564941 | 0.613770 | 0.726074 | 0.769531 | 0.796387 | 0.782715 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 1.000000 | 0.999023 | 0.883789 | 0.583984 | 0.535156 | 0.607910 | 0.593750 | 0.596680 | 0.628418 | 0.619141 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | 0.991211 | 0.941895 | 0.336914 | 0.037476 | 0.223389 | 0.366699 | 0.380127 | 0.390625 | 0.366699 | 0.387939 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | 0.904297 | 0.866699 | 0.812988 | 0.765137 | 0.686035 | 0.593262 | 0.460449 | 0.328125 | 0.197754 | 0.150024 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
79996 | 0.914551 | 0.879883 | 0.831543 | 0.785645 | 0.727539 | 0.648926 | 0.561035 | 0.456055 | 0.327881 | 0.238647 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
79997 | 0.753906 | 0.655762 | 0.537109 | 0.412842 | 0.242432 | 0.144897 | 0.051422 | 0.024811 | 0.044800 | 0.111633 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
79998 | 0.664062 | 0.611328 | 0.588379 | 0.540527 | 0.516113 | 0.466064 | 0.428711 | 0.343994 | 0.286865 | 0.125244 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
79999 | 0.857910 | 0.773926 | 0.676758 | 0.550293 | 0.393066 | 0.213257 | 0.102844 | 0.001230 | 0.000549 | 0.035858 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
80000 rows × 205 columns
x_test
s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | s_9 | ... | s_195 | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.991699 | 1.000000 | 0.631836 | 0.136230 | 0.041412 | 0.102722 | 0.120850 | 0.123413 | 0.107910 | 0.110535 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
1 | 0.607422 | 0.541504 | 0.340576 | 0.000000 | 0.090698 | 0.164917 | 0.195068 | 0.168823 | 0.198853 | 0.153564 | ... | 0.389893 | 0.386963 | 0.367188 | 0.364014 | 0.360596 | 0.357178 | 0.350586 | 0.350586 | 0.350586 | 0.36377 |
2 | 0.975098 | 0.670898 | 0.686523 | 0.708496 | 0.718750 | 0.716797 | 0.720703 | 0.701660 | 0.596680 | 0.487061 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
3 | 0.995605 | 0.916992 | 0.520996 | 0.000000 | 0.221802 | 0.404053 | 0.490479 | 0.527344 | 0.518066 | 0.545410 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
4 | 1.000000 | 0.888184 | 0.745605 | 0.531738 | 0.380371 | 0.224609 | 0.091125 | 0.057648 | 0.003914 | 0.007820 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19995 | 1.000000 | 0.833008 | 0.634277 | 0.639160 | 0.624023 | 0.598145 | 0.613770 | 0.624023 | 0.628906 | 0.624023 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19996 | 1.000000 | 0.826172 | 0.452148 | 0.082214 | 0.000000 | 0.137085 | 0.201050 | 0.165649 | 0.158081 | 0.165649 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19997 | 0.951660 | 0.916504 | 0.667480 | 0.352051 | 0.255371 | 0.197388 | 0.173584 | 0.141968 | 0.134521 | 0.127075 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19998 | 0.927734 | 0.677246 | 0.242920 | 0.055359 | 0.102112 | 0.072266 | 0.021011 | 0.038300 | 0.048553 | 0.017532 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19999 | 0.665527 | 0.526855 | 0.516602 | 0.376465 | 0.489258 | 0.480713 | 0.459229 | 0.482910 | 0.469971 | 0.399170 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
20000 rows × 205 columns
y_train
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
...
79995 3.0
79996 3.0
79997 3.0
79998 3.0
79999 3.0
Name: label, Length: 80000, dtype: float16
# loss函数
def abs_sum(y_pre,y_tru):
y_pre=np.array(y_pre)
y_tru=np.array(y_tru)
loss=sum(sum(abs(y_pre-y_tru)))
return loss
from keras.models import Sequential
from keras.layers import Dense # for fully connected layers dense will be used
from keras.layers import Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam
# avoid overfitting by normalizing the samples
from keras.layers.normalization import BatchNormalization
# cnn
def build_model():
model = Sequential()
#过滤器=神经元总数中的单位
#Padding='相同',零填充,在输入数据周围添加零像素
model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same', input_shape = (205, 1))) #we pass individual values hence not 100000,187,1
#:(none,205,64)
# Normalization to avoid overfitting
# 大概就是保持数据的敏感 ,加在全连接函数和激励函数之间:https://www.bilibili.com/video/BV1Lx411j7GT?from=search&seid=5048435414489430319
model.add(BatchNormalization())
# #:(none,205,64)#只是把数据重初始化一下,也就是重新分布了一下,为了使得这些数在下面的激励函数中区分的更大
# Pooling
model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))
# :(none,103,64)# 因为strides是2 一下跳两步 padding=“same" 所以变化前特征总数不变 那么输出就是187/2=94
model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same'))# (none, 103, 64)
model.add(BatchNormalization()) #:(none, 103, 64)
model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same')) #:(none, 52, 64)
model.add(Conv1D( filters = 64, kernel_size = 6, activation='relu', padding = 'same'))#:(none, 52, 64)
model.add(BatchNormalization()) #:(none, 52, 64)
model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same')) #:(none, 26, 64)
# Flatten
model.add(Flatten())
#:(none, 1664) 这一步就是把多维的数据铺平 26*64 = 1664
# Fully connected layer
# input layer
#Dense(用来写输出层)
#当输入序列的长度固定时,该值为其长度。如果要在该层后接Flatten层,然后接Dense层,则必须指定该参数,否则Dense层的输出维度无法自动推断。
#units:大于0的整数,代表该层的输出维度。
model.add(Dense(units = 64, activation='relu'))
# Hidden Layer
model.add(Dense(units = 64, activation='relu'))
# Output Layer
model.add(Dense(units = 4, activation='softmax'))
# loss = 'categorical_crossentropy'
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
return model
model = build_model()
# This is for one sample, i.e. one row
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 205, 64) 448
_________________________________________________________________
batch_normalization (BatchNo (None, 205, 64) 256
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 103, 64) 0
_________________________________________________________________
conv1d_1 (Conv1D) (None, 103, 64) 24640
_________________________________________________________________
batch_normalization_1 (Batch (None, 103, 64) 256
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 52, 64) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 52, 64) 24640
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 64) 256
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 26, 64) 0
_________________________________________________________________
flatten (Flatten) (None, 1664) 0
_________________________________________________________________
dense (Dense) (None, 64) 106560
_________________________________________________________________
dense_1 (Dense) (None, 64) 4160
_________________________________________________________________
dense_2 (Dense) (None, 4) 260
=================================================================
Total params: 161,476
Trainable params: 161,092
Non-trainable params: 384
_________________________________________________________________
from keras.utils.np_utils import to_categorical
def cv_model(clf, train_x, train_y, test_x, clf_name):
folds = 4
seed = 2021
#k-交叉验证KFold
# n_split:要划分的折数
#shuffle: 每次都进行shuffle,测试集中折数的总和就是训练集的个数
#random_state:随机状态 总结:对于那些本质上是随机的过程,我们有必要控制随机的状态,这样才能重复的展现相同的结果。
#如果,对随机状态不加控制,那么实验的结果就无法固定,而是随机的显现。比喻的说一下,也不知道准不准确。
# 一个容器中放置一定量的沙子,每次用手去抓沙子的时候,抓取的结果会受到抓取的力度、一只手抓还是两只手抓、手是干的或湿的等诸多因素的影响(将影响因素定为A={a,b,c,d,e,f,……})。
#固定random_state后,每次抓取沙子时的影响因素将被固定为具体的某一组,这样每次抓取的沙子就具有了相同的状态。
kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
#为预测做准备
test = np.zeros((test_x.shape[0],4))
test_x = test_x.iloc[:, :].values
test_x = test_x.reshape(len(test_x),test_x.shape[1],1)
cv_scores = []
onehot_encoder = OneHotEncoder(sparse=False)
for i, (train_index, test_index) in enumerate(kf.split(x_train, y_train)):
print('************************************ {} ************************************'.format(str(i+1)))
x_kf_train, y_kf_train, x_kf_test, y_kf_test = x_train.iloc[train_index], y_train[train_index], x_train.iloc[test_index], y_train[test_index]
if clf_name == "cnn":
x_kf_train = x_kf_train.iloc[:, :].values
x_kf_test = x_kf_test.iloc[:, :].values
x_kf_train = x_kf_train.reshape(len(x_kf_train),x_kf_train.shape[1],1)
x_kf_test = x_kf_test.reshape(len(x_kf_test),x_kf_test.shape[1],1)
y_kf_train = to_categorical(y_kf_train)
y_kf_test = to_categorical(y_kf_test)
history = model.fit(x_kf_train,y_kf_train, epochs = 15, batch_size = 32, validation_data=(x_kf_test, y_kf_test))
x_kf_test_pred = model.predict(x_kf_test)
test_pred = model.predict(test_x)
print("y_kf_test++++++:")
print(y_kf_test)
print('预测的概率矩阵为test_pred:')
print(test_pred)
print("abs_sum++++++:")
score=abs_sum(y_kf_test, x_kf_test_pred)
cv_scores.append(score)
print("cv_scores+++++:")
print(cv_scores)
# test += test_pred
# test=test/kf.n_splits
# 将将要预测的带到
return test_pred
def lgb_model(x_train, y_train, x_test):
lgb_test = cv_model(lgb, x_train, y_train, x_test, "cnn")
return lgb_test
lgb_test = lgb_model(x_train, y_train, x_test)
************************************ 1 ************************************
Epoch 1/15
1875/1875 [==============================] - 60s 31ms/step - loss: 0.2759 - accuracy: 0.8949 - val_loss: 0.1356 - val_accuracy: 0.9588
Epoch 2/15
1875/1875 [==============================] - 59s 32ms/step - loss: 0.0875 - accuracy: 0.9688 - val_loss: 0.0670 - val_accuracy: 0.9768
Epoch 3/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0539 - accuracy: 0.9812 - val_loss: 0.0565 - val_accuracy: 0.9797
Epoch 4/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0407 - accuracy: 0.9865 - val_loss: 0.0454 - val_accuracy: 0.9859
Epoch 5/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0323 - accuracy: 0.9891 - val_loss: 0.0395 - val_accuracy: 0.9867
Epoch 6/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0233 - accuracy: 0.9924 - val_loss: 0.0342 - val_accuracy: 0.9895
Epoch 7/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0232 - accuracy: 0.9923 - val_loss: 0.0394 - val_accuracy: 0.9868
Epoch 8/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0151 - accuracy: 0.9951 - val_loss: 0.0416 - val_accuracy: 0.9880
Epoch 9/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0168 - accuracy: 0.9952 - val_loss: 0.0564 - val_accuracy: 0.9827
Epoch 10/15
1875/1875 [==============================] - 55s 29ms/step - loss: 0.0196 - accuracy: 0.9939 - val_loss: 0.0274 - val_accuracy: 0.9926
Epoch 11/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0115 - accuracy: 0.9960 - val_loss: 0.0267 - val_accuracy: 0.9938
Epoch 12/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0087 - accuracy: 0.9973 - val_loss: 0.0472 - val_accuracy: 0.9862
Epoch 13/15
1875/1875 [==============================] - 59s 31ms/step - loss: 0.0097 - accuracy: 0.9967 - val_loss: 0.0334 - val_accuracy: 0.9923
Epoch 14/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0091 - accuracy: 0.9970 - val_loss: 0.0478 - val_accuracy: 0.9890
Epoch 15/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0133 - accuracy: 0.9961 - val_loss: 0.0329 - val_accuracy: 0.9912
y_kf_test++++++:
[[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
...
[0. 0. 0. 1.]
[0. 0. 0. 1.]
[0. 0. 0. 1.]]
预测的概率矩阵为test_pred:
[[9.9999166e-01 8.3696723e-06 7.0094308e-09 1.5220067e-08]
[6.5147565e-10 2.9474978e-09 1.0000000e+00 3.8487034e-15]
[3.4868474e-20 3.0091077e-13 3.5588698e-16 1.0000000e+00]
...
[9.6342439e-05 3.3096262e-06 9.9989974e-01 5.9694372e-07]
[9.9999988e-01 1.6789036e-07 1.0092740e-08 1.7682024e-11]
[8.7801713e-01 9.7556551e-07 2.5424310e-03 1.1943953e-01]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008]
************************************ 2 ************************************
Epoch 1/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0186 - accuracy: 0.9949 - val_loss: 0.0089 - val_accuracy: 0.9963
Epoch 2/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0116 - accuracy: 0.9965 - val_loss: 0.0138 - val_accuracy: 0.9955
Epoch 3/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0106 - accuracy: 0.9967 - val_loss: 0.0207 - val_accuracy: 0.9941
Epoch 4/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0093 - accuracy: 0.9971 - val_loss: 0.0101 - val_accuracy: 0.9964
Epoch 5/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0070 - accuracy: 0.9978 - val_loss: 0.0196 - val_accuracy: 0.9949
Epoch 6/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0073 - accuracy: 0.9977 - val_loss: 0.0124 - val_accuracy: 0.9969
Epoch 7/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0078 - accuracy: 0.9975 - val_loss: 0.0151 - val_accuracy: 0.9956
Epoch 8/15
1875/1875 [==============================] - 61s 32ms/step - loss: 0.0075 - accuracy: 0.9977 - val_loss: 0.0302 - val_accuracy: 0.9913
Epoch 9/15
1875/1875 [==============================] - 60s 32ms/step - loss: 0.0065 - accuracy: 0.9982 - val_loss: 0.0090 - val_accuracy: 0.9971
Epoch 10/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0065 - accuracy: 0.9982 - val_loss: 0.0263 - val_accuracy: 0.9945
Epoch 11/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0070 - accuracy: 0.9978 - val_loss: 0.0103 - val_accuracy: 0.9970
Epoch 12/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0056 - accuracy: 0.9985 - val_loss: 0.0134 - val_accuracy: 0.9965
Epoch 13/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0061 - accuracy: 0.9983 - val_loss: 0.0128 - val_accuracy: 0.9963
Epoch 14/15
1875/1875 [==============================] - 59s 31ms/step - loss: 0.0069 - accuracy: 0.9981 - val_loss: 0.0295 - val_accuracy: 0.9904
Epoch 15/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0052 - accuracy: 0.9988 - val_loss: 0.0134 - val_accuracy: 0.9967
y_kf_test++++++:
[[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
...
[0. 0. 0. 1.]
[0. 0. 0. 1.]
[0. 0. 0. 1.]]
预测的概率矩阵为test_pred:
[[9.99999762e-01 2.31557891e-07 1.64789016e-12 1.57000205e-10]
[7.08220592e-13 3.05859255e-10 1.00000000e+00 1.08148854e-23]
[5.37503151e-19 4.81650921e-20 5.44116887e-20 1.00000000e+00]
...
[2.33657775e-03 2.78295577e-03 9.94879365e-01 1.02097715e-06]
[9.99999523e-01 4.99625287e-07 2.50645789e-11 2.91075359e-08]
[1.00000000e+00 1.97536454e-09 4.46538850e-09 6.42643183e-09]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008, 166.70497226715088]
************************************ 3 ************************************
Epoch 1/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0103 - accuracy: 0.9975 - val_loss: 0.0062 - val_accuracy: 0.9985
Epoch 2/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0042 - accuracy: 0.9988 - val_loss: 0.0129 - val_accuracy: 0.9963
Epoch 3/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0071 - accuracy: 0.9978 - val_loss: 0.0027 - val_accuracy: 0.9992
Epoch 4/15
1875/1875 [==============================] - 61s 32ms/step - loss: 0.0042 - accuracy: 0.9987 - val_loss: 0.0055 - val_accuracy: 0.9982
Epoch 5/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0061 - accuracy: 0.9984 - val_loss: 0.0116 - val_accuracy: 0.9963
Epoch 6/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0048 - accuracy: 0.9986 - val_loss: 0.0055 - val_accuracy: 0.9984
Epoch 7/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0045 - accuracy: 0.9991 - val_loss: 0.0084 - val_accuracy: 0.9984
Epoch 8/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0062 - accuracy: 0.9983 - val_loss: 0.0036 - val_accuracy: 0.9989
Epoch 9/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0039 - accuracy: 0.9989 - val_loss: 0.0091 - val_accuracy: 0.9973
Epoch 10/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0054 - accuracy: 0.9986 - val_loss: 0.0167 - val_accuracy: 0.9957
Epoch 11/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0061 - accuracy: 0.9986 - val_loss: 0.0188 - val_accuracy: 0.9967
Epoch 12/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0025 - accuracy: 0.9991 - val_loss: 0.0162 - val_accuracy: 0.9948
Epoch 13/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0050 - accuracy: 0.9986 - val_loss: 0.0081 - val_accuracy: 0.9983
Epoch 14/15
1875/1875 [==============================] - 60s 32ms/step - loss: 0.0047 - accuracy: 0.9988 - val_loss: 0.0143 - val_accuracy: 0.9962
Epoch 15/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0046 - accuracy: 0.9989 - val_loss: 0.0293 - val_accuracy: 0.9941
y_kf_test++++++:
[[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
...
[0. 0. 0. 1.]
[0. 0. 0. 1.]
[0. 0. 0. 1.]]
预测的概率矩阵为test_pred:
[[9.9999940e-01 5.5716970e-07 4.2182435e-10 7.5162426e-15]
[3.0866704e-10 2.2008441e-08 1.0000000e+00 3.8451365e-23]
[3.2547221e-23 1.0347285e-20 5.9116691e-22 1.0000000e+00]
...
[7.4469927e-04 4.4713984e-06 9.9902654e-01 2.2425935e-04]
[1.0000000e+00 6.6102653e-12 9.1913356e-16 7.5038865e-26]
[9.9999976e-01 2.0411268e-07 3.4282695e-11 1.1750135e-11]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008, 166.70497226715088, 286.96989250183105]
************************************ 4 ************************************
Epoch 1/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0064 - accuracy: 0.9982 - val_loss: 0.0071 - val_accuracy: 0.9980
Epoch 2/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0039 - accuracy: 0.9991 - val_loss: 0.0132 - val_accuracy: 0.9973
Epoch 3/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0060 - accuracy: 0.9983 - val_loss: 0.0273 - val_accuracy: 0.9916
Epoch 4/15
1875/1875 [==============================] - 59s 31ms/step - loss: 0.0042 - accuracy: 0.9989 - val_loss: 0.0024 - val_accuracy: 0.9991
Epoch 5/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0030 - accuracy: 0.9992 - val_loss: 0.0026 - val_accuracy: 0.9994
Epoch 6/15
1875/1875 [==============================] - 60s 32ms/step - loss: 0.0038 - accuracy: 0.9991 - val_loss: 0.0056 - val_accuracy: 0.9982
Epoch 7/15
1875/1875 [==============================] - 62s 33ms/step - loss: 0.0036 - accuracy: 0.9991 - val_loss: 0.0026 - val_accuracy: 0.9991
Epoch 8/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0042 - accuracy: 0.9987 - val_loss: 0.0255 - val_accuracy: 0.9923
Epoch 9/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0107 - val_accuracy: 0.9974
Epoch 10/15
1875/1875 [==============================] - 54s 29ms/step - loss: 0.0034 - accuracy: 0.9991 - val_loss: 0.0022 - val_accuracy: 0.9992
Epoch 11/15
1875/1875 [==============================] - 53s 29ms/step - loss: 0.0039 - accuracy: 0.9989 - val_loss: 0.0222 - val_accuracy: 0.9959
Epoch 12/15
1875/1875 [==============================] - 54s 29ms/step - loss: 0.0024 - accuracy: 0.9992 - val_loss: 0.0033 - val_accuracy: 0.9990
Epoch 13/15
1875/1875 [==============================] - 55s 29ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0153 - val_accuracy: 0.9955
Epoch 14/15
1875/1875 [==============================] - 53s 28ms/step - loss: 0.0037 - accuracy: 0.9990 - val_loss: 0.0063 - val_accuracy: 0.9982
Epoch 15/15
1875/1875 [==============================] - 55s 29ms/step - loss: 0.0027 - accuracy: 0.9991 - val_loss: 0.0042 - val_accuracy: 0.9991
y_kf_test++++++:
[[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
...
[0. 0. 0. 1.]
[0. 0. 0. 1.]
[0. 0. 0. 1.]]
预测的概率矩阵为test_pred:
[[1.0000000e+00 1.2987890e-12 4.3421242e-17 9.6314740e-25]
[1.0664977e-16 1.0852620e-15 1.0000000e+00 3.7733482e-19]
[2.0541892e-26 1.0776717e-29 6.0904610e-24 1.0000000e+00]
...
[2.2533643e-10 1.9210296e-10 1.0000000e+00 6.5144643e-12]
[1.0000000e+00 8.0374228e-11 8.2770508e-16 1.1548383e-20]
[1.0000000e+00 3.1995117e-15 3.1809543e-14 2.6292750e-17]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008, 166.70497226715088, 286.96989250183105, 46.44265604019165]
temp=pd.DataFrame(lgb_test)
result=pd.read_csv('sample_submit.csv')
result['label_0']=temp[0]
result['label_1']=temp[1]
result['label_2']=temp[2]
result['label_3']=temp[3]
result.to_csv('submit_baseline_v3.0.csv',index=False)
submit_data=pd.read_csv('submit_baseline_v3.0.csv')
submit_data
id | label_0 | label_1 | label_2 | label_3 | |
---|---|---|---|---|---|
0 | 100000 | 1.000000e+00 | 1.298789e-12 | 4.342124e-17 | 9.631474e-25 |
1 | 100001 | 1.066498e-16 | 1.085262e-15 | 1.000000e+00 | 3.773348e-19 |
2 | 100002 | 2.054189e-26 | 1.077672e-29 | 6.090461e-24 | 1.000000e+00 |
3 | 100003 | 1.000000e+00 | 5.491751e-15 | 4.521685e-22 | 1.850977e-25 |
4 | 100004 | 1.000000e+00 | 2.030157e-10 | 9.926018e-13 | 1.117518e-13 |
... | ... | ... | ... | ... | ... |
19995 | 119995 | 1.000000e+00 | 2.684174e-08 | 8.052518e-12 | 5.653323e-13 |
19996 | 119996 | 1.000000e+00 | 9.495173e-12 | 9.100256e-13 | 6.368752e-20 |
19997 | 119997 | 2.253364e-10 | 1.921030e-10 | 1.000000e+00 | 6.514464e-12 |
19998 | 119998 | 1.000000e+00 | 8.037423e-11 | 8.277051e-16 | 1.154838e-20 |
19999 | 119999 | 1.000000e+00 | 3.199512e-15 | 3.180954e-14 | 2.629275e-17 |
20000 rows × 5 columns
for index,row in submit_data.iterrows():
row_max = max(list(row)[1:])
if row_max > 0.9:
for i in range(1,5):
if row[i]>0.9:
submit_data.iloc[index,i] = 1
else:
submit_data.iloc[index,i] = 0
submit_data
id | label_0 | label_1 | label_2 | label_3 | |
---|---|---|---|---|---|
0 | 100000 | 1.0 | 0.0 | 0.0 | 0.0 |
1 | 100001 | 0.0 | 0.0 | 1.0 | 0.0 |
2 | 100002 | 0.0 | 0.0 | 0.0 | 1.0 |
3 | 100003 | 1.0 | 0.0 | 0.0 | 0.0 |
4 | 100004 | 1.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... |
19995 | 119995 | 1.0 | 0.0 | 0.0 | 0.0 |
19996 | 119996 | 1.0 | 0.0 | 0.0 | 0.0 |
19997 | 119997 | 0.0 | 0.0 | 1.0 | 0.0 |
19998 | 119998 | 1.0 | 0.0 | 0.0 | 0.0 |
19999 | 119999 | 1.0 | 0.0 | 0.0 | 0.0 |
20000 rows × 5 columns
submit_data.to_csv('submit_baseline_v3.0.1.csv',index=False)
V3分数降了一百多分!!!!