March: Heartbeat signal classification baseline_v3 — the training samples are balanced per class


# 2021.05.08
# Built on v2: the samples are evenly distributed across classes, which helps the small classes
# Original work — a like is appreciated; please credit the source when reposting.

There is a surprise at the end.

Questions to keep in mind: why does balancing help, and how far should it be pushed?

Datawhale March topic: heartbeat signal classification


import os
import gc
import math

import pandas as pd
import numpy as np

import lightgbm as lgb
#import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge
from sklearn.preprocessing import MinMaxScaler


from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings('ignore')
#reading dataset
train = pd.read_csv("train.csv")
test = pd.read_csv("testA.csv")
train

          id                                  heartbeat_signals  label
0          0  0.9912297987616655,0.9435330436439665,0.764677...    0.0
1          1  0.9714822034884503,0.9289687459588268,0.572932...    0.0
2          2  1.0,0.9591487564065292,0.7013782792997189,0.23...    2.0
3          3  0.9757952826275774,0.9340884687738161,0.659636...    0.0
4          4  0.0,0.055816398940721094,0.26129357194994196,0...    2.0
...      ...                                                ...    ...
99995  99995  1.0,0.677705342021188,0.22239242747868546,0.25...    0.0
99996  99996  0.9268571578157265,0.9063471198026871,0.636993...    2.0
99997  99997  0.9258351628306013,0.5873839035878395,0.633226...    3.0
99998  99998  1.0,0.9947621698382489,0.8297017704865509,0.45...    2.0
99999  99999  0.9259994004527861,0.916476635326053,0.4042900...    0.0

100000 rows × 3 columns

def reduce_mem_usage(df):
    start_mem = df.memory_usage().sum() / 1024**2 
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2 
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

# Simple preprocessing: split the comma-separated signal string into 205 numeric columns
train_list = []

for items in train.values:
    train_list.append([items[0]] + [float(i) for i in items[1].split(',')] + [items[2]])

train = pd.DataFrame(np.array(train_list))
train.columns = ['id'] + ['s_'+str(i) for i in range(len(train_list[0])-2)] + ['label']
train = reduce_mem_usage(train)

test_list=[]
for items in test.values:
    test_list.append([items[0]] + [float(i) for i in items[1].split(',')])

test = pd.DataFrame(np.array(test_list))
test.columns = ['id'] + ['s_'+str(i) for i in range(len(test_list[0])-1)]
test = reduce_mem_usage(test)
Memory usage of dataframe is 157.93 MB
Memory usage after optimization is: 39.67 MB
Decreased by 74.9%
Memory usage of dataframe is 31.43 MB
Memory usage after optimization is: 7.90 MB
Decreased by 74.9%
test
[test preview: 20000 rows × 206 columns — id, s_0 … s_204; the zero-padded tails of the signals show as 0.0]

train
[train preview after preprocessing: 100000 rows × 207 columns — id, s_0 … s_204, label]

train['label'].value_counts()
0.0    64327
3.0    17912
2.0    14199
1.0     3562
Name: label, dtype: int64

# There is severe class imbalance

# Prepare to balance the classes
df_1 = train[train['label'] == 1]
df_2 = train[train['label'] == 2]
df_3 = train[train['label'] == 3]
# df_1
# # Checked — these really are the rows with the corresponding label
# Upsample the under-represented classes. replace=True means sampling with replacement; keep random_state fixed, otherwise every run draws a different sample. Each class is filled up to 20,000 rows.
from sklearn.utils import resample

df_1_upsample = resample(df_1, n_samples = 20000, replace = True, random_state = 123)
df_2_upsample = resample(df_2, n_samples = 20000, replace = True, random_state = 123)
df_3_upsample = resample(df_3, n_samples = 20000, replace = True, random_state = 123)
# Label 0 has plenty of samples, so randomly downsample it to 20,000
df_0 = train[train['label']==0].sample(n =20000, random_state=123)
# Merge the four parts into one new training set
# !!!!! ignore_index=True must be set !!!!! — without it the index is scrambled and the cv function below raises an error
train_df = pd.concat([df_0, df_1_upsample, df_2_upsample, df_3_upsample],ignore_index=True)
# The four labels are now evenly represented in the new training set; everything else stays the same as v2
train_df['label'].value_counts()
0.0    20000
3.0    20000
1.0    20000
2.0    20000
Name: label, dtype: int64
train_df
[train_df preview: 80000 rows × 207 columns — id, s_0 … s_204, label; the id column shows shuffled draws from the original train]



# # v2 version (kept for reference): features taken straight from the original train
# x_train = train.drop(['id','label'], axis=1)
# y_train = train['label']
# x_test=test.drop(['id'], axis=1)
# Changed for v3: take the features and labels from the new, balanced training set
x_train = train_df.drop(['id','label'], axis=1)
y_train = train_df['label']
x_test=test.drop(['id'], axis=1)
x_train
[x_train preview: 80000 rows × 205 columns — s_0 … s_204]

x_test
[x_test preview: 20000 rows × 205 columns — s_0 … s_204]

y_train
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
79995    3.0
79996    3.0
79997    3.0
79998    3.0
79999    3.0
Name: label, Length: 80000, dtype: float16
# Loss function — the competition metric: sum of absolute errors between the one-hot labels and the predicted probability matrix
def abs_sum(y_pre,y_tru):
    y_pre=np.array(y_pre)
    y_tru=np.array(y_tru)
    loss=sum(sum(abs(y_pre-y_tru)))
    return loss
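A tiny worked example of this metric (made-up numbers, just to illustrate): with one-hot ground truth and predicted probabilities for two samples, abs_sum adds up |prediction - truth| over every class of every sample.

y_true_demo = np.array([[1, 0, 0, 0],
                        [0, 0, 1, 0]])
y_pred_demo = np.array([[0.9, 0.05, 0.03, 0.02],
                        [0.1, 0.10, 0.70, 0.10]])
# sample 1 contributes 0.1 + 0.05 + 0.03 + 0.02 = 0.2, sample 2 contributes 0.6
print(abs_sum(y_pred_demo, y_true_demo))   # -> 0.8 (up to floating-point rounding)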
from keras.models import Sequential
from keras.layers import Dense # for fully connected layers dense will be used
from keras.layers import Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam

# avoid overfitting by normalizing the samples
from keras.layers import BatchNormalization
# cnn
def build_model():
    model = Sequential()
    

    # filters: number of convolution kernels (output channels)
    # padding='same': zero-pad the input so the sequence length is preserved
    model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same', input_shape = (205, 1))) # one signal of length 205 at a time, hence not (100000, 205, 1)
    # output: (None, 205, 64)
    
    # Normalization to avoid overfitting
    # Roughly: it keeps the activations in a sensitive range; it sits between the linear layer and its activation: https://www.bilibili.com/video/BV1Lx411j7GT?from=search&seid=5048435414489430319
    model.add(BatchNormalization())
    # output: (None, 205, 64) — the values are only re-normalized (re-distributed) so the following activation separates them better
    
    # Pooling 
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))
    # output: (None, 103, 64) — strides=2 skips every other position and padding='same' keeps the edges, so the length becomes ceil(205 / 2) = 103

    model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same'))# (none, 103, 64)
    model.add(BatchNormalization())                                                      #:(none, 103, 64)
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))              #:(none, 52, 64)

    model.add(Conv1D( filters = 64, kernel_size = 6, activation='relu', padding = 'same'))#:(none, 52, 64)
    model.add(BatchNormalization())                                                       #:(none, 52, 64)
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))               #:(none, 26, 64)

    # Flatten 
    model.add(Flatten())
    # output: (None, 1664) — the feature maps are flattened out: 26 * 64 = 1664

    # Fully connected layers
    # Dense is used for the fully connected (and output) layers;
    # units: a positive integer giving the layer's output dimension.
    model.add(Dense(units = 64, activation='relu'))
    
    # Hidden Layer
    model.add(Dense(units = 64, activation='relu'))
    
    # Output Layer
    model.add(Dense(units = 4, activation='softmax'))

    # loss = 'categorical_crossentropy'
    model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model
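A quick sanity check of the shape comments above (a standalone snippet, not part of the original pipeline): MaxPooling1D with strides=2 and padding='same' outputs ceil(length / 2), and Flatten multiplies the final length by the channel count.

import math
length = 205
for _ in range(3):                  # three Conv + Pool stages
    length = math.ceil(length / 2)  # 205 -> 103 -> 52 -> 26
print(length, length * 64)          # 26 1664, matching the Flatten row in model.summary() below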
model = build_model()
# This is for one sample, i.e. one row
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv1d (Conv1D)              (None, 205, 64)           448       
_________________________________________________________________
batch_normalization (BatchNo (None, 205, 64)           256       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 103, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 103, 64)           24640     
_________________________________________________________________
batch_normalization_1 (Batch (None, 103, 64)           256       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 52, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 52, 64)            24640     
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 64)            256       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 26, 64)            0         
_________________________________________________________________
flatten (Flatten)            (None, 1664)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                106560    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 260       
=================================================================
Total params: 161,476
Trainable params: 161,092
Non-trainable params: 384
_________________________________________________________________
from keras.utils import to_categorical

def cv_model(clf, train_x, train_y, test_x, clf_name):
    folds = 4
    seed = 2021
    
    # K-fold cross-validation (KFold)
    # n_splits: number of folds
    # shuffle: shuffle before splitting; the validation folds together cover the whole training set exactly once
    # random_state: the splitting is inherently random, so fixing the random state is the only way to get
    # reproducible folds — without it every run produces a different shuffle and results cannot be compared.

    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    # Prepare the test matrix for prediction
    test = np.zeros((test_x.shape[0],4))
    test_x = test_x.iloc[:, :].values
    test_x = test_x.reshape(len(test_x),test_x.shape[1],1)    

    cv_scores = []

    onehot_encoder = OneHotEncoder(sparse=False)

    for i, (train_index, test_index) in enumerate(kf.split(train_x, train_y)):
        
        print('************************************ {} ************************************'.format(str(i+1)))
        
        x_kf_train, y_kf_train, x_kf_test, y_kf_test = train_x.iloc[train_index], train_y[train_index], train_x.iloc[test_index], train_y[test_index]

        if clf_name == "cnn":
            x_kf_train = x_kf_train.iloc[:, :].values
            x_kf_test = x_kf_test.iloc[:, :].values
            x_kf_train = x_kf_train.reshape(len(x_kf_train),x_kf_train.shape[1],1)
            x_kf_test = x_kf_test.reshape(len(x_kf_test),x_kf_test.shape[1],1)
            
            # One-hot encode the labels for the softmax / categorical-crossentropy head
            y_kf_train = to_categorical(y_kf_train)
            y_kf_test = to_categorical(y_kf_test)
            
            history = model.fit(x_kf_train,y_kf_train, epochs = 15, batch_size = 32, validation_data=(x_kf_test, y_kf_test))
            x_kf_test_pred = model.predict(x_kf_test)
            test_pred = model.predict(test_x)

        print("y_kf_test++++++:")  
        print(y_kf_test)

        print('Predicted probability matrix test_pred:')
        print(test_pred)
        print("abs_sum++++++:")
        score=abs_sum(y_kf_test,  x_kf_test_pred)
        cv_scores.append(score)
        print("cv_scores+++++:")
        print(cv_scores)
        
#         test += test_pred
#         test=test/kf.n_splits
        # (fold averaging is left disabled here — only the last fold's test_pred is returned)

    return test_pred
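The two commented-out lines above hint at averaging the test predictions over all folds, but as written they would also divide by n_splits inside the loop, shrinking the accumulated sum on every iteration. A minimal, self-contained sketch of fold averaging (with placeholder arrays standing in for each fold's model.predict(test_x)):

n_splits = 4
fold_preds = [np.random.rand(5, 4) for _ in range(n_splits)]  # dummy per-fold probability matrices

test_avg = np.zeros((5, 4))
for pred in fold_preds:
    test_avg += pred          # accumulate each fold's probabilities inside the loop
test_avg /= n_splits          # divide once, after the loop
print(test_avg.shape)         # (5, 4): the averaged probability matrix

Plugged into cv_model, this means accumulating test += test_pred inside the fold loop, dividing by kf.n_splits after it, and returning test instead of only the last fold's test_pred.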


def lgb_model(x_train, y_train, x_test):
    # The name is kept over from the v2 LightGBM baseline; with clf_name="cnn" it actually runs the CNN defined above
    lgb_test = cv_model(lgb, x_train, y_train, x_test, "cnn")
    return lgb_test
lgb_test = lgb_model(x_train, y_train, x_test)
************************************ 1 ************************************
Epoch 1/15
1875/1875 [==============================] - 60s 31ms/step - loss: 0.2759 - accuracy: 0.8949 - val_loss: 0.1356 - val_accuracy: 0.9588
Epoch 2/15
1875/1875 [==============================] - 59s 32ms/step - loss: 0.0875 - accuracy: 0.9688 - val_loss: 0.0670 - val_accuracy: 0.9768
Epoch 3/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0539 - accuracy: 0.9812 - val_loss: 0.0565 - val_accuracy: 0.9797
Epoch 4/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0407 - accuracy: 0.9865 - val_loss: 0.0454 - val_accuracy: 0.9859
Epoch 5/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0323 - accuracy: 0.9891 - val_loss: 0.0395 - val_accuracy: 0.9867
Epoch 6/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0233 - accuracy: 0.9924 - val_loss: 0.0342 - val_accuracy: 0.9895
Epoch 7/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0232 - accuracy: 0.9923 - val_loss: 0.0394 - val_accuracy: 0.9868
Epoch 8/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0151 - accuracy: 0.9951 - val_loss: 0.0416 - val_accuracy: 0.9880
Epoch 9/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0168 - accuracy: 0.9952 - val_loss: 0.0564 - val_accuracy: 0.9827
Epoch 10/15
1875/1875 [==============================] - 55s 29ms/step - loss: 0.0196 - accuracy: 0.9939 - val_loss: 0.0274 - val_accuracy: 0.9926
Epoch 11/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0115 - accuracy: 0.9960 - val_loss: 0.0267 - val_accuracy: 0.9938
Epoch 12/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0087 - accuracy: 0.9973 - val_loss: 0.0472 - val_accuracy: 0.9862
Epoch 13/15
1875/1875 [==============================] - 59s 31ms/step - loss: 0.0097 - accuracy: 0.9967 - val_loss: 0.0334 - val_accuracy: 0.9923
Epoch 14/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0091 - accuracy: 0.9970 - val_loss: 0.0478 - val_accuracy: 0.9890
Epoch 15/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0133 - accuracy: 0.9961 - val_loss: 0.0329 - val_accuracy: 0.9912
y_kf_test++++++:
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]
Predicted probability matrix test_pred:
[[9.9999166e-01 8.3696723e-06 7.0094308e-09 1.5220067e-08]
 [6.5147565e-10 2.9474978e-09 1.0000000e+00 3.8487034e-15]
 [3.4868474e-20 3.0091077e-13 3.5588698e-16 1.0000000e+00]
 ...
 [9.6342439e-05 3.3096262e-06 9.9989974e-01 5.9694372e-07]
 [9.9999988e-01 1.6789036e-07 1.0092740e-08 1.7682024e-11]
 [8.7801713e-01 9.7556551e-07 2.5424310e-03 1.1943953e-01]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008]
************************************ 2 ************************************
Epoch 1/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0186 - accuracy: 0.9949 - val_loss: 0.0089 - val_accuracy: 0.9963
Epoch 2/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0116 - accuracy: 0.9965 - val_loss: 0.0138 - val_accuracy: 0.9955
Epoch 3/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0106 - accuracy: 0.9967 - val_loss: 0.0207 - val_accuracy: 0.9941
Epoch 4/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0093 - accuracy: 0.9971 - val_loss: 0.0101 - val_accuracy: 0.9964
Epoch 5/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0070 - accuracy: 0.9978 - val_loss: 0.0196 - val_accuracy: 0.9949
Epoch 6/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0073 - accuracy: 0.9977 - val_loss: 0.0124 - val_accuracy: 0.9969
Epoch 7/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0078 - accuracy: 0.9975 - val_loss: 0.0151 - val_accuracy: 0.9956
Epoch 8/15
1875/1875 [==============================] - 61s 32ms/step - loss: 0.0075 - accuracy: 0.9977 - val_loss: 0.0302 - val_accuracy: 0.9913
Epoch 9/15
1875/1875 [==============================] - 60s 32ms/step - loss: 0.0065 - accuracy: 0.9982 - val_loss: 0.0090 - val_accuracy: 0.9971
Epoch 10/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0065 - accuracy: 0.9982 - val_loss: 0.0263 - val_accuracy: 0.9945
Epoch 11/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0070 - accuracy: 0.9978 - val_loss: 0.0103 - val_accuracy: 0.9970
Epoch 12/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0056 - accuracy: 0.9985 - val_loss: 0.0134 - val_accuracy: 0.9965
Epoch 13/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0061 - accuracy: 0.9983 - val_loss: 0.0128 - val_accuracy: 0.9963
Epoch 14/15
1875/1875 [==============================] - 59s 31ms/step - loss: 0.0069 - accuracy: 0.9981 - val_loss: 0.0295 - val_accuracy: 0.9904
Epoch 15/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0052 - accuracy: 0.9988 - val_loss: 0.0134 - val_accuracy: 0.9967
y_kf_test++++++:
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]
Predicted probability matrix test_pred:
[[9.99999762e-01 2.31557891e-07 1.64789016e-12 1.57000205e-10]
 [7.08220592e-13 3.05859255e-10 1.00000000e+00 1.08148854e-23]
 [5.37503151e-19 4.81650921e-20 5.44116887e-20 1.00000000e+00]
 ...
 [2.33657775e-03 2.78295577e-03 9.94879365e-01 1.02097715e-06]
 [9.99999523e-01 4.99625287e-07 2.50645789e-11 2.91075359e-08]
 [1.00000000e+00 1.97536454e-09 4.46538850e-09 6.42643183e-09]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008, 166.70497226715088]
************************************ 3 ************************************
Epoch 1/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0103 - accuracy: 0.9975 - val_loss: 0.0062 - val_accuracy: 0.9985
Epoch 2/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0042 - accuracy: 0.9988 - val_loss: 0.0129 - val_accuracy: 0.9963
Epoch 3/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0071 - accuracy: 0.9978 - val_loss: 0.0027 - val_accuracy: 0.9992
Epoch 4/15
1875/1875 [==============================] - 61s 32ms/step - loss: 0.0042 - accuracy: 0.9987 - val_loss: 0.0055 - val_accuracy: 0.9982
Epoch 5/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0061 - accuracy: 0.9984 - val_loss: 0.0116 - val_accuracy: 0.9963
Epoch 6/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0048 - accuracy: 0.9986 - val_loss: 0.0055 - val_accuracy: 0.9984
Epoch 7/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0045 - accuracy: 0.9991 - val_loss: 0.0084 - val_accuracy: 0.9984
Epoch 8/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0062 - accuracy: 0.9983 - val_loss: 0.0036 - val_accuracy: 0.9989
Epoch 9/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0039 - accuracy: 0.9989 - val_loss: 0.0091 - val_accuracy: 0.9973
Epoch 10/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0054 - accuracy: 0.9986 - val_loss: 0.0167 - val_accuracy: 0.9957
Epoch 11/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0061 - accuracy: 0.9986 - val_loss: 0.0188 - val_accuracy: 0.9967
Epoch 12/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0025 - accuracy: 0.9991 - val_loss: 0.0162 - val_accuracy: 0.9948
Epoch 13/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0050 - accuracy: 0.9986 - val_loss: 0.0081 - val_accuracy: 0.9983
Epoch 14/15
1875/1875 [==============================] - 60s 32ms/step - loss: 0.0047 - accuracy: 0.9988 - val_loss: 0.0143 - val_accuracy: 0.9962
Epoch 15/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0046 - accuracy: 0.9989 - val_loss: 0.0293 - val_accuracy: 0.9941
y_kf_test++++++:
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]
Predicted probability matrix test_pred:
[[9.9999940e-01 5.5716970e-07 4.2182435e-10 7.5162426e-15]
 [3.0866704e-10 2.2008441e-08 1.0000000e+00 3.8451365e-23]
 [3.2547221e-23 1.0347285e-20 5.9116691e-22 1.0000000e+00]
 ...
 [7.4469927e-04 4.4713984e-06 9.9902654e-01 2.2425935e-04]
 [1.0000000e+00 6.6102653e-12 9.1913356e-16 7.5038865e-26]
 [9.9999976e-01 2.0411268e-07 3.4282695e-11 1.1750135e-11]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008, 166.70497226715088, 286.96989250183105]
************************************ 4 ************************************
Epoch 1/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0064 - accuracy: 0.9982 - val_loss: 0.0071 - val_accuracy: 0.9980
Epoch 2/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0039 - accuracy: 0.9991 - val_loss: 0.0132 - val_accuracy: 0.9973
Epoch 3/15
1875/1875 [==============================] - 57s 30ms/step - loss: 0.0060 - accuracy: 0.9983 - val_loss: 0.0273 - val_accuracy: 0.9916
Epoch 4/15
1875/1875 [==============================] - 59s 31ms/step - loss: 0.0042 - accuracy: 0.9989 - val_loss: 0.0024 - val_accuracy: 0.9991
Epoch 5/15
1875/1875 [==============================] - 57s 31ms/step - loss: 0.0030 - accuracy: 0.9992 - val_loss: 0.0026 - val_accuracy: 0.9994
Epoch 6/15
1875/1875 [==============================] - 60s 32ms/step - loss: 0.0038 - accuracy: 0.9991 - val_loss: 0.0056 - val_accuracy: 0.9982
Epoch 7/15
1875/1875 [==============================] - 62s 33ms/step - loss: 0.0036 - accuracy: 0.9991 - val_loss: 0.0026 - val_accuracy: 0.9991
Epoch 8/15
1875/1875 [==============================] - 58s 31ms/step - loss: 0.0042 - accuracy: 0.9987 - val_loss: 0.0255 - val_accuracy: 0.9923
Epoch 9/15
1875/1875 [==============================] - 56s 30ms/step - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0107 - val_accuracy: 0.9974
Epoch 10/15
1875/1875 [==============================] - 54s 29ms/step - loss: 0.0034 - accuracy: 0.9991 - val_loss: 0.0022 - val_accuracy: 0.9992
Epoch 11/15
1875/1875 [==============================] - 53s 29ms/step - loss: 0.0039 - accuracy: 0.9989 - val_loss: 0.0222 - val_accuracy: 0.9959
Epoch 12/15
1875/1875 [==============================] - 54s 29ms/step - loss: 0.0024 - accuracy: 0.9992 - val_loss: 0.0033 - val_accuracy: 0.9990
Epoch 13/15
1875/1875 [==============================] - 55s 29ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0153 - val_accuracy: 0.9955
Epoch 14/15
1875/1875 [==============================] - 53s 28ms/step - loss: 0.0037 - accuracy: 0.9990 - val_loss: 0.0063 - val_accuracy: 0.9982
Epoch 15/15
1875/1875 [==============================] - 55s 29ms/step - loss: 0.0027 - accuracy: 0.9991 - val_loss: 0.0042 - val_accuracy: 0.9991
y_kf_test++++++:
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]
Predicted probability matrix test_pred:
[[1.0000000e+00 1.2987890e-12 4.3421242e-17 9.6314740e-25]
 [1.0664977e-16 1.0852620e-15 1.0000000e+00 3.7733482e-19]
 [2.0541892e-26 1.0776717e-29 6.0904610e-24 1.0000000e+00]
 ...
 [2.2533643e-10 1.9210296e-10 1.0000000e+00 6.5144643e-12]
 [1.0000000e+00 8.0374228e-11 8.2770508e-16 1.1548383e-20]
 [1.0000000e+00 3.1995117e-15 3.1809543e-14 2.6292750e-17]]
abs_sum++++++:
cv_scores+++++:
[417.5972366333008, 166.70497226715088, 286.96989250183105, 46.44265604019165]
temp=pd.DataFrame(lgb_test)
result=pd.read_csv('sample_submit.csv')
result['label_0']=temp[0]
result['label_1']=temp[1]
result['label_2']=temp[2]
result['label_3']=temp[3]
result.to_csv('submit_baseline_v3.0.csv',index=False)
submit_data=pd.read_csv('submit_baseline_v3.0.csv')
submit_data
           id       label_0       label_1       label_2       label_3
0      100000  1.000000e+00  1.298789e-12  4.342124e-17  9.631474e-25
1      100001  1.066498e-16  1.085262e-15  1.000000e+00  3.773348e-19
2      100002  2.054189e-26  1.077672e-29  6.090461e-24  1.000000e+00
3      100003  1.000000e+00  5.491751e-15  4.521685e-22  1.850977e-25
4      100004  1.000000e+00  2.030157e-10  9.926018e-13  1.117518e-13
...       ...           ...           ...           ...           ...
19995  119995  1.000000e+00  2.684174e-08  8.052518e-12  5.653323e-13
19996  119996  1.000000e+00  9.495173e-12  9.100256e-13  6.368752e-20
19997  119997  2.253364e-10  1.921030e-10  1.000000e+00  6.514464e-12
19998  119998  1.000000e+00  8.037423e-11  8.277051e-16  1.154838e-20
19999  119999  1.000000e+00  3.199512e-15  3.180954e-14  2.629275e-17

20000 rows × 5 columns

# Post-processing: when a row's largest probability exceeds 0.9, snap the row to a hard 0/1 vector
for index,row in submit_data.iterrows():
    row_max = max(list(row)[1:])
    if row_max > 0.9:
        for i in range(1,5):
            if row[i]>0.9:
                submit_data.iloc[index,i] = 1
            else:
                submit_data.iloc[index,i] = 0
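An equivalent vectorized sketch (optional; it assumes each row's probabilities sum to roughly 1, so at most one value can exceed 0.9, and it avoids the slow iterrows loop):

label_cols = ['label_0', 'label_1', 'label_2', 'label_3']
probs = submit_data[label_cols].values
confident = probs.max(axis=1) > 0.9                               # rows with a dominant class
hard = (probs == probs.max(axis=1, keepdims=True)).astype(float)  # 0/1 vector of the row-wise argmax
probs[confident] = hard[confident]                                # snap only the confident rows
submit_data[label_cols] = probs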
submit_data
           id  label_0  label_1  label_2  label_3
0      100000      1.0      0.0      0.0      0.0
1      100001      0.0      0.0      1.0      0.0
2      100002      0.0      0.0      0.0      1.0
3      100003      1.0      0.0      0.0      0.0
4      100004      1.0      0.0      0.0      0.0
...       ...      ...      ...      ...      ...
19995  119995      1.0      0.0      0.0      0.0
19996  119996      1.0      0.0      0.0      0.0
19997  119997      0.0      0.0      1.0      0.0
19998  119998      1.0      0.0      0.0      0.0
19999  119999      1.0      0.0      0.0      0.0

20000 rows × 5 columns

submit_data.to_csv('submit_baseline_v3.0.1.csv',index=False)







The v3 score dropped by more than a hundred points!!!! (a big improvement — lower is better for this metric)






