baseline_v2_changeModel(cnn):score:267.2897
# 2021.05.08
# lightgbm 模型更换成CNN 模型
# 本文原创 望赞鼓励,转载请说明出处.
仍存疑问:
dense层的作用,需要几层
如何来构造CNN
Datawhile三月选题:心跳信号分类
import os
import gc
import math
import pandas as pd
import numpy as np
import lightgbm as lgb
#import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings('ignore')
train = pd.read_csv('train.csv')
test=pd.read_csv('testA.csv')
train.head()
id | heartbeat_signals | label | |
---|---|---|---|
0 | 0 | 0.9912297987616655,0.9435330436439665,0.764677... | 0.0 |
1 | 1 | 0.9714822034884503,0.9289687459588268,0.572932... | 0.0 |
2 | 2 | 1.0,0.9591487564065292,0.7013782792997189,0.23... | 2.0 |
3 | 3 | 0.9757952826275774,0.9340884687738161,0.659636... | 0.0 |
4 | 4 | 0.0,0.055816398940721094,0.26129357194994196,0... | 2.0 |
def reduce_mem_usage(df):
start_mem = df.memory_usage().sum() / 1024**2
print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
for col in df.columns:
col_type = df[col].dtype
if col_type != object:
c_min = df[col].min()
c_max = df[col].max()
if str(col_type)[:3] == 'int':
if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
df[col] = df[col].astype(np.int8)
elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
df[col] = df[col].astype(np.int16)
elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
df[col] = df[col].astype(np.int32)
elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
df[col] = df[col].astype(np.int64)
else:
if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
df[col] = df[col].astype(np.float16)
elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
df[col] = df[col].astype(np.float32)
else:
df[col] = df[col].astype(np.float64)
else:
df[col] = df[col].astype('category')
end_mem = df.memory_usage().sum() / 1024**2
print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
return df
# 简单预处理
train_list = []
for items in train.values:
train_list.append([items[0]] + [float(i) for i in items[1].split(',')] + [items[2]])
train = pd.DataFrame(np.array(train_list))
train.columns = ['id'] + ['s_'+str(i) for i in range(len(train_list[0])-2)] + ['label']
train = reduce_mem_usage(train)
test_list=[]