Building an LSTM baseline in PyTorch for the Tianchi competition: heartbeat signal classification

Competition link: Tianchi 零基础入门数据挖掘-心跳信号分类预测 (beginner data mining: heartbeat signal classification)

Click for the complete project code.

Here we put together a baseline directly; you can just run the code as-is.

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import KFold, train_test_split
from tqdm import tqdm

torch.manual_seed(10)  # fix the initial model weights across runs
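# Optional (an assumption about what you may want): torch.manual_seed only fixes
# torch's CPU RNG; for fuller reproducibility you can also seed numpy and make
# cuDNN deterministic:
# np.random.seed(10)
# torch.backends.cudnn.deterministic = True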

train = pd.read_csv('/零基础入门数据挖掘-心跳信号分类预测/train.csv')
testA = pd.read_csv('/零基础入门数据挖掘-心跳信号分类预测/testA.csv')
sample_submit = pd.read_csv('/零基础入门数据挖掘-心跳信号分类预测/sample_submit.csv')

# each signal is stored as a comma-separated string of 205 samples; parse it into a float32 vector
train['heartbeat_signals'] = train['heartbeat_signals'].apply(lambda x: np.array(x.split(',')).astype('float32'))
train['label'] = train['label'].astype('int32')

testA['heartbeat_signals'] = testA['heartbeat_signals'].apply(lambda x: np.array(x.split(',')).astype('float32'))

# stack the parsed signals into one array of shape (num_samples, 205)
data = np.array(list(train['heartbeat_signals'].values))
targets = train['label'].values

# reshape to (batch, seq_len=1, input_size=205) to match the LSTM below
data = data.reshape(data.shape[0], 1, 205)
#data = torch.from_numpy(data).to(torch.float32)  # convert to tensor
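# Sanity check (illustrative): each sample is a single time step holding all
# 205 signal values, so the LSTM reads the whole heartbeat as one input vector.
assert data.shape[1:] == (1, 205)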


# same preprocessing for the test set
test = np.array(list(testA['heartbeat_signals'].values))
test = test.reshape(test.shape[0], 1, 205)
test = torch.from_numpy(test).to(torch.float32)  # convert to tensor

test_ids = testA['id']
test_pre = np.zeros([len(test), 4])  # accumulates per-fold test probabilities
# split the training set, then train
#x_train, x_val, y_train, y_val = train_test_split(data, targets, test_size=0.2)

'''
#-----------Main LSTM parameters-------
 input_size – dimensionality of each input feature vector
 hidden_size – dimensionality of the hidden state, i.e. the number of output units of the LSTM
 num_layers – number of stacked layers (not to be confused with the unrolled time steps)
 bias – if False, the LSTM uses no bias terms; defaults to True
 batch_first – if True, input and output tensors have shape (batch, seq_len, input_size)
 dropout – if non-zero, applies dropout to the output of each layer except the last
 bidirectional – if True, the LSTM becomes bidirectional; defaults to False
'''
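# A minimal shape check for these conventions (a sketch, matching the sizes used
# below). With batch_first=True, input (batch, seq_len, input_size) gives output
# (batch, seq_len, hidden_size):
# _demo = nn.LSTM(input_size=205, hidden_size=200, num_layers=3, batch_first=True)
# _out, (_h, _c) = _demo(torch.zeros(8, 1, 205))
# assert _out.shape == (8, 1, 200)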
class Bi_Lstm(nn.Module):
    def __init__(self):
        super(Bi_Lstm, self).__init__()
        # batch_first=True so the input is (batch, seq_len, input_size); note that
        # despite the class name, bidirectional is not enabled here (setting
        # bidirectional=True would double the output size to 400)
        self.lstm = nn.LSTM(input_size=205, hidden_size=200, num_layers=3, batch_first=True)
        self.l1 = nn.Linear(200, 500)  # fully connected layer on the LSTM features
        self.l2 = nn.ReLU()            # activation
        self.l3 = nn.BatchNorm1d(500)  # batch normalization
        self.l4 = nn.Linear(500, 250)
        self.l5 = nn.ReLU()
        self.l6 = nn.BatchNorm1d(250)
        self.l7 = nn.Linear(250, 4)    # 4 output classes
        self.l8 = nn.BatchNorm1d(4)
    def forward(self, x):
        out, _ = self.lstm(x)
        # take the output of the last time step
        out = self.l1(out[:, -1, :])
        out = self.l2(out)
        out = self.l3(out)
        out = self.l4(out)
        out = self.l5(out)
        out = self.l6(out)
        out = self.l7(out)
        out = self.l8(out)
        return out
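
# Quick smoke test (illustrative): the model maps (batch, 1, 205) -> (batch, 4).
# _m = Bi_Lstm().eval()  # eval mode so the BatchNorm layers accept any batch size
# with torch.no_grad():
#     assert _m(torch.zeros(2, 1, 205)).shape == (2, 4)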


def SoftMax(x):
    # numerically stable softmax over one logit vector
    e = np.exp(x - np.max(x))
    return e / np.sum(e)
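
# Equivalent (illustrative): torch.softmax applied to a whole batch of logits,
# torch.softmax(torch.from_numpy(logits), dim=1).numpy(), yields the same
# probabilities row by row.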

def Score_function(y_pre, y_true):
    # counts the number of misclassified samples (lower is better)
    score = 0
    y_pre = np.argmax(y_pre, axis=1)
    for i in range(len(y_pre)):
        if y_pre[i] != int(y_true[i]):
            score += 1.0
    return score
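
# Note: the official competition metric (as I understand the rules; verify
# against the competition page) is the sum of absolute differences between
# the predicted probabilities and the one-hot labels. A minimal sketch:
def abs_sum_score(y_prob, y_true):
    one_hot = np.eye(4)[np.asarray(y_true, dtype=int)]
    return np.abs(y_prob - one_hot).sum()
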
training_step = 5  # number of training epochs
batch_size = 512   # mini-batch size

kf = KFold(n_splits=5, shuffle=True, random_state=2021)  # 5-fold cross-validation
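# Alternative (a sketch): if the classes are imbalanced, StratifiedKFold keeps
# the label proportions similar across folds and is a drop-in replacement here:
# from sklearn.model_selection import StratifiedKFold
# kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)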
for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    #print(train_idx)
    x_train, x_val = data[train_idx], data[test_idx]
    y_train, y_val = targets[train_idx], targets[test_idx]

    model = Bi_Lstm()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_func = nn.CrossEntropyLoss()  # loss for multi-class classification

    model.train()  # required whenever the model contains BatchNorm or Dropout layers
    

    # start the training epochs
    for step in range(training_step):
        print('step =', step)
        M_train = len(x_train)
        M_val = len(x_val)
        L_val = -batch_size
        with tqdm(np.arange(0, M_train, batch_size), desc='Training...') as tbar:
            for index in tbar:
                L = index
                R = min(M_train, index + batch_size)
                # advance a sliding window over the validation set as well
                L_val += batch_size
                L_val %= M_val
                R_val = min(M_val, L_val + batch_size)
                #-----------------training------------------
                train_pre = model(torch.from_numpy(x_train[L:R, :]).to(torch.float32))  # forward pass on the training batch
                train_loss = loss_func(train_pre, torch.from_numpy(y_train[L:R]).to(torch.long))
                # the validation set is also fed in batches, otherwise memory blows up;
                # no_grad avoids building a graph for the validation pass
                with torch.no_grad():
                    val_pre = model(torch.from_numpy(x_val[L_val:R_val, :]).to(torch.float32))
                    val_loss = loss_func(val_pre, torch.from_numpy(y_val[L_val:R_val]).to(torch.long))
                #-----------------compute accuracy----------------
                train_acc = np.sum(np.argmax(train_pre.detach().numpy(), axis=1) == y_train[L:R]) / (R - L)
                val_acc = np.sum(np.argmax(val_pre.numpy(), axis=1) == y_val[L_val:R_val]) / (R_val - L_val)

                #---------------report on the progress bar--------------
                tbar.set_postfix(train_loss=float(train_loss.data), train_acc=train_acc, val_loss=float(val_loss.data), val_acc=val_acc)
                tbar.update()  # default n=1; each update advances the bar by n

                #-----------------backpropagation and update---------------
                optimizer.zero_grad()   # clear the gradients left over from the previous step
                train_loss.backward()   # backpropagate the training loss
                optimizer.step()        # apply the parameter updates
            # evaluate on the full validation set at the end of each epoch
            with torch.no_grad():
                val_pre = model(torch.from_numpy(x_val).to(torch.float32)).numpy()
            y_pre = np.array([SoftMax(val) for val in val_pre])
            print('val_score =', Score_function(y_pre, y_val))
    # predict on the test set with this fold's model and accumulate probabilities
    with torch.no_grad():
        pre = model(test).numpy()
    test_pre += np.array([SoftMax(val) for val in pre])

    del model  # free this fold's model before the next fold

test_pre /= 5  # average the probabilities over the 5 folds
fp = open('天池submission.csv', 'w', encoding='utf-8')
fp.write('id,label_0,label_1,label_2,label_3\n')
for i in range(len(test_ids)):
    fp.write(str(int(test_ids[i])))
    for j in range(4):
        fp.write(',' + str(test_pre[i][j]))
    fp.write('\n')
fp.close()
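
Equivalently, the submission can be written with pandas (a short sketch using the same test_ids and test_pre as above):

sub = pd.DataFrame(test_pre, columns=['label_0', 'label_1', 'label_2', 'label_3'])
sub.insert(0, 'id', test_ids.astype(int).values)
sub.to_csv('天池submission.csv', index=False)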

A rough run scores about 1396. No feature engineering was done, no hyperparameters were tuned, and the model structure is fairly simple, so readers following along can adjust the model parameters and architecture themselves.
