# Import dependencies
import pandas as pd
import numpy as np
import torch
from torch. utils. data import Dataset
from torch. utils. data import DataLoader
import torch. optim as optim
# Data preprocessing
def labels2id(labels):
    """Translate class-name strings into zero-based integer class ids.

    Args:
        labels: Iterable of strings, each one of 'Class_1' ... 'Class_9'.

    Returns:
        A list with one int per input label; 'Class_1' maps to 0 and
        'Class_9' maps to 8.

    Raises:
        ValueError: If a label is not one of the nine known class names.
    """
    class_names = [
        'Class_1', 'Class_2', 'Class_3',
        'Class_4', 'Class_5', 'Class_6',
        'Class_7', 'Class_8', 'Class_9',
    ]
    # The position of a name in the list is its numeric id.
    return [class_names.index(name) for name in labels]
class OttogroupDataset(Dataset):
    """Map-style dataset backed by a CSV file that has a 'target' column.

    The file is read once in the constructor. Every column except the first
    and the last is stored as a feature; the 'target' column is converted to
    integer class ids with ``labels2id``.
    """

    def __init__(self, filepath):
        """Load the CSV at ``filepath`` into a feature tensor and a label list.

        Args:
            filepath: Location of the CSV, passed straight to
                ``pandas.read_csv``.
        """
        frame = pd.read_csv(filepath)
        # Skip the first and last columns (presumably the row id and the
        # target label -- verify against the CSV layout).
        features = frame.to_numpy()[:, 1:-1].astype(float)
        self.x_data = torch.tensor(features)
        self.y_data = labels2id(frame['target'])
        self.len = len(frame)

    def __getitem__(self, index):
        """Return the ``(features, class_id)`` pair stored at ``index``."""
        sample = self.x_data[index]
        class_id = self.y_data[index]
        return sample, class_id

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.len
# Build the training dataset from the competition CSV and wrap it in a loader
# that serves shuffled mini-batches of 64 samples.
train_dataset = OttogroupDataset(
    '../input/otto-group-product-classification-challenge/train.csv'
)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
# Build the network
class Net(torch.nn.Module):
    """Fully connected classifier: 93 inputs -> 64 -> 32 -> 16 -> 9 scores."""

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(93, 64)
        self.l2 = torch.nn.Linear(64, 32)
        self.l3 = torch.nn.Linear(32, 16)
        self.l4 = torch.nn.Linear(16, 9)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (logits) for a batch.

        Args:
            x: Float tensor whose last dimension is 93.

        Returns:
            Tensor with 9 scores per sample. The last layer has no
            activation, so the scores may be negative.
        """
        x = self.relu(self.l1(x))
        x = self.relu(self.l2(x))
        x = self.relu(self.l3(x))
        return self.l4(x)

    def predict(self, x):
        """Return one-hot class predictions for a batch as a DataFrame.

        Args:
            x: Float tensor of shape (batch, 93).

        Returns:
            ``pandas.DataFrame`` with one row per sample and one integer
            0/1 column per class. Columns are labelled 0 .. 8 and are all
            present even when a class is never predicted.
        """
        with torch.no_grad():
            # Reuse the forward pass so prediction ranks exactly the scores
            # the network was trained on. Applying ReLU to the final scores
            # would flatten every negative score to 0 and distort the argmax.
            predicted = self(x).argmax(dim=1)
        num_classes = self.l4.out_features
        # Fix the number of classes explicitly so the result always has
        # ``num_classes`` columns, independent of which classes occur.
        one_hot = torch.nn.functional.one_hot(predicted, num_classes=num_classes)
        return pd.DataFrame(one_hot.cpu().numpy(), columns=range(num_classes))
# Single network instance used by the optimizer, the training loop and the
# prediction code in this file.
model = Net()
# Optimizer and loss function
# Cross-entropy loss over the network's raw class scores.
criterion = torch.nn.CrossEntropyLoss()
# SGD with momentum over every parameter of the model.
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)
# Run
# Training function
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    After every 300 mini-batches the mean loss of those 300 batches is
    printed and the accumulator is reset.

    Args:
        epoch: Zero-based epoch index; only used (as ``epoch + 1``) in the
            printed progress line.
    """
    loss_sum = 0.0
    for step, (inputs, target) in enumerate(train_loader, start=1):
        # Cast the features to float32 before the forward pass.
        inputs = inputs.float()

        optimizer.zero_grad()
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        if step % 300 == 0:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, step, loss_sum / 300))
            loss_sum = 0.0
# Run on the training set
if __name__ == "__main__":
    # Train for 50 epochs; only runs when the file is executed as a script.
    for epoch in range(50):
        train(epoch)
# Run on the test set
def predict_save(
    test_csv='../input/otto-group-product-classification-challenge/test.csv',
    output_csv='my_predict.csv',
):
    """Predict classes for the test CSV and write a one-hot submission file.

    Args:
        test_csv: Path of the CSV to read. It must contain an 'id' column;
            every column after the first is fed to the model as a feature.
        output_csv: Path of the CSV file to write.

    Returns:
        The written ``pandas.DataFrame``: an 'id' column followed by the
        integer 0/1 columns 'Class_1' ... 'Class_9'.
    """
    class_names = [
        'Class_1', 'Class_2', 'Class_3',
        'Class_4', 'Class_5', 'Class_6',
        'Class_7', 'Class_8', 'Class_9',
    ]
    test_data = pd.read_csv(test_csv)
    # Drop the first column before building the feature tensor
    # (presumably the row id -- verify against the CSV layout).
    test_inputs = torch.tensor(np.array(test_data)[:, 1:].astype(float))
    out = model.predict(test_inputs.float())

    # Guarantee exactly one column per class id, in order, even if the frame
    # returned by predict() lacks columns for classes that were never
    # predicted; otherwise the rename below fails with a length mismatch.
    # Casting to int keeps the file 0/1 even when the dummies are booleans.
    out = out.reindex(columns=range(len(class_names)), fill_value=0).astype(int)
    out.columns = class_names
    out.insert(0, 'id', test_data['id'])
    out.to_csv(output_csv, index=False)
    return out
# Write the prediction file for the test set using the current model weights.
predict_save()