为了完成实验三,找了个相近的例子去学习。来自微信公众号CodeInHand中一篇原创文章的学习,也可参考其他相关文章。
代码:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import time
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split#分割数据集
import numpy as np
def dataprocess(path=r"E:\\1desktop\\now\\2019春季-安全系统实验\\实验3\\german.txt"):
    """Load the dataset and z-score-standardize every feature column.

    Parameters
    ----------
    path : str
        Path to a whitespace-delimited text file of shape (N, L); the
        first L-1 columns are features, the last column is the class
        label. Defaults to the original hard-coded location.

    Returns
    -------
    np.ndarray
        The loaded (N, L) array with each feature column transformed to
        zero mean and unit standard deviation; the label column (last
        column) is left untouched.
    """
    # np.loadtxt accepts a path directly; the original wrapped it in an
    # open(...) call whose handle was never closed.
    dat = np.loadtxt(path)
    N, L = dat.shape
    for j in range(L - 1):  # skip the last (label) column
        meanVal = np.mean(dat[:, j])
        stdVal = np.std(dat[:, j])
        # NOTE(review): assumes no constant column (stdVal != 0) —
        # holds for the German credit data; confirm for other inputs.
        dat[:, j] = (dat[:, j] - meanVal) / stdVal
    return dat
def genTrainTest(dat, train_size=900):
    """Shuffle rows in place and split into train/test features and labels.

    Parameters
    ----------
    dat : np.ndarray, shape (N, L)
        First L-1 columns are features; the last column holds class
        labels encoded as 1/2 (German credit convention).
    train_size : int
        Number of rows assigned to the training split. Defaults to 900,
        matching the original hard-coded split.

    Returns
    -------
    tuple of np.ndarray
        (train_features, train_labels, test_features, test_labels).
        Labels are shifted from {1, 2} to {0, 1} so they can be fed to
        nn.CrossEntropyLoss directly.

    Note
    ----
    `dat` is shuffled IN PLACE as a side effect of this call.
    """
    N, L = dat.shape
    np.random.shuffle(dat)  # shuffle rows in place
    traindat = dat[:train_size, :L - 1]
    trainlable = dat[:train_size, L - 1] - 1  # 1/2 -> 0/1
    testdat = dat[train_size:, :L - 1]
    testlabel = dat[train_size:, L - 1] - 1
    return traindat, trainlable, testdat, testlabel
#逻辑回归
class LRmodule(nn.Module):
    """Logistic-regression classifier: one linear layer mapping
    `input_size` features to 2 class logits."""

    def __init__(self, input_size):
        super(LRmodule, self).__init__()
        self.input_size = input_size
        self.fc = nn.Linear(input_size, 2)

    def forward(self, x):
        """Return raw class logits of shape (batch, 2).

        nn.CrossEntropyLoss applies log-softmax internally, so the
        original `torch.sigmoid` here double-squashed the outputs and
        shrank gradients; raw logits are the correct input. argmax-based
        accuracy downstream is unaffected since sigmoid is monotonic.
        """
        return self.fc(x)
def Accuracy(pred, label):
    """Return the fraction of rows where argmax(pred) equals label.

    `pred` is a (N, C) score tensor, `label` a (N,) integer tensor;
    both are moved to CPU and compared as NumPy arrays.
    """
    scores = pred.cpu().data.numpy()
    truth = label.cpu().data.numpy()
    hits = np.float32(np.argmax(scores, 1) == truth)
    return np.mean(hits)
if __name__ == "__main__":
    # Load, standardize, and split the German credit data.
    data = dataprocess()
    traindat, trainlable, testdat, testlabel = genTrainTest(data)
    input_size = traindat.shape[1]

    model = LRmodule(input_size)
    # CrossEntropyLoss expects raw logits and integer class labels.
    Lossfunc = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # torch.autograd.Variable is deprecated (tensors carry autograd
    # directly since PyTorch 0.4); convert once outside the loop, since
    # the data never changes between epochs.
    indata = torch.from_numpy(traindat).float()
    inlabel = torch.from_numpy(trainlable).long()

    model.train()
    epochs = 2000
    for e in range(epochs):
        outs = model(indata)
        loss = Lossfunc(outs, inlabel)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (e + 1) % 100 == 0:
            print('Epoch [{}/{}], Loss:{:.4f}'.format(e + 1, epochs, loss.item()))

    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        testin = torch.from_numpy(testdat).float()
        testlab = torch.from_numpy(testlabel).long()
        testouts = model(testin)
        Acc = Accuracy(testouts, testlab)
    print('Test Accuracy', Acc)
其中dataprocess()函数:
意义:数据中心化和标准化在回归分析中是为了消除由于量纲不同、自身变异或者数值相差较大所引起的误差。
原理:数据标准化:是指数值减去均值,再除以标准差;
数据中心化:是指变量减去它的均值。
目的:通过中心化和标准化处理,得到均值为0,标准差为1的服从标准正态分布的数据。