MNIST数据
这里大家都知道MNIST数据集是啥,所以不做过多说明,只放代码:
import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip
import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Load a gzip-compressed IDX3 image file as a 2-D ``uint8`` array.

    The 16-byte header is read as four big-endian unsigned 32-bit integers
    (magic number, image count, rows, columns); the remaining bytes are the
    pixel values.

    Args:
        path: Path of the ``.gz`` file to read.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` and shape ``(num, rows * cols)``,
        one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Use the dimensions stored in the header rather than a hard-coded
        # 28*28 so files with another image size load correctly; for MNIST
        # (28x28) the result is unchanged.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img
def __read_label(path):
    """Load a gzip-compressed IDX1 label file as a 1-D ``uint8`` array.

    The first 8 bytes (magic number and item count, big-endian uint32) are
    consumed as the header; every remaining byte is one label.

    Args:
        path: Path of the ``.gz`` label file.

    Returns:
        1-D ``np.ndarray`` of dtype ``uint8``.
    """
    with gzip.open(path, 'rb') as stream:
        _magic, _count = unpack('>2I', stream.read(8))
        payload = stream.read()
    return np.frombuffer(payload, dtype=np.uint8)
def __normalize_image(image):
    """Return ``image`` converted to ``float32`` and divided by 255.

    Args:
        image: Array of pixel values.

    Returns:
        A new ``float32`` array; the input is not modified.
    """
    scaled = image.astype(np.float32)  # astype makes a copy, so dividing in place is safe
    scaled /= 255.0
    return scaled
def __one_hot_label(label):
    """One-hot encode integer class labels into 10 columns.

    Args:
        label: Sequence of class indices with a ``shape`` attribute.

    Returns:
        ``np.ndarray`` of shape ``(label.shape[0], 10)`` (float64) holding a
        single 1 per row at the column given by the label.
    """
    count = label.shape[0]
    encoded = np.zeros((count, 10))
    for idx in range(count):
        encoded[idx][label[idx]] = 1
    return encoded
def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Read the four MNIST files and return train/test image-label pairs.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, pass the images through ``__normalize_image``.
        one_hot: If true, pass the labels through ``__one_hot_label``.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Images are read before labels, train before test.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Fully connected classifier: 784 -> 500 -> 300 -> 10, ReLU after every layer."""

    def __init__(self):
        super().__init__()
        # NOTE(review): the last ReLU is applied to the 10 output scores that
        # are fed to CrossEntropyLoss elsewhere in this file - confirm intended.
        layers = [
            nn.Linear(784, 500),
            nn.ReLU(),
            nn.Linear(500, 300),
            nn.ReLU(),
            nn.Linear(300, 10),
            nn.ReLU(),
        ]
        self.module = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.module(x)
def pred(net,images):
    """Return, for each row of ``images``, the index of the largest output of ``net.forward``."""
    scores = net.forward(images)
    return scores.argmax(dim = 1)
def SOmax(X,y):
    """Return the softmax probability that each row of ``X`` assigns to its label.

    Args:
        X: Score tensor, one row per sample.
        y: Class labels, one-hot encoded here via ``__one_hot_label``.

    Returns:
        Tensor with one column: ``exp(X)`` at the labelled class divided by
        the row sum of ``exp(X)``.
    """
    mask = torch.tensor(__one_hot_label(y))
    exp_scores = torch.exp(X)
    picked = (exp_scores*mask).sum(1,keepdims = True)
    total = exp_scores.sum(1,keepdims = True)
    return picked/total
'''
def Loss(net,images,labels):
logits = net.forward(images)
loss = -torch.log(SOmax(logits,labels)).mean()
return loss
'''
def Loss(net,images,labels):
    """Return the mean cross-entropy loss of ``net`` on one batch.

    Args:
        net: Model whose ``forward`` returns one row of class scores per sample.
        images: Input batch passed to ``net.forward``.
        labels: Integer class targets accepted by ``nn.CrossEntropyLoss``.

    Returns:
        Scalar tensor holding the loss (differentiable w.r.t. ``net``).
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    # The original also computed ``logits.argmax(dim=1)`` here and discarded
    # it; that dead work on every training step has been removed.
    return criteon(logits,labels)
def pred_acc(net,images,labels):
    """Count how many argmax predictions of ``net`` equal ``labels``.

    Returns:
        0-dim tensor with the number of correct predictions (a count, not a ratio).
    """
    guesses = net.forward(images).argmax(dim = 1)
    hits = guesses.eq(labels)
    return hits.sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` with consecutive mini-batches and print progress.

    The data is walked in order, ``batch`` samples at a time, wrapping around
    at the end of the training set.  The batch loss is printed whenever the
    iteration index is a multiple of 20, and the accuracy on the whole
    training set is printed each time the walk reaches the end of the data.

    Args:
        net: Model to optimise.
        train_images: Training inputs, indexed along the first dimension.
        train_labels: Training targets aligned with ``train_images``.
        batch: Mini-batch size.
        epoch: Number of passes over the training set.
        optim: Optimiser already bound to ``net``'s parameters.
    """
    num_samples = train_images.shape[0]
    iter_num = int(epoch*num_samples/batch)
    finished_epochs = 0
    for i in range(iter_num):
        start = i*batch%num_samples
        stop = start + batch
        loss = Loss(net,train_images[start:stop],train_labels[start:stop])
        optim.zero_grad()
        loss.backward()
        optim.step()
        if i%20 == 0:
            # NOTE(review): the logged iteration number is ``i + 20``, kept
            # exactly as in the original output format.
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),stop))
        # Independent ``if`` (previously ``elif``): the epoch summary must not
        # be skipped when an epoch boundary falls on a logging iteration.
        if (i + 1)*batch%num_samples == 0:
            finished_epochs = finished_epochs + 1
            # Accuracy is only reported, so no autograd graph is needed.
            with torch.no_grad():
                train_acc = pred_acc(net,train_images,train_labels)
            acc = train_acc.item()/num_samples
            print('the epoch:%d,the acc:%.2f'%(finished_epochs,acc))
def test_acc(net,images,labels):
logits = net.forward(images)
pred_label = logits.argmax(dim = 1)
return pred_label.eq(labels).sum().item()/images.shape[0]
# Locations of the four gzip-compressed MNIST files.
x_train_path = 'D:\\python\\train-images-idx3-ubyte.gz'
y_train_path = 'D:\\python\\train-labels-idx1-ubyte.gz'
x_test_path = 'D:\\python\\t10k-images-idx3-ubyte.gz'
y_test_path = 'D:\\python\\t10k-labels-idx1-ubyte.gz'

(x_train, y_train), (x_test, y_test) = load_mnist(
    x_train_path, y_train_path, x_test_path, y_test_path
)

# The timer covers tensor conversion, model construction and training.
tic = time.time()

# Float inputs and integer (long) class targets for CrossEntropyLoss.
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()

net = Net()
# Alternative optimiser kept for reference:
#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.999))

batch = 1000
epoch = 8
Train(net, train_images, train_labels, batch, epoch, optim)

ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))
covertype
这个数据集一共有7类,标签取值为1到7,因此训练前需要先将标签减1,映射到0~6:
import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip
import os
import torch.nn as nn
import time
class Net(nn.Module):
    """Small fully connected classifier: 54 -> 16 -> 16 -> 7.

    Each hidden linear layer is followed by ``Dropout(0.5)`` and an in-place
    ``LeakyReLU``; the output layer is followed by an in-place ``LeakyReLU``.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Linear(54, 16),
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(16, 16),
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(16, 7),
            nn.LeakyReLU(inplace=True),
        ]
        self.module = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.module(x)
def pred(net,images):
    """Return, for each row of ``images``, the index of the largest output of ``net.forward``."""
    scores = net.forward(images)
    return scores.argmax(dim = 1)
def Loss(net,images,labels):
    """Return the mean cross-entropy loss of ``net`` on one batch.

    Args:
        net: Model whose ``forward`` returns one row of class scores per sample.
        images: Input batch passed to ``net.forward``.
        labels: Integer class targets accepted by ``nn.CrossEntropyLoss``.

    Returns:
        Scalar tensor holding the loss (differentiable w.r.t. ``net``).
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    # The original also computed ``logits.argmax(dim=1)`` here and discarded
    # it; that dead work on every training step has been removed.
    return criteon(logits,labels)
def pred_acc(net,images,labels):
    """Count how many argmax predictions of ``net`` equal ``labels``.

    Returns:
        0-dim tensor with the number of correct predictions (a count, not a ratio).
    """
    guesses = net.forward(images).argmax(dim = 1)
    hits = guesses.eq(labels)
    return hits.sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` with consecutive mini-batches and print progress.

    The data is walked in order, ``batch`` samples at a time, wrapping around
    at the end of the training set.  The number of correct predictions on
    each batch (measured before that batch's update) is accumulated and
    reported as the epoch accuracy each time the walk reaches the end of the
    data; the batch loss is printed whenever the iteration index is a
    multiple of 20.

    Args:
        net: Model to optimise.
        train_images: Training inputs, indexed along the first dimension.
        train_labels: Training targets aligned with ``train_images``.
        batch: Mini-batch size.
        epoch: Number of passes over the training set.
        optim: Optimiser already bound to ``net``'s parameters.
    """
    num_samples = train_images.shape[0]
    iter_num = int(epoch*num_samples/batch)
    m = 0
    train_acc = 0
    for i in range(iter_num):
        x = i*batch%num_samples
        y = x + batch
        loss = Loss(net,train_images[x:y],train_labels[x:y])
        train_acc += pred_acc(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()
        if i%20 == 0:
            # NOTE(review): the logged iteration number is ``i + 20``, kept
            # exactly as in the original output format.
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent ``if`` (previously ``elif``): if an epoch boundary fell
        # on a logging iteration the counter was never reset, so the next
        # reported accuracy would sum two epochs and could exceed 1.
        if (i + 1)*batch%num_samples == 0:
            m = m + 1
            acc = train_acc.item()/num_samples
            print('the epoch:%d,the acc:%.2f'%(m,acc))
            train_acc = 0
def test_acc(net,images,labels):
logits = net.forward(images)
pred_label = logits.argmax(dim = 1)
return pred_label.eq(labels).sum().item()/images.shape[0]
# Comma-separated covertype file; the last column is the class label (1..7).
cover_path = 'C:\\Users\\Desktop\\covertype\\cover.data'
data = np.loadtxt(cover_path,delimiter = ',')
tic = time.time()

# The first ``train_num`` rows are used for training, the rest for testing.
train_num = 500000
train_images = torch.tensor(data[:train_num,:-1]).float()
# Labels are shifted by one so they start at 0, as CrossEntropyLoss expects.
train_labels = torch.tensor(data[:train_num,-1] - 1).long()
test_images = torch.tensor(data[train_num:,:-1]).float()
# Fixed: the test labels were read from column -10 (a feature column) instead
# of the label column -1, which made the reported test accuracy meaningless.
test_labels = torch.tensor(data[train_num:,-1] - 1).long()

net = Net()
optim = torch.optim.SGD(net.parameters(),lr = 5e-3,momentum = 0.78)
batch = 10000
epoch = 8
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))
下面是手动实现SGD及其变体(动量法、Nesterov、Adagrad)的代码:
import numpy as np
from struct import unpack
import gzip
import random
import matplotlib.pyplot as plt
import time
np.random.seed(1234)
def __read_image(path):
    """Load a gzip-compressed IDX3 image file as a 2-D ``uint8`` array.

    The 16-byte header is read as four big-endian unsigned 32-bit integers
    (magic number, image count, rows, columns); the remaining bytes are the
    pixel values.

    Args:
        path: Path of the ``.gz`` file to read.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` and shape ``(num, rows * cols)``,
        one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Use the dimensions stored in the header rather than a hard-coded
        # 28*28 so files with another image size load correctly; for MNIST
        # (28x28) the result is unchanged.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img
def __read_label(path):
    """Load a gzip-compressed IDX1 label file as a 1-D ``uint8`` array.

    The first 8 bytes (magic number and item count, big-endian uint32) are
    consumed as the header; every remaining byte is one label.

    Args:
        path: Path of the ``.gz`` label file.

    Returns:
        1-D ``np.ndarray`` of dtype ``uint8``.
    """
    with gzip.open(path, 'rb') as stream:
        _magic, _count = unpack('>2I', stream.read(8))
        payload = stream.read()
    return np.frombuffer(payload, dtype=np.uint8)
def __normalize_image(image):
    """Return ``image`` converted to ``float32`` and divided by 255.

    Args:
        image: Array of pixel values.

    Returns:
        A new ``float32`` array; the input is not modified.
    """
    scaled = image.astype(np.float32)  # astype makes a copy, so dividing in place is safe
    scaled /= 255.0
    return scaled
def newlabel(label):
    """Map class labels to binary targets by parity.

    Args:
        label: Array of integer labels.

    Returns:
        Column vector (``label.size`` x 1, float64) holding ``-1`` where the
        label is even and ``1`` where it is odd.
    """
    parity_targets = np.where(label % 2 == 0, -1.0, 1.0)
    return parity_targets.reshape(label.size, 1)
def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=True):
    """Read the four MNIST files and return train/test image-label pairs.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, pass the images through ``__normalize_image``.
        one_hot: If true, pass the labels through ``newlabel``.  Despite the
            parameter name, this yields -1/+1 parity targets, not a one-hot
            encoding.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Images are read before labels, train before test.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = newlabel(train_y)
        test_y = newlabel(test_y)

    return (train_x, train_y), (test_x, test_y)
def accuracy(x,y,w):
    """Return a per-sample 0/1 indicator of correct sign predictions.

    A sample is predicted ``+1`` when ``x @ w > 0`` and ``-1`` when
    ``x @ w <= 0``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        w: Weight column vector.

    Returns:
        ``float32`` array: 1 where the prediction equals ``y``, else 0.
        Callers sum it and divide by the sample count.
    """
    scores = x@w
    positive = (scores > 0).astype('float32')
    non_positive = (scores <= 0).astype('float32')
    predicted = positive - non_positive
    return (predicted == y).astype('float32')
def loss(x,y,w,lam):
    """Return the L1-regularised logistic loss.

    Computes ``mean(log(1 + exp(-y * (x @ w)))) + lam * ||w||_1``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets, one per row of ``x``.
        w: Weight column vector.
        lam: L1 regularisation strength.

    Returns:
        The scalar objective value.
    """
    margins = -y*x@w
    # logaddexp(0, z) == log(1 + exp(z)) but does not overflow to inf when z
    # is large, unlike evaluating exp(z) directly.
    return np.logaddexp(0.0, margins).mean() + lam*np.linalg.norm(w,1)
def grad_loss(x,y,w,lam):
    """Return the (sub)gradient of the L1-regularised logistic loss w.r.t. ``w``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets, one per row of ``x``.
        w: Weight column vector.
        lam: L1 regularisation strength.

    Returns:
        Column vector with the shape of ``w``: the mean logistic-loss
        gradient plus ``lam * sign(w)``.
    """
    signed_x = -y*x
    margins = signed_x@w
    # exp(z)/(1 + exp(z)) is the sigmoid of z.  Evaluating it as
    # exp(-log(1 + exp(-z))) avoids the inf/inf = nan that the direct form
    # produces once exp(z) overflows.
    sigma = np.exp(-np.logaddexp(0.0, -margins))
    return (signed_x*sigma).mean(0).reshape(-1,1) + lam*np.sign(w)
def grad_f(x,y,w):
    """Return the gradient of the unregularised logistic loss w.r.t. ``w``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets, one per row of ``x``.
        w: Weight column vector.

    Returns:
        Column vector with one entry per feature.
    """
    signed_x = -y*x
    margins = signed_x@w
    # Stable sigmoid: the direct exp(z)/(1 + exp(z)) gives nan once exp(z)
    # overflows.
    sigma = np.exp(-np.logaddexp(0.0, -margins))
    return (signed_x*sigma).mean(0).reshape(-1,1)
def SGD(w0,x,y,lam,epoch):
    """Minimise the regularised logistic loss with single-sample SGD.

    Each iteration draws one random sample, steps against its gradient, and
    stops early when the step length falls below ``1e-8``.  Every time
    ``(m + 1) % 20 == 0`` the step size is multiplied by 0.618 and the
    full-data gradient norm and loss are printed.

    NOTE(review): the source was pasted without indentation; the extent of
    the periodic "every 20 iterations" block is inferred.

    Args:
        w0: Initial weight column vector (not modified in place).
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        lam: L1 regularisation strength.
        epoch: Maximum number of iterations (single-sample steps).

    Returns:
        The final weight vector.
    """
    tol = 1e-8
    n_samples = x.shape[0]
    w = w0.copy()
    print('the init norm:%.3e'%(np.linalg.norm(grad_loss(x,y,w0,lam))))
    alpha = 1e-1
    m = 0
    while m < epoch:
        sk = random.choice(range(n_samples))
        # The stochastic gradient is evaluated at the current iterate w0.
        d = -grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w0,lam)
        w = w0 + alpha*d
        rho = np.linalg.norm(w - w0)
        if rho < tol:
            break
        w0 = w
        m += 1
        if (m + 1)%20 != 0:
            continue
        alpha *= 0.618
        w_grad = np.linalg.norm(grad_loss(x,y,w,lam))
        print('the iteration:%d,the err:%.3e,the grad:%.3e,the value:%.3e'%(m + 1,rho,w_grad,loss(x,y,w,lam)))
    return w
def SGDmom(w0,x,y,lam,epoch):
    """Minimise the regularised logistic loss with momentum SGD.

    The velocity is ``v1 = mu * v0 + alpha * direction`` with
    ``mu = (m + 2) / (m + 5)``; the iterate moves by ``v1``.  Iteration stops
    early when the step length falls below ``1e-8``.  Every time
    ``(m + 1) % 40 == 0`` the step size is multiplied by 0.618 and the
    full-data gradient norm and loss are printed.

    NOTE(review): the source was pasted without indentation; the extent of
    the periodic block is inferred.

    Args:
        w0: Initial weight column vector (not modified in place).
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        lam: L1 regularisation strength.
        epoch: Maximum number of iterations (single-sample steps).

    Returns:
        The final weight vector.
    """
    eps = 1e-8
    dim = x.shape[0]
    # Start from a copy of w0 (was zeros) so that ``epoch <= 0`` returns the
    # initial weights, consistent with ``SGD``.
    w = w0.copy()
    print('the init norm:%.3e'%(np.linalg.norm(grad_loss(x,y,w0,lam))))
    v0 = 0
    m = 0
    alpha = 1e-1
    while m < epoch:
        sk = random.choice(range(0,dim))
        diraction = -grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w0,lam)
        v1 = (m + 2)*v0/(m + 5) + alpha*diraction
        w = w0 + v1
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        # Fixed: carry the velocity forward.  It was never updated, so v0
        # stayed 0 and the method degenerated to plain SGD.
        v0 = v1
        m = m + 1
        if (m + 1)%40 == 0:
            alpha = alpha*0.618
            w_grad = np.linalg.norm(grad_loss(x,y,w,lam))
            print('the iteration:%d,the err:%.3e,the grad:%.3e,the value:%.3e'%(m + 1,rho,w_grad,loss(x,y,w,lam)))
    return w
def Nesterov(w0,x,y,lam,epoch):
    """Minimise the regularised logistic loss with Nesterov-momentum SGD.

    The stochastic gradient is evaluated at the look-ahead point
    ``w0 + mu * v0`` with ``mu = (m + 2) / (m + 5)``; the velocity becomes
    ``v1 = mu * v0 - alpha * gradient`` and the iterate moves by ``v1``.
    Iteration stops early when the step length falls below ``1e-5``.  Every
    time ``(m + 1) % 50 == 0`` the step size is halved and the training
    error is recorded and printed.

    NOTE(review): the source was pasted without indentation; the extent of
    the periodic block is inferred.

    Args:
        w0: Initial weight column vector (not modified in place).
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        lam: L1 regularisation strength.
        epoch: Maximum number of iterations (single-sample steps).

    Returns:
        ``(w, step, err)``: final weights, the iteration numbers at which the
        error was recorded, and the recorded training error rates.
    """
    err = []
    step = []
    eps = 1e-5
    dim = x.shape[0]
    # Start from a copy of w0 (was zeros) so that ``epoch <= 0`` returns the
    # initial weights instead of an all-zero vector.
    w = w0.copy()
    v0 = 0
    m = 0
    alpha = 1e-1
    while m < epoch:
        sk = random.choice(range(0,dim))
        mu = (m + 2)/(m + 5)
        diraction = grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w0 + mu*v0,lam)
        v1 = mu*v0 - alpha*diraction
        w = w0 + v1
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        # Fixed: carry the velocity forward.  It was never updated, so v0
        # stayed 0 and the method degenerated to plain SGD.
        v0 = v1
        m = m + 1
        if (m + 1)%50 == 0:
            alpha = alpha*0.5
            acc = accuracy(x,y,w).sum()/x.shape[0]
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w,step,err
def Adagrad(w0,x,y,lam,epoch):
    """Minimise the regularised logistic loss with single-sample Adagrad.

    Each step is the negative stochastic gradient scaled element-wise by
    ``alpha / sqrt(G + 1e-8)``, where ``G`` is the running sum of squared
    stochastic gradients.  Iteration stops early when the step length falls
    below ``1e-8``.  Every time ``(m + 1) % 50 == 0`` the step size is halved
    and the training error is recorded and printed.

    NOTE(review): the source was pasted without indentation; the extent of
    the periodic block is inferred.

    Args:
        w0: Initial weight column vector (not modified in place).
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        lam: L1 regularisation strength.
        epoch: Maximum number of iterations (single-sample steps).

    Returns:
        ``(w, step, err)``: final weights, the iteration numbers at which the
        error was recorded, and the recorded training error rates.
    """
    step, err = [], []
    eps = 1e-8  # used both as the stopping tolerance and inside the square root
    n_samples = x.shape[0]
    w = w0.copy()
    m = 0

    # Initial descent direction and squared-gradient accumulator at w0.
    sk = random.choice(range(n_samples))
    d0 = -grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w0,lam)
    g0 = d0*d0
    alpha = 1e-1

    while m < epoch:
        w = w0 + alpha*d0/np.sqrt(g0 + eps)
        # Prepare the direction for the next step at the new point.
        sk = random.choice(range(n_samples))
        d1 = -grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w,lam)
        g1 = g0 + d1*d1
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0, d0, g0 = w, d1, g1
        m += 1
        if (m + 1)%50 == 0:
            alpha *= 0.5
            acc = accuracy(x,y,w).sum()/n_samples
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w,step,err
# Locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\Desktop\\MNIST_data\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\Desktop\\MNIST_data\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\Desktop\\MNIST_data\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\Desktop\\MNIST_data\\t10k-labels-idx1-ubyte.gz'
(x_train, y_train), (x_test, y_test) = load_mnist(
    x_train_path, y_train_path, x_test_path, y_test_path
)

# Random initial weights, scaled so the largest absolute entry is 1.
wx = np.random.randn(784, 1)
w0 = wx / max(abs(wx[:, 0]))
x = x_train
y = y_train
epoch = 300

# One line colour and one marker style per regularisation strength.
col = ['r', 'b', 'k', 'g']
dot = ['r*', 'bo', 'k.', 'g']
m = 0
# NOTE(review): the source was pasted without indentation; everything up to
# the separator print is assumed to belong to the loop body.
for lam in [10, 1, 0.1, 0.001]:
    st = time.time()
    #w,step,err = Nesterov(w0,x,y,lam,epoch)
    w, step, err = Adagrad(w0, x, y, lam, epoch)
    #w = SGD(w0,x,y,lam,epoch)
    acc = accuracy(x_test, y_test, w).sum() / x_test.shape[0]
    ela = time.time() - st
    plt.plot(step, err, col[m], label='lambda:%.2e'%(lam))
    plt.plot(step, err, dot[m])
    m = m + 1
    plt.xlabel('iteration')
    plt.ylabel('train error')
    plt.title('mnist:Adagrad')
    plt.legend(loc='upper right')
    print('now,the lambda:%.2e'%(lam))
    print('the iteration:%d,the test acc:%.3e,the train time:%.2f'%(epoch,acc,ela))
    print('---------------------------------')
plt.savefig('mnistA.jpg')
def _label(data):
    """Build -1/+1 targets from the parity of the last column of ``data``.

    Args:
        data: 2-D array whose last column holds the class label.

    Returns:
        Column vector (float64) with ``-1`` where the label is even and ``1``
        otherwise.
    """
    last_column = data[:, -1]
    targets = np.where(last_column % 2 == 0, -1.0, 1.0)
    return targets.reshape(-1, 1)
np.random.seed(1234)
def loss(x,y,w,lam):
    """Return the L1-regularised logistic loss.

    Computes ``mean(log(1 + exp(-y * (x @ w)))) + lam * ||w||_1``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets, one per row of ``x``.
        w: Weight column vector.
        lam: L1 regularisation strength.

    Returns:
        The scalar objective value.
    """
    margins = -y*x@w
    # logaddexp(0, z) == log(1 + exp(z)) but does not overflow to inf when z
    # is large, unlike evaluating exp(z) directly.
    return np.logaddexp(0.0, margins).mean() + lam*np.linalg.norm(w,1)
def grad_loss(x,y,w,lam):
    """Return the (sub)gradient of the L1-regularised logistic loss w.r.t. ``w``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets, one per row of ``x``.
        w: Weight column vector.
        lam: L1 regularisation strength.

    Returns:
        Column vector with the shape of ``w``: the mean logistic-loss
        gradient plus ``lam * sign(w)``.
    """
    signed_x = -y*x
    margins = signed_x@w
    # exp(z)/(1 + exp(z)) is the sigmoid of z.  Evaluating it as
    # exp(-log(1 + exp(-z))) avoids the inf/inf = nan that the direct form
    # produces once exp(z) overflows.
    sigma = np.exp(-np.logaddexp(0.0, -margins))
    return (signed_x*sigma).mean(0).reshape(-1,1) + lam*np.sign(w)
def grad_f(x,y,w):
    """Return the gradient of the unregularised logistic loss w.r.t. ``w``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets, one per row of ``x``.
        w: Weight column vector.

    Returns:
        Column vector with one entry per feature.
    """
    signed_x = -y*x
    margins = signed_x@w
    # Stable sigmoid: the direct exp(z)/(1 + exp(z)) gives nan once exp(z)
    # overflows.
    sigma = np.exp(-np.logaddexp(0.0, -margins))
    return (signed_x*sigma).mean(0).reshape(-1,1)
def accuracy(x,y,w):
    """Return a per-sample 0/1 indicator of correct sign predictions.

    A sample is predicted ``+1`` when ``x @ w > 0`` and ``-1`` when
    ``x @ w <= 0``.

    Args:
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        w: Weight column vector.

    Returns:
        ``float32`` array: 1 where the prediction equals ``y``, else 0.
        Callers sum it and divide by the sample count.
    """
    scores = x@w
    positive = (scores > 0).astype('float32')
    non_positive = (scores <= 0).astype('float32')
    predicted = positive - non_positive
    return (predicted == y).astype('float32')
def Nesterov(w0,x,y,lam,epoch):
    """Minimise the regularised logistic loss with Nesterov-momentum SGD.

    The stochastic gradient is evaluated at the look-ahead point
    ``w0 + mu * v0`` with ``mu = (m + 2) / (m + 5)``; the velocity becomes
    ``v1 = mu * v0 - alpha * gradient`` and the iterate moves by ``v1``.
    Iteration stops early when the step length falls below ``1e-5``.  Every
    time ``(m + 1) % 50 == 0`` the step size is halved and the training
    error is recorded and printed.

    NOTE(review): the source was pasted without indentation; the extent of
    the periodic block is inferred.

    Args:
        w0: Initial weight column vector (not modified in place).
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        lam: L1 regularisation strength.
        epoch: Maximum number of iterations (single-sample steps).

    Returns:
        ``(w, step, err)``: final weights, the iteration numbers at which the
        error was recorded, and the recorded training error rates.
    """
    err = []
    step = []
    eps = 1e-5
    dim = x.shape[0]
    # Start from a copy of w0 (was zeros) so that ``epoch <= 0`` returns the
    # initial weights instead of an all-zero vector.
    w = w0.copy()
    v0 = 0
    m = 0
    alpha = 1e-1
    while m < epoch:
        sk = random.choice(range(0,dim))
        mu = (m + 2)/(m + 5)
        diraction = grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w0 + mu*v0,lam)
        v1 = mu*v0 - alpha*diraction
        w = w0 + v1
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        # Fixed: carry the velocity forward.  It was never updated, so v0
        # stayed 0 and the method degenerated to plain SGD.
        v0 = v1
        m = m + 1
        if (m + 1)%50 == 0:
            alpha = alpha*0.5
            acc = accuracy(x,y,w).sum()/x.shape[0]
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w,step,err
def Adagrad(w0,x,y,lam,epoch):
    """Minimise the regularised logistic loss with single-sample Adagrad.

    Each step is the negative stochastic gradient scaled element-wise by
    ``alpha / sqrt(G + 1e-8)``, where ``G`` is the running sum of squared
    stochastic gradients.  Iteration stops early when the step length falls
    below ``1e-8``.  Every time ``(m + 1) % 50 == 0`` the step size is halved
    and the training error is recorded and printed.

    NOTE(review): the source was pasted without indentation; the extent of
    the periodic block is inferred.

    Args:
        w0: Initial weight column vector (not modified in place).
        x: Feature matrix, one sample per row.
        y: Column of -1/+1 targets.
        lam: L1 regularisation strength.
        epoch: Maximum number of iterations (single-sample steps).

    Returns:
        ``(w, step, err)``: final weights, the iteration numbers at which the
        error was recorded, and the recorded training error rates.
    """
    step, err = [], []
    eps = 1e-8  # used both as the stopping tolerance and inside the square root
    n_samples = x.shape[0]
    w = w0.copy()
    m = 0

    # Initial descent direction and squared-gradient accumulator at w0.
    sk = random.choice(range(n_samples))
    d0 = -grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w0,lam)
    g0 = d0*d0
    alpha = 1e-1

    while m < epoch:
        w = w0 + alpha*d0/np.sqrt(g0 + eps)
        # Prepare the direction for the next step at the new point.
        sk = random.choice(range(n_samples))
        d1 = -grad_loss(x[sk:sk + 1,:],y[sk:sk + 1,:],w,lam)
        g1 = g0 + d1*d1
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0, d0, g0 = w, d1, g1
        m += 1
        if (m + 1)%50 == 0:
            alpha *= 0.5
            acc = accuracy(x,y,w).sum()/n_samples
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w,step,err
st = time.time()

# Split the already-loaded ``data`` array: the first ``train_num`` rows train,
# the rest test.  Targets are the -1/+1 parity labels produced by ``_label``.
train_num = 500000
y_train = _label(data)[:train_num, :]
x_train = (data[:, :-1])[:train_num, :]
y_test = _label(data)[train_num:, :]
x_test = (data[:, :-1])[train_num:, :]

# Random initial weights, scaled so the largest absolute entry is 1.
wx = np.random.randn(54, 1)
w0 = wx / max(abs(wx[:, 0]))
# Training features are scaled down by 1e4; the test features are used
# unscaled, which leaves the sign of ``x @ w`` unchanged.
x = x_train / 1e4
y = y_train
epoch = 300

# One line colour and one marker style per regularisation strength.
col = ['r', 'b', 'k', 'g']
dot = ['r*', 'bo', 'k.', 'g']
m = 0
# NOTE(review): the source was pasted without indentation; everything up to
# the separator print is assumed to belong to the loop body.
for lam in [10, 1, 0.1, 0.001]:
    st = time.time()
    w, step, err = Nesterov(w0, x, y, lam, epoch)
    #w,step,err = Adagrad(w0,x,y,lam,epoch)
    #w = SGD(w0,x,y,lam,epoch)
    acc = accuracy(x_test, y_test, w).sum() / x_test.shape[0]
    ela = time.time() - st
    plt.plot(step, err, col[m], label='lambda:%.2e'%(lam))
    plt.plot(step, err, dot[m])
    m = m + 1
    plt.xlabel('iteration')
    plt.ylabel('train error')
    plt.title('covertype:Nesterov')
    plt.legend(loc='upper right')
    print('now,the lambda:%.2e'%(lam))
    print('the iteration:%d,the test acc:%.3e,the train time:%.2f'%(epoch,acc,ela))
    print('---------------------------------')
plt.savefig('coverN.jpg')