1. Pytorch的优势
1)可以使用GPU加速运算
2)autograd.grad()求导函数,自动求导
3)常用的网络层,如nn.Linear; nn.Conv2d; nn.ReLU; nn.Softmax; nn.MSELoss
2. 查看pytorch,cuda版本
import torch
# Quick environment check: CUDA availability, the CUDA toolkit version
# this torch build was compiled against, and the torch version itself.
print(torch.cuda.is_available())
print(torch.version.cuda)
print(torch.__version__)
3. 回归问题
Linear Regression(预测值为实数)、
Logistic Regression(预测值在[0,1],加了一个sigmoid,二分类)、
Classification(多分类,总概率为1)
3.1 梯度下降
▽x为在x处的导数值,lr为学习率
最常用的:SGD、Adam等
3.2 线性回归(例:求解一个实际的线性方程)
import numpy as np
#compute loss
def compute_error_for_line_given_points(b, w, points):
    """Mean squared error of the line y = w*x + b over all points.

    points is an N x 2 array-like (column 0 = x, column 1 = y);
    returns sum((y - (w*x + b))**2) / N.
    """
    n = float(len(points))
    squared_residuals = (
        (points[i, 1] - (w * points[i, 0] + b)) ** 2
        for i in range(len(points))
    )
    return sum(squared_residuals) / n
#compute gradient
def step_gradient(b_current, w_current, points, lr):
    """One gradient-descent step on the MSE loss of the line y = w*x + b.

    Accumulates the analytic gradients of the mean squared error with
    respect to b and w over all points, then moves both parameters one
    step of size lr against the gradient. Returns [new_b, new_w].
    """
    n = float(len(points))
    grad_b = 0.0
    grad_w = 0.0
    for row in range(len(points)):
        x = points[row, 0]
        y = points[row, 1]
        residual = w_current * x + b_current - y  # prediction error for this point
        grad_b += (2 / n) * residual
        grad_w += (2 / n) * x * residual
    return [b_current - lr * grad_b, w_current - lr * grad_w]
# 迭代
def gradient_descent_runner(points, starting_b, starting_w, lr, num_iteration):
    """Run num_iteration steps of gradient descent from (starting_b, starting_w).

    Returns [b, w] after the final step.

    Bug fix: the original unpacked the step result into ``b, m``, so the
    updated slope was thrown away and w never moved off its starting value
    (the recorded sample run shows ``w = 0`` after 1000 iterations).
    Both updated parameters are now kept.
    """
    b = starting_b
    w = starting_w
    pts = np.array(points)  # convert once instead of on every iteration
    for _ in range(num_iteration):
        b, w = step_gradient(b, w, pts, lr)
    return [b, w]
#主程序
def run():
points = np.genfromtxt("data_5.csv", delimiter=",")
learning_rate = 0.0001
initial_b = 0 # initial y-intercept guess
initial_w = 0 # initial slope guess
num_iterations = 1000
print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
.format(initial_b, initial_w,
compute_error_for_line_given_points(initial_b, initial_w, points))
)
print("Running...")
[b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
print("After {0} iterations b = {1}, w= {2}, error = {3}".
format(num_iterations, b, w,
compute_error_for_line_given_points(b, w, points))
)
if __name__ == '__main__':
run()
'''result
Starting gradient descent at b = 0, w = 0, error = 5565.107834483211
Running...
After 1000 iterations b = 13.185818994067716, w = 0, error = 3820.831234776982
(注意: w 始终为 0,这是原代码的 bug——gradient_descent_runner 中把 step_gradient
的返回值解包成了 b,m,更新后的 w 被丢弃;改为 b,w 后斜率才会正常收敛)
'''
3.3 分类问题(例:手写数字识别)
一个数字图片为28×28的图片(MNIST数据集),flatten后为[1, 784]的向量
用3个线性函数完成:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import torchvision
from matplotlib import pyplot as plot
from pytorch9_utils import plot_image, plot_curve, one_hot
batch_size = 512  # number of images processed per batch

######################## step1. load dataset########################
# Training loader: downloads MNIST into 'mnist_data' on first use.
# ToTensor converts each numpy/PIL image into a float tensor; Normalize
# recenters pixel values around 0 with the given mean/std (0.1307/0.3081),
# which suits deep-learning optimization better than raw [0, 1] values.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('mnist_data', train=True, download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(
                (0.1307,), (0.3081,))
        ])),
    batch_size=batch_size, shuffle=True)  # shuffle: randomize image order while loading
# Test loader: identical preprocessing, but no shuffling.
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('mnist_data/', train=False, download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(
                (0.1307,), (0.3081,))
        ])),
    batch_size=batch_size, shuffle=False)

# Peek at one training batch to sanity-check shapes and content.
x, y = next(iter(train_loader))
print(x.shape,y.shape)
plot_image(x,y,'image sample')  # visualization helper from pytorch9_utils
###########################step2: build a network#########################
class Net(nn.Module):
    """Three-layer fully-connected classifier for flattened 28x28 digits.

    Maps a [b, 784] batch through 784 -> 256 -> 64 -> 10. The hidden
    sizes (256, 64) are empirical choices; the 10 outputs are raw class
    scores — no softmax is applied inside the network.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Each layer computes x @ w + b.
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Forward pass: x must already be flattened to [b, 784]."""
        h1 = F.relu(self.fc1(x))   # h1 = relu(x w1 + b1)
        h2 = F.relu(self.fc2(h1))  # h2 = relu(h1 w2 + b2)
        return self.fc3(h2)        # h3 = h2 w3 + b3 (logits; softmax/loss applied by caller)
#################################step3: train################################
net = Net()  # instantiate the network
# Parameters being optimized: [w1, b1, w2, b2, w3, b3]
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
train_loss = []  # records the loss of every batch, for plotting later
for epoch in range(3):  # three full passes over the training set
    for batch_idx, (x, y) in enumerate(train_loader):
        # x: [b, 1, 28, 28], y: [512]
        # [b, 1, 28, 28] => [b, 784]
        x = x.view(x.size(0), 28*28)  # flatten the 4-D image batch to 2-D for the linear layers
        # => [b, 10]
        out = net(x)  # out is [b, 10]
        y_onehot = one_hot(y)  # one-hot encode labels, e.g. 3 -> [0,0,0,1,0,0,0,0,0,0]
        # loss = mse(out, y_onehot)
        loss = F.mse_loss(out, y_onehot)  # mean-squared-error loss against the one-hot target
        optimizer.zero_grad()  # clear gradients accumulated from the previous step
        loss.backward()  # backprop: compute gradients
        # w' = w - lr*grad
        optimizer.step()  # apply the parameter update
        train_loss.append(loss.item())  # save the scalar loss
        if batch_idx % 10==0:  # print progress every 10 batches
            print(epoch, batch_idx, loss.item())
plot_curve(train_loss)  # plot the training-loss curve
# we get optimal [w1, b1, w2, b2, w3, b3]
#########################step4:test##################################
# Evaluate accuracy over the whole test set.
total_correct = 0
for x,y in test_loader:
    x = x.view(x.size(0), 28*28)  # flatten exactly as in training
    out = net(x)
    # out: [b, 10] => pred: [b]
    pred = out.argmax(dim=1)  # predicted class = index of the largest score
    correct = pred.eq(y).sum().float().item()  # number of correct predictions in this batch
    total_correct += correct
# accuracy = correct predictions / total test samples
total_num = len(test_loader.dataset)
acc = total_correct / total_num
print('test acc:', acc)
# Show one example batch of predictions.
x, y = next(iter(test_loader))  # grab one batch of test samples
out = net(x.view(x.size(0), 28*28))  # run inference on the flattened batch
pred = out.argmax(dim=1)
plot_image(x, pred, 'test')  # display the images with their predicted labels