CV Computer Vision Core 04: Neural Networks and Backpropagation (limitations of linear models and how to improve them; multi-class classification with binary classifiers: the perceptron; multi-class classification with logistic regression; neural networks: backpropagation; homework: training on the MNIST dataset)
Outline:
1. Limitations of linear models and how to improve them
2. Multi-class classification with binary classifiers: the perceptron
3. Multi-class classification with logistic regression
4. Neural networks: backpropagation
5. Support vector machines
1. Limitations of linear models and how to improve them
To improve performance, there are generally two things to work on: the features and the model.
Model: y = w1x1 + w2x2 + … + w6x6
With y = w1x1, the model is a straight line through the origin.
With y = w1x1 + w2x2, the model is a plane through the origin.
With y = w1x1 + w2x2 + … + w6x6, the model is a hyperplane in six dimensions, and it still passes through the origin.
Suppose the features are [3, 2, 1, 6, 7, 5, 3.5], corresponding one-to-one to the labels [0, 1, 2, 3, 4, 5, 6].
A line through the origin of the form y = w1x1 cannot describe this feature-to-label relationship, so the descriptive power of a line through the origin is extremely limited.
Model improvement 1: add a constant bias b, which raises the descriptive power of the linear model.
A new problem then appears: no single straight line can pass through several points at arbitrary positions. With the bias b the line no longer has to pass through the origin, but in theory one line still cannot satisfy all of the sample points.
A linear model therefore requires all samples to lie on a single line or plane.
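As a quick check of that claim, here is a minimal sketch (the seven sample values are the ones quoted above; the least-squares solve is added purely for illustration) showing that even with a bias b, a single straight line y = w*x + b still leaves nonzero errors on these points:
import torch

# Illustrative sketch, not part of the original course code.
x = torch.tensor([3.0, 2.0, 1.0, 6.0, 7.0, 5.0, 3.5])
labels = torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])

# Design matrix [x, 1]: the column of ones lets the fitted line have a bias b,
# so it is not forced to pass through the origin.
A = torch.stack([x, torch.ones_like(x)], dim=1)            # shape (7, 2)
wb = torch.inverse(A.t() @ A) @ A.t() @ labels.view(7, 1)  # least-squares estimate of (w, b)
w, b = wb[0].item(), wb[1].item()

residual = A @ wb - labels.view(7, 1)
print("w=%.3f, b=%.3f" % (w, b))
print(residual.view(-1))  # not all zero: one line cannot pass through every point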
Model improvement 2: use a curved model (a nonlinear model).
There are far too many possible nonlinear models to try them all one by one, so a fundamentally better approach is needed. (The example below is trained with a second-order, i.e. nonlinear, model, and the result is still not good.)
When a straight line cannot meet the requirement, a curve can be used. As long as the curve describes the points well enough to reach the expected values, the theoretical model error can be removed.
Second-order model: y = w1,1*x1 + … + w7,1*x7 + w1,2*x1^2 + … + w7,2*x7^2, where x7 = 1 serves as the bias term (this is exactly what the code below computes).
Training example with the second-order (nonlinear) model:
The task is still recognizing the hand-made digit images:
#coding:utf-8
# code for week2,recognize_computer_vision.py
# houchangligong,zhaomingming,20200601,
import torch
from itertools import product
import pdb
import sys
def generate_data():
    # Generate the 6x6 image matrix for each of the ten digits 0-9
image_data=[]
num_0 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,1,0,0,1,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_0)
num_1 = torch.tensor(
[[0,0,0,1,0,0],
[0,0,1,1,0,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,1,1,1,0],
[0,0,0,0,0,0]])
image_data.append(num_1)
num_2 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,0,1,0,0],
[0,0,1,0,0,0],
[0,1,1,1,1,0],
[0,0,0,0,0,0]])
image_data.append(num_2)
num_3 = torch.tensor(
[[0,0,1,1,0,0],
[0,0,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_3)
num_4 = torch.tensor(
[
[0,0,0,0,1,0],
[0,0,0,1,1,0],
[0,0,1,0,1,0],
[0,1,1,1,1,1],
[0,0,0,0,1,0],
[0,0,0,0,0,0]])
image_data.append(num_4)
num_5 = torch.tensor(
[
[0,1,1,1,0,0],
[0,1,0,0,0,0],
[0,1,1,1,0,0],
[0,0,0,0,1,0],
[0,1,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_5)
num_6 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,0,0],
[0,1,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_6)
num_7 = torch.tensor(
[
[0,1,1,1,1,0],
[0,0,0,0,1,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_7)
num_8 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_8)
num_9 = torch.tensor(
[[0,0,1,1,1,0],
[0,1,0,0,1,0],
[0,1,1,1,1,0],
[0,0,0,0,1,0],
[0,0,0,0,1,0],
[0,0,0,0,0,0]])
image_data.append(num_9)
image_label=[0,1,2,3,4,5,6,7,8,9]
return image_data,image_label
def get_feature(x):
    # Extract a feature vector from image x
    def get_shadow(x, dim):
        # Project (sum) the image along the given dimension
        feature = torch.sum(x, dim)
        feature = feature.float()
        # (normalization is disabled in this version)
        feature = feature.view(1, 6)
        return feature
    # Use the column projection as the feature
    feature = get_shadow(x, 0)
    return feature
def model(feature, weights):
    # Decide which of the classes [0, 1, ..., 9] the feature belongs to.
    # A constant 1.0 is appended as the bias feature, because 1 times any weight is that weight:
    # y = w1*x1 + w2*x2 + ... + w6*x6 + b*1
    # torch.cat((A, B), 0) concatenates along dim 0 (vertically);
    # torch.cat((A, B), 1) concatenates along dim 1 (horizontally).
    feature = torch.cat((feature, torch.tensor(1.0).view(1, 1)), 1)
    # feature2 is also 1x7: the element-wise square, i.e. the second-order features
    feature2 = feature.mul(feature)
    # First term: the linear (first-order) part; second term: the quadratic (second-order) part
    y = feature.mm(weights[:, 0:1]) + feature2.mm(weights[:, 1:2])
    return y
def train_model(image_data, image_label, weights, lr):
    loss_value_before = 100000000.
    loss_value = 1000000.
    for epoch in range(0, 10000):
        loss_value_before = loss_value
        loss_value = 0
        for i in range(0, 10):
            feature = get_feature(image_data[i])
            y = model(feature, weights)
            # Squared-error loss between the predicted score and the label value
            loss = 0.5 * (y - image_label[i]) * (y - image_label[i])
            loss_value += loss.data.item()
            # Update rule w = w - (y - label) * x * lr, realized below through autograd
            loss.backward()
            weights.data.sub_(weights.grad.data * lr)
            weights.grad.data.zero_()
        print("epoch=%s,loss=%s/%s,weights=%s" % (epoch, loss_value, loss_value_before, weights.view(14)))
    return weights
if __name__=="__main__":
    # Note: if nan appears in the output, training has failed (diverged).
    # Note the weight size (7, 2): 7 features (6 + bias) by 2 columns (linear and quadratic terms)
    weights = torch.randn(7, 2, requires_grad=True)
    image_data, image_label = generate_data()
    # Print the image for digit 0
    print("The image for digit 0 is:")
    print(image_data[0])
    print("-"*20)
    # Print the image for digit 8
    print("The image for digit 8 is:")
    print(image_data[8])
    print("-"*20)
    # lr = float(sys.argv[1])  # optionally take the learning rate from the command line
    lr = 0.001
    # Train the model
    weights = train_model(image_data, image_label, weights, lr)
    # Classify each image
    print("Classifying each image")
    for i in range(0, 10):
        x = image_data[i]
        # Extract the feature of the current image
        feature = get_feature(x)
        # Classify the extracted feature
        y = model(feature, weights)
        # Print the classification result
        print("The classification score of image [%s] is [%s], its feature is [%s]" % (i, y, feature))
The results here are mediocre:
Model improvement 3: use multiple linear classifiers (multiple binary classifiers).
2. Multi-class classification with binary classifiers: the perceptron
Straight-line (linear) classification: build 10 binary classifiers directly, one per class; in the code they are combined into a single model (one weight matrix).
Viewed as binary classification, each classifier is responsible for only one class, so 10 classes require 10 models.
For example, for 0, i.e. [0, other]: a binary classification between class "0" and everything else; every "other" sample gets the target -1 and class "0" gets the target 0. One model is responsible for one class only.
For example, for 1, i.e. [1, other]: a binary classification between class "1" and everything else; every "other" sample gets the target -1 and class "1" gets the target 1. One model is responsible for one class only.
For example, for 2, i.e. [2, other]: a binary classification between class "2" and everything else; every "other" sample gets the target -1 and class "2" gets the target 2. One model is responsible for one class only.
The idea above, written as code:
#coding:utf-8
# code for week2,recognize_computer_vision.py
# houchangligong,zhaomingming,20200601,
import torch
from itertools import product
import pdb
import sys
import numpy as np
def generate_data():
    # Generate the 6x6 image matrix for each of the ten digits 0-9
image_data=[]
num_0 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,1,0,0,1,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_0)
num_1 = torch.tensor(
[[0,0,0,1,0,0],
[0,0,1,1,0,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,1,1,1,0],
[0,0,0,0,0,0]])
image_data.append(num_1)
num_2 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,0,1,0,0],
[0,0,1,0,0,0],
[0,1,1,1,1,0],
[0,0,0,0,0,0]])
image_data.append(num_2)
num_3 = torch.tensor(
[[0,0,1,1,0,0],
[0,0,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_3)
num_4 = torch.tensor(
[
[0,0,0,0,1,0],
[0,0,0,1,1,0],
[0,0,1,0,1,0],
[0,1,1,1,1,1],
[0,0,0,0,1,0],
[0,0,0,0,0,0]])
image_data.append(num_4)
num_5 = torch.tensor(
[
[0,1,1,1,0,0],
[0,1,0,0,0,0],
[0,1,1,1,0,0],
[0,0,0,0,1,0],
[0,1,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_5)
num_6 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,0,0],
[0,1,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_6)
num_7 = torch.tensor(
[
[0,1,1,1,1,0],
[0,0,0,0,1,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_7)
num_8 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_8)
num_9 = torch.tensor(
[[0,0,1,1,1,0],
[0,1,0,0,1,0],
[0,1,1,1,1,0],
[0,0,0,0,1,0],
[0,0,0,0,1,0],
[0,0,0,0,0,0]])
image_data.append(num_9)
image_label=[0,1,2,3,4,5,6,7,8,9]
return image_data,image_label
# get_feature still extracts the projection (shadow) information
def get_feature(x):
    # Extract a feature vector from image x
    def get_shadow(x, dim):
        # Project (sum) the image along the given dimension
        feature = torch.sum(x, dim)
        feature = feature.float()
        # (normalization is disabled in this version)
        feature = feature.view(1, 6)
        return feature
    # Use the column projection as the feature
    feature = get_shadow(x, 0)
    return feature
def label2ground_truth(image_label):
    # Build the 10x10 ground-truth matrix: row i is the target vector for image i.
    # Every entry is -1 (the "other" target); gt[label, label] is the label value itself.
    gt = torch.ones(10, 10)
    gt = gt * -1.0
    for label in image_label:
        gt[label, label] = float(label)
    return gt
def model(feature, weights):
    # Decide which of the classes [0, 1, ..., 9] the feature belongs to.
    # Append the constant 1.0 so the last row of weights acts as the bias,
    # then compute the scores of all 10 binary classifiers at once: y is 1x10.
    feature = torch.cat((feature, torch.tensor(1.0).view(1, 1)), 1)
    y = feature.mm(weights)
    return y
def train_model(image_data, image_label, weights, lr):
    loss_value_before = 1000000000000000.
    loss_value = 10000000000000.
    for epoch in range(0, 1000):
        loss_value_before = loss_value
        loss_value = 0
        for i in range(0, 10):
            feature = get_feature(image_data[i])
            y = model(feature, weights)
            gt = label2ground_truth(image_label)
            # Debug print: the i-th classifier's output and its target
            print("*%s,%s" % (y[0, i:i+1], gt[i:i+1, i:i+1]))
            # Squared error between the i-th classifier's output and its own target value
            loss = torch.sum((y[0, i:i+1] - gt[i:i+1, i:i+1]).mul(y[0, i:i+1] - gt[i:i+1, i:i+1]))
            loss_value += loss.data.item()
            # Update rule w = w - (y - target) * x * lr, realized below through autograd
            loss.backward()
            weights.data.sub_(weights.grad.data * lr)
            weights.grad.data.zero_()
        print("epoch=%s,loss=%s/%s" % (epoch, loss_value, loss_value_before))
    return weights
def get_result(y):
    # Return the class whose target value is closest to one of the 10 classifier outputs
    return torch.argmin(torch.from_numpy(np.array([torch.min(torch.abs(y - i)).item() for i in range(0, 10)]))).item()
if __name__=="__main__":
    # weights: 7 rows (6 features + bias) by 10 columns (one binary classifier per class)
    weights = torch.randn(7, 10, requires_grad=True)
    image_data, image_label = generate_data()
    # Print the image for digit 0
    print("The image for digit 0 is:")
    print(image_data[0])
    print("-"*20)
    # Print the image for digit 8
    print("The image for digit 8 is:")
    print(image_data[8])
    print("-"*20)
    # lr = float(sys.argv[1])
    lr = 0.001
    # Train the model
    weights = train_model(image_data, image_label, weights, lr)
    # Classify each image
    print("Classifying each image")
    for i in range(0, 10):
        x = image_data[i]
        # Extract the feature of the current image
        feature = get_feature(x)
        # Classify the extracted feature
        y = model(feature, weights)
        # Pick the class whose target value is closest to one of the 10 classifier outputs
        pred = torch.argmin(torch.from_numpy(np.array([torch.min(torch.abs(y - j)).item() for j in range(0, 10)]))).item()
        # Print the classification result
        print("The classification result of image [%s] is [%s], its feature is [%s], the 10 binary-classifier outputs are [%s]" % (i, pred, feature, y))
The formula for the 10 binary classifiers written together is exactly the computation of a fully connected layer.
The above is how binary classifiers are used to solve a multi-class problem.
This approach is the idea behind the perceptron, which is one application of the linear model.
Perceptron model formula:
y = WX + b
Parameters: W and b. With n input features and m outputs, that is m*n weights plus m biases.
Solution method: gradient descent.
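As a minimal sketch of the claim above that ten binary classifiers written together form one fully connected layer (shapes chosen to match the example: 6 input features, 10 classes; the variable names are illustrative), the 10 classifiers computed one by one give exactly the same result as a single fully connected layer y = xW + b:
import torch

# Illustrative sketch, not part of the original course code.
x = torch.randn(1, 6)    # one feature vector with 6 entries
W = torch.randn(6, 10)   # column k holds the weights of the k-th binary classifier
b = torch.randn(10)      # entry k holds the bias of the k-th binary classifier

# The 10 classifiers evaluated one at a time ...
scores_one_by_one = torch.stack([x @ W[:, k] + b[k] for k in range(10)], dim=1)

# ... equal one fully connected layer over all classes at once.
fc = torch.nn.Linear(6, 10)
with torch.no_grad():
    fc.weight.copy_(W.t())   # nn.Linear stores weights as (out_features, in_features)
    fc.bias.copy_(b)
scores_fc = fc(x)

print(torch.allclose(scores_one_by_one, scores_fc, atol=1e-6))  # True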
3. Multi-class classification with logistic regression
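Logistic regression passes each linear score through the sigmoid function sigma(z) = 1 / (1 + e^(-z)), which squashes it into (0, 1) so it can be read as the probability of belonging to that class; the predicted class is the one with the largest output. A minimal sketch (the values below are made up for illustration):
import torch

# Illustrative sketch, not part of the original course code.
def sigmoid(z):
    # sigma(z) = 1 / (1 + exp(-z)); equivalent to torch.sigmoid(z)
    return 1.0 / (1.0 + torch.exp(-z))

scores = torch.tensor([[-2.0, 0.0, 3.0]])  # raw linear scores for three classes
probs = sigmoid(scores)                    # each score mapped independently into (0, 1)
print(probs)                               # tensor([[0.1192, 0.5000, 0.9526]])
print(torch.argmax(probs, dim=1))          # predicted class: the largest probability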
Example using the sigmoid function:
#coding:utf-8
# code for week2,recognize_computer_vision.py
# houchangligong,zhaomingming,20200601,
import torch
from itertools import product
import pdb
import sys
def generate_data():
    # Generate the 6x6 image matrix for each of the ten digits 0-9
image_data=[]
num_0 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,1,0,0,1,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_0)
num_1 = torch.tensor(
[[0,0,0,1,0,0],
[0,0,1,1,0,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,1,1,1,0],
[0,0,0,0,0,0]])
image_data.append(num_1)
num_2 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,0,1,0,0],
[0,0,1,0,0,0],
[0,1,1,1,1,0],
[0,0,0,0,0,0]])
image_data.append(num_2)
num_3 = torch.tensor(
[[0,0,1,1,0,0],
[0,0,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_3)
num_4 = torch.tensor(
[
[0,0,0,0,1,0],
[0,0,0,1,1,0],
[0,0,1,0,1,0],
[0,1,1,1,1,1],
[0,0,0,0,1,0],
[0,0,0,0,0,0]])
image_data.append(num_4)
num_5 = torch.tensor(
[
[0,1,1,1,0,0],
[0,1,0,0,0,0],
[0,1,1,1,0,0],
[0,0,0,0,1,0],
[0,1,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_5)
num_6 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,0,0],
[0,1,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_6)
num_7 = torch.tensor(
[
[0,1,1,1,1,0],
[0,0,0,0,1,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,0,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_7)
num_8 = torch.tensor(
[[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,1,0,0,1,0],
[0,0,1,1,0,0],
[0,0,0,0,0,0]])
image_data.append(num_8)
num_9 = torch.tensor(
[[0,0,1,1,1,0],
[0,1,0,0,1,0],
[0,1,1,1,1,0],
[0,0,0,0,1,0],
[0,0,0,0,1,0],
[0,0,0,0,0,0]])
image_data.append(num_9)
image_label=[0,1,2,3,4,5,6,7,8,9]
return image_data,image_label
def get_feature(x):
    # Extract a feature vector from image x
    def get_shadow(x, dim):
        # Project (sum) the image along the given dimension
        feature = torch.sum(x, dim)
        feature = feature.float()
        # Normalization: divide each entry by the running sum
        for i in range(0, feature.shape[0]):
            feature[i] = feature[i] / sum(feature)
        feature = feature.view(1, 6)
        return feature
    # Use the sum of the column projection and the row projection as the feature
    feature = get_shadow(x, 0)
    feature = feature + get_shadow(x, 1)
    return feature
def label2ground_truth(image_label):
    # Targets: gt[label, label] = 1 for the true class, 0 everywhere else.
    # loss_weights down-weights the many negative targets (0.1) relative to the positive one (0.9).
    gt = torch.ones(10, 10)
    gt = gt * 0.0
    loss_weights = torch.ones(10, 10)
    loss_weights = loss_weights * 0.1
    for label in image_label:
        gt[label, label] = 1.0
        loss_weights[label, label] = 0.9
    return gt, loss_weights
def model(feature, weights, weights1):
    # Decide which of the classes [0, 1, ..., 9] the feature belongs to.
    # Append the constant 1.0 (bias feature), compute the 10 linear scores,
    # then squash them into (0, 1) with the sigmoid.
    feature = torch.cat((feature, torch.tensor(1.0).view(1, 1)), 1)
    h = feature.mm(weights)
    # weights1 (a second layer) is not used in this version
    y = torch.sigmoid(h)
    return y
def train_model(image_data, image_label, weights, weights1, lr):
    loss_value_before = 1000000000000000.
    loss_value = 10000000000000.
    for epoch in range(0, 30000):
        loss_value_before = loss_value
        loss_value = 0
        for i in range(0, 10):
            feature = get_feature(image_data[i])
            y = model(feature, weights, weights1)
            gt, loss_weights = label2ground_truth(image_label)
            # Weighted squared error between the 10 sigmoid outputs and the target row
            loss = torch.sum((y - gt[i:i+1, :]).mul(y - gt[i:i+1, :]).mul(loss_weights[i:i+1, :]))
            loss_value += loss.data.item()
            # Parameter update via autograd
            loss.backward()
            weights.data.sub_(weights.grad.data * lr)
            weights.grad.data.zero_()
        print("epoch=%s,loss=%s/%s" % (epoch, loss_value, loss_value_before))
    return weights
if __name__=="__main__":
    weights = torch.randn(7, 10, requires_grad=True)
    weights1 = torch.randn(10, 10, requires_grad=True)
    image_data, image_label = generate_data()
    # Print the image for digit 0
    print("The image for digit 0 is:")
    print(image_data[0])
    print("-"*20)
    # Print the image for digit 8
    print("The image for digit 8 is:")
    print(image_data[8])
    print("-"*20)
    # lr = float(sys.argv[1])
    lr = 0.001
    # Train the model
    weights = train_model(image_data, image_label, weights, weights1, lr)
    # Classify each image
    print("Classifying each image")
    for i in range(0, 10):
        x = image_data[i]
        # Extract the feature of the current image
        feature = get_feature(x)
        # Classify the extracted feature
        y = model(feature, weights, weights1)
        # Print the classification result: the class with the largest sigmoid output
        print("The classification result of image [%s] is [%s], its feature is [%s], the 10 sigmoid outputs are [%s]" % (i, torch.argmax(y).item(), feature, y))
4. Neural networks: backpropagation
Here act denotes the activation function: each layer computes act(Wx + b), and stacking such layers gives a network that is trained with backpropagation.
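A minimal sketch of a two-layer network trained by backpropagation (the shapes and data below are made up for illustration; autograd computes the gradients that backpropagation defines):
import torch
import torch.nn.functional as F

# Illustrative sketch, not part of the original course code.
# Toy data: 4 samples with 6 features, 3 classes
x = torch.randn(4, 6)
target = torch.tensor([0, 2, 1, 0])

w1, b1 = torch.randn(8, 6, requires_grad=True), torch.zeros(8, requires_grad=True)
w2, b2 = torch.randn(3, 8, requires_grad=True), torch.zeros(3, requires_grad=True)
lr = 0.1

for step in range(100):
    h = F.relu(x @ w1.t() + b1)        # hidden layer: act(W1 x + b1), act = relu
    logits = h @ w2.t() + b2           # output layer: W2 h + b2
    loss = F.cross_entropy(logits, target)
    loss.backward()                    # backpropagation: gradients of the loss w.r.t. all weights
    with torch.no_grad():              # gradient-descent update: w = w - lr * dL/dw
        for p in (w1, b1, w2, b2):
            p -= lr * p.grad
            p.grad.zero_()
print(loss.item())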
Homework (train on the MNIST dataset)
Learning the MNIST dataset:
In the code below, the weights w are not explicitly initialized. This has a very large effect on the result: without the (Kaiming) initialization the accuracy is about 10%; with it, the accuracy is about 90%.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
batch_size = 200
learning_rate = 0.01
epochs = 10
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('./data', train=True,download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('./data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True)
w1, b1 = torch.randn(200, 784, requires_grad=True), \
torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True), \
torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True), \
torch.zeros(10, requires_grad=True)
# Initialization matters a great deal: without it the accuracy is about 10%, with it about 90%.
#torch.nn.init.kaiming_normal_(w1)
#torch.nn.init.kaiming_normal_(w2)
#torch.nn.init.kaiming_normal_(w3)
def forward(x):
x = x @ w1.t() + b1
x = F.relu(x)
x = x @ w2.t() + b2
x = F.relu(x)
x = x @ w3.t() + b3
x = F.relu(x)
return x
# Without relu: about 92% accuracy; with relu: about 96%.
optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
criteon = nn.CrossEntropyLoss()
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # For how the -1 in view() works, see: https://www.cnblogs.com/MartinLwx/p/10543604.html
        data = data.view(-1, 28 * 28)
        # The network input is the flattened image: feature = data
        logits = forward(data)
        # logits: predictions, target: ground truth; the loss here is cross-entropy
        loss = criteon(logits, target)
        # Zero the gradients
        optimizer.zero_grad()
        # Backpropagate to compute the gradients
        loss.backward()
        # print(w1.grad.norm(), w2.grad.norm())
        # Update the weights: w = w - lr * delta_w
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = forward(data)
        test_loss += criteon(logits, target).item()
        pred = logits.data.max(1)[1]
        correct += pred.eq(target.data).sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
The code below does initialize w (Kaiming initialization):
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
batch_size = 200
learning_rate = 0.01
epochs = 10
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('./data', train=True,download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True)  # batch_size: 200 images are loaded per batch
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('./data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True)
# Each batch holds 200 images, each flattened to a 784-dimensional vector.
w1, b1 = torch.randn(200, 784, requires_grad=True), \
torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True), \
torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True), \
torch.zeros(10, requires_grad=True)
# Initialization matters a great deal: without it the accuracy is about 10%, with it about 90%.
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)
def forward(x):
    # .t() means transpose; @ is matrix multiplication
    x = x @ w1.t() + b1
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    x = F.relu(x)
    return x
# Without relu: about 92% accuracy; with relu: about 96%.
# Initialize the optimizer:
# first argument: the parameters that gradient descent will update
optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
criteon = nn.CrossEntropyLoss()
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # For how the -1 in view() works, see: https://www.cnblogs.com/MartinLwx/p/10543604.html
        data = data.view(-1, 28 * 28)
        # The network input is the flattened image: feature = data
        # The result has shape (200, 10)
        logits = forward(data)
        # logits: predictions, target: ground truth; the loss here is cross-entropy
        loss = criteon(logits, target)
        # Zero the gradients
        optimizer.zero_grad()
        # Backpropagate to compute the gradients
        loss.backward()
        # print(w1.grad.norm(), w2.grad.norm())
        # Update the weights: w = w - lr * delta_w
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = forward(data)
        test_loss += criteon(logits, target).item()
        pred = logits.data.max(1)[1]
        correct += pred.eq(target.data).sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))