September 29 Computer Vision Fundamentals Study Notes — AlexNet


Preface

These are my September 29 study notes on computer vision fundamentals — AlexNet, in two parts:

  • Week 6 homework — LeNet;
  • AlexNet.

I. Week 6 homework — LeNet
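The homework implements LeNet-5 by hand: a list of seven layers (conv1 → pool2 → conv3 → pool4 → conv5 → f6 → output) chained with ReLU inside model(), a sum-of-squares loss against one-hot labels, and a manual SGD update. The first 80 MNIST samples are used for training and samples 80–100 for testing.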

#coding:utf-8
# Week 6 homework: LeNet-5 on MNIST
# houchangligong, zhaomingming, 20200602
import torch
from torch import nn
from itertools import product
import sys
from mnist import MNIST
import cv2
import numpy as np


def model(feature,layers):
    B = len(feature)
    fea=torch.tensor(feature).view(B,1,28,28).float()

    fea= torch.relu(layers[0](fea))  # conv1: 1x28x28 -> 6x28x28
    fea= layers[1](fea)              # pool2: -> 6x14x14
    fea= torch.relu(layers[2](fea))  # conv3: -> 16x10x10
    fea= layers[3](fea)              # pool4: -> 16x5x5
    fea= torch.relu(layers[4](fea))  # conv5: -> 120x1x1
    fea = fea.view(B,120)
    fea= torch.relu(layers[5](fea))  # f6: 120 -> 84

    # sigmoid output over the 10 digit classes (softmax is an alternative)
    y = torch.sigmoid(layers[6](fea))
    #y = torch.softmax(layers[6](fea), 1)
    return y

def get_acc(image_data,image_label,layers,start_i,end_i):
    correct=0
    for i in range(start_i,end_i):
        y = model(image_data[i:i+1],layers)
        gt = image_label[i]
        pred = torch.argmax(y).item()
        if gt==pred:
            correct+=1
    return float(correct)/float(end_i-start_i)


def train_model(image_data,image_label,layers,lr):
    loss_value_before=1000000000000000.
    loss_value=10000000000000.
    
    for epoch in range(0,300):
        loss_value_before=loss_value
        loss_value=0
        #print(image_label[i])
        B = 80  # train on the first 80 samples; samples 80-100 are held out for testing
        y = model(image_data[0:B],layers)
        gt=torch.tensor(image_label[0:B]).view(B,1)
        # one-hot encode the labels
        gt_vector = torch.zeros(B,10).scatter_(1,gt,1)

        # sum-of-squares loss over all 10 outputs
        loss = torch.sum((y-gt_vector).mul(y-gt_vector))
        # alternative: push the positive class toward 1 and the others away from 1
        #loss1 = (y-1.0).mul(y-1.0)
        #loss = loss1[0,gt]+torch.sum(1.0/(loss1[0,0:gt]))+torch.sum(1.0/(loss1[0,gt:-1]))
        loss_value += loss.data.item()
        # manual SGD update: w = w - lr * dL/dw
        loss.backward()
        for i in [0,2,4,5,6]:  # indices of the trainable layers (convs and linears)
            layers[i].weight.data.sub_(layers[i].weight.grad.data*lr)
            layers[i].weight.grad.data.zero_()
            layers[i].bias.data.sub_(layers[i].bias.grad.data*lr)
            layers[i].bias.grad.data.zero_()
        train_acc=get_acc(image_data,image_label,layers,0,80)
        test_acc =get_acc(image_data,image_label,layers,80,100)
        print("epoch=%s,loss=%s/%s,train/test_acc:%s/%s"%(epoch,loss_value,loss_value_before,train_acc,test_acc))
    return layers 

if __name__=="__main__":

    # read the learning rate from the command line
    lr = float(sys.argv[1])
    
    layers=[]
    # add conv1 
    conv1=nn.Conv2d(1,6,kernel_size = 5,stride=1,padding=2)
    layers.append(conv1)
    pool2=nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
    layers.append(pool2)
    # add conv3 
    conv3=nn.Conv2d(6,16,kernel_size = 5,stride=1,padding=0)
    layers.append(conv3)
    pool4=nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
    layers.append(pool4)
    # add conv5 
    conv5=nn.Conv2d(16,120,kernel_size = 5,stride=1,padding=0)
    layers.append(conv5)
    f6 = nn.Linear(120, 84)
    layers.append(f6)
    output=nn.Linear(84,10)
    layers.append(output)
    # load the data
    # MNIST 28x28 dataset, 60000 training samples
    mndata = MNIST('../week4/mnist/python-mnist/data/')
    image_data_all, image_label_all = mndata.load_training()
    image_data=image_data_all[0:100]
    image_label=image_label_all[0:100]
    # run the untrained model once on the data
    y = model(image_data,layers)
    # evaluate the untrained model
    print("acc of the untrained model=%s"%(get_acc(image_data,image_label,layers,80,100)))

    # train the model:
    train_model(image_data,image_label,layers,lr)
    # after training, evaluate the model and report the result:
    print("acc after training=%s"%(get_acc(image_data,image_label,layers,80,100)))

II. AlexNet

1. Dataset

  • ImageNet;
  • ILSVRC-2010: test-set labels are available.

2. Forward pass

Layer dimensions (a short sketch after the list verifies the resulting feature-map sizes):
  • Input image: 224 × 224 × 3;
  • Kernel of the first layer: 96 × (11 × 11 × 3), stride = 4;
  • Kernel of the second layer: 256 × (5 × 5 × 48);
  • Kernel of the third layer: 384 × (3 × 3 × 256);
  • Kernel of the fourth layer: 384 × (3 × 3 × 192);
  • Kernel of the fifth layer: 256 × (3 × 3 × 192).
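The 48- and 192-deep kernels reflect the two-GPU split of the original paper; a single-device implementation convolves over the full channel counts (96, 256, 384). Below is a minimal sketch of that single-device convolution stack with the paper's overlapping 3 × 3, stride-2 pooling; it runs a dummy input through the layers and prints each feature-map size:

#coding:utf-8
import torch
from torch import nn

# single-device AlexNet convolution stack; pooling follows conv1, conv2 and conv5
convs = nn.Sequential(
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),   # conv1
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),  # conv2
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1), # conv3
    nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1), # conv4
    nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1), # conv5
    nn.MaxPool2d(kernel_size=3, stride=2),
)

x = torch.rand(1, 3, 224, 224)
for layer in convs:
    x = layer(x)
    print(layer.__class__.__name__, tuple(x.shape))
# ends at (1, 256, 6, 6): 256*6*6 = 9216, the input size of the first FC layer

The spatial sizes run 55 → 27 → 27 → 13 → 13 → 13 → 13 → 6, so the flattened feature is the 9216-dimensional input of fc9 in the code below.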

3. Backpropagation

Parameter update rule (momentum 0.9, weight decay 0.0005, learning rate $\epsilon$, gradient averaged over batch $D_i$):

$$v_{i+1} := 0.9\,v_i - 0.0005\,\epsilon\,\omega_i - \epsilon \left\langle \frac{\partial L}{\partial \omega}\Big|_{\omega_i} \right\rangle_{D_i}$$

$$\omega_{i+1} := \omega_i + v_{i+1}$$
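A minimal sketch of one such update step for a single parameter tensor (here `w`, its velocity `v`, and the batch-averaged gradient `grad` are assumed given; `lr` plays the role of $\epsilon$):

import torch

def sgd_momentum_step(w, v, grad, lr):
    # updates w and v in place
    # v_{i+1} = 0.9 * v_i - 0.0005 * lr * w_i - lr * grad
    v.mul_(0.9).sub_(0.0005 * lr * w).sub_(lr * grad)
    # w_{i+1} = w_i + v_{i+1}
    w.add_(v)

This is the same rule torch.optim.SGD implements with momentum=0.9 and weight_decay=0.0005, up to where PyTorch places the learning rate in the momentum buffer.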

4. Quantitative & qualitative comparison

The code is as follows; it builds the AlexNet layer list, prints per-layer parameter counts, upsamples the 28 × 28 MNIST digits to 224 × 224 three-channel inputs, and times each convolution's forward pass to estimate FLOPs:

#coding:utf-8
# AlexNet demo: parameter count, per-layer FLOPs/timing, and training on MNIST
# houchangligong, zhaomingming, 20200602
import torch
from torch import  nn
from itertools import product
import sys
from mnist import MNIST
import cv2
import numpy as np
import time


def print_f(*args):
    # logging helper; the original snippet calls print_f but never defines it
    print(*args)


def get_flops(layer,fea):
    # layer: nn.Conv2d; multiply-accumulates per output pixel = k*k*in_channels
    
    output_pixels = fea.data.view(-1).shape
    flops_per_pixel=layer.kernel_size[0]**2*layer.in_channels
    flops =output_pixels[0]*flops_per_pixel
    flops = float(flops)
    print_f(layer)
    print_f("flops=%s / %.2f M / %.2f G "%(flops,flops/(1000.**2),flops/(1000.**3)))
    return flops

def forward_flops(layer,fea):
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    fea= torch.relu(layer(fea))
    end_time = time.perf_counter()
    flops=get_flops(layer,fea)
    time_cost=end_time-start_time
    print_f("time cost:%s S,computer flops:%s "%(time_cost,flops/(1000.**3)/time_cost))
    return fea,time_cost,flops

def dropout(fea,flag="train"):
    # dropout as in the AlexNet paper: at training time each unit is kept
    # with probability 0.5; at test time activations are scaled by 0.5 instead
    # (note: model() below always calls this in train mode)
    if flag=="train":
        size= fea.shape
        p = torch.bernoulli(torch.full((size[0],size[1]),0.5))
        fea=fea*p
    elif flag=="evaluate":
        fea=fea*0.5
    return fea

def model(feature,layers):
    # time cost sum
    tcs=0
    # flops sum
    fls=0
    B = len(feature)
    fea=torch.tensor(feature).view(B,1,28,28).float()
    # upsample to the 224x224 input size AlexNet expects
    fea = nn.functional.interpolate(fea,(224,224),mode='nearest')
    #fea = nn.functional.upsample_bilinear(fea, (224,224))
    # replicate the single gray channel into 3 channels
    fea=torch.cat([fea,fea,fea],1)
    B = fea.shape[0]
    print_f("feature map size:[%s,%s,%s,%s]"%(fea.shape))
    start_time = time.perf_counter()
    fea= torch.relu(layers[0](fea))
    end_time = time.perf_counter()
    flops=get_flops(layers[0],fea)
    time_cost=end_time-start_time
    print_f("time cost:%s S"%(end_time-start_time))
    tcs+=time_cost
    fls+=flops
    #pool 
    fea= layers[1](fea)
    #fea= torch.relu(layers[2](fea))
    fea,tc,fl=forward_flops(layers[2],fea)
    tcs+=tc
    fls+=fl

    fea= layers[3](fea)
    #fea= torch.relu(layers[4](fea))
    fea,tc,fl=forward_flops(layers[4],fea)
    tcs+=tc
    fls+=fl
    #fea= torch.relu(layers[5](fea))
    fea,tc,fl=forward_flops(layers[5],fea)
    tcs+=tc
    fls+=fl
    #fea= torch.relu(layers[6](fea))
    fea,tc,fl=forward_flops(layers[6],fea)
    tcs+=tc
    fls+=fl
    print_f("sum_time_cost:%s,sum_flops:%s,computer_flops:%s"%(tcs,fls,fls/tcs/(1024.**3)))
    fea= layers[7](fea)
    fea = fea.view(B,9216)
    fea= torch.relu(layers[8](fea))
    fea=dropout(fea)
    
    fea= torch.relu(layers[9](fea))
    fea=dropout(fea)
    output= torch.sigmoid(dropout(layers[10](fea)))
    
    y=output
    
    #y=torch.softmax(output,1)
    #y = 1.0/(1.0+torch.exp(-1.*h))
    return y

def get_acc(image_data,image_label,layers,start_i,end_i):
    correct=0
    for i in range(start_i,end_i):
        y = model(image_data[i:i+1],layers)
        gt = image_label[i]
        pred = torch.argmax(y).item()
        if gt==pred:
            correct+=1
    return float(correct)/float(end_i-start_i)


def train_model(image_data,image_label,layers,lr):
    loss_value_before=1000000000000000.
    loss_value=10000000000000.
    
    for epoch in range(0,300):
        loss_value_before=loss_value
        loss_value=0
        #print(image_label[i])
        B = 80  # train on the first 80 samples; samples 80-100 are held out for testing
        y = model(image_data[0:B],layers)
        gt=torch.tensor(image_label[0:B]).view(B,1)
        # one-hot encode the labels (the output layer has 1000 classes)
        gt_vector = torch.zeros(B,1000).scatter_(1,gt,1)

        # sum-of-squares loss over all outputs
        loss = torch.sum((y-gt_vector).mul(y-gt_vector))
        # alternative: push the positive class toward 1 and the others away from 1
        #loss1 = (y-1.0).mul(y-1.0)
        #loss = loss1[0,gt]+torch.sum(1.0/(loss1[0,0:gt]))+torch.sum(1.0/(loss1[0,gt:-1]))
        loss_value += loss.data.item()
        # manual SGD update: w = w - lr * dL/dw
        loss.backward()
        for i in [0,2,4,5,6,8,9,10]:  # indices of the trainable layers (convs and linears)
            layers[i].weight.data.sub_(layers[i].weight.grad.data*lr)
            layers[i].weight.grad.data.zero_()
            layers[i].bias.data.sub_(layers[i].bias.grad.data*lr)
            layers[i].bias.grad.data.zero_()
        train_acc=get_acc(image_data,image_label,layers,0,80)
        test_acc =get_acc(image_data,image_label,layers,80,100)
        print("epoch=%s,loss=%s/%s,train/test_acc:%s/%s"%(epoch,loss_value,loss_value_before,train_acc,test_acc))
    return layers 

def print_params_num(layers):
    params_num=0
    params_num_K=0
    params_num_M=0
    print("-"*20)
    for i in [0,2,4,5,6,8,9,10]: 
        nw=layers[i].weight.data.view(-1).shape
        nb=layers[i].bias.data.view(-1).shape
        layer_num = nw[0]+nb[0]
        layer_num_K = layer_num/1000.
        layer_num_M = layer_num_K/1000.
        print(layers[i])
        print("layer[%s] has %s / %sK / %sM params"%(i,layer_num,layer_num_K,layer_num_M))
        params_num +=layer_num
        params_num_K +=layer_num_K
        params_num_M +=layer_num_M
    print("alexnet has %s / %sK / %sM params need to train"%(params_num,params_num_K,params_num_M))
    print("-"*20)
        
if __name__=="__main__":
    # read the learning rate from the command line
    lr = float(sys.argv[1])
    
    layers=[]
    # add conv1 
    conv1=nn.Conv2d(3,96,kernel_size = 11,stride=4,padding=2)
    layers.append(conv1)
    #pool2=nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
    pool2=nn.MaxPool2d(kernel_size=3, stride=2, padding=0,ceil_mode=True)
    layers.append(pool2)
    # add conv3 
    conv3=nn.Conv2d(96,256,kernel_size = 5,stride=1,padding=2)
    layers.append(conv3)
    pool4=nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
    layers.append(pool4)
    # add conv5 
    conv5=nn.Conv2d(256,384,kernel_size = 3,stride=1,padding=1)
    layers.append(conv5)
    conv6=nn.Conv2d(384,384,kernel_size = 3,stride=1,padding=1)
    layers.append(conv6)
    conv7=nn.Conv2d(384,256,kernel_size = 3,stride=1,padding=1)
    layers.append(conv7)
    pool8=nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
    layers.append(pool8)
    #fc9 = nn.Linear(36864, 4096)
    fc9 = nn.Linear(9216, 4096)
    layers.append(fc9)
    fc10 = nn.Linear(4096, 4096)
    layers.append(fc10)
    fc11 = nn.Linear(4096, 1000)
    layers.append(fc11)
    
    # parameter count:
    print_params_num(layers)
    # load the data
    # MNIST 28x28 dataset, 60000 training samples
    mndata = MNIST('../week4/mnist/python-mnist/data/')
    image_data_all, image_label_all = mndata.load_training()
    image_data=image_data_all[0:100]
    image_label=image_label_all[0:100]
    # run the untrained model once on the data
    y = model(image_data,layers)

    # evaluate the untrained model
    print("acc of the untrained model=%s"%(get_acc(image_data,image_label,layers,80,100)))
    
    # train the model:
    train_model(image_data,image_label,layers,lr)
    # after training, evaluate the model and report the result:
    print("acc after training=%s"%(get_acc(image_data,image_label,layers,80,100)))
