PyTorch Pruning

Pruning

Pruning is one of the main techniques for compressing neural networks. Some networks (AlexNet, VGG16, and the like) occupy hundreds of megabytes for the full model. That may be negligible for desktop CPUs and GPUs, but many deep learning applications have to be deployed on mobile devices, which, unlike PCs, are highly sensitive to power and silicon area. For example, on an iPhone an app of that size cannot be downloaded over a 4G connection and requires Wi-Fi. Experiments also show that memory accesses generally consume more energy than the compute units themselves, and DDR reads and writes in particular cost far more energy than arithmetic. Compressing a network is therefore also an important way to accelerate it. The rest of this post is a first look at pruning.
Pruning means "cutting away" the redundant parameters of a network; concretely, the parameters to be pruned are set to 0 so that they no longer take part in later inference or training. Current pruning techniques fall into two main categories: structured pruning and unstructured pruning.

Structured pruning

Structured pruning, as the name suggests, prunes with a structure. For the weight matrix of a fully connected layer it typically removes entire rows or columns, which amounts to removing whole neurons; for a convolutional layer, it can remove a whole filter (pruning along the output channels) or a whole input channel, and so on.
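
As a quick sketch of what this looks like with PyTorch's torch.nn.utils.prune module (used throughout the experiments below), ln_structured can zero out whole rows of a fully connected weight, i.e. whole output neurons. The toy layer and the amount here are chosen purely for illustration:

import torch
from torch import nn
import torch.nn.utils.prune as prune

fc = nn.Linear(8, 4)                                           # toy layer with 4 output neurons
prune.ln_structured(fc, name='weight', amount=2, n=2, dim=0)   # drop the 2 rows with the smallest L2 norm
print(fc.weight)                                               # two whole rows are now zero
print(fc.weight.abs().sum(dim=1) == 0)                         # True marks the pruned neurons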

Unstructured pruning

Unstructured pruning removes individual weights and does not require whole rows or columns to be cut. It tends to preserve accuracy better, because structured pruning can easily remove weights that are still important. Its drawback is that it does not change the shape of the weight tensor: unless a compressed sparse storage format such as CSR or CSC is used, the model does not actually get any smaller; it is simply filled with irregularly scattered zeros.
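
A small sketch makes this concrete (the toy layer and pruning amount are chosen only for illustration, and it assumes SciPy is installed): after unstructured pruning the dense tensor keeps its shape, and only a compressed format such as CSR actually stores less.

import torch
from torch import nn
import torch.nn.utils.prune as prune
from scipy.sparse import csr_matrix

fc = nn.Linear(100, 100)
prune.l1_unstructured(fc, name='weight', amount=0.9)   # zero 90% of the individual weights
print(fc.weight.shape)                                 # still torch.Size([100, 100])
dense = fc.weight.detach().numpy()
sparse = csr_matrix(dense)                             # keeps only the non-zero entries plus index arrays
print(dense.nbytes)                                                         # dense storage in bytes
print(sparse.data.nbytes + sparse.indices.nbytes + sparse.indptr.nbytes)    # CSR storage in bytes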

Experiments

Pruning is very convenient in PyTorch. The pruning utilities are imported with

import torch.nn.utils.prune as prune

and with that in place, the following is a simple (unstructured) pruning experiment I ran with PyTorch.

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune

path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()

train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)

# Build the network

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1=nn.Conv2d(1,32,5,stride=1,padding=0)
        self.relu1=nn.ReLU()
        self.pool1=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv2d(32,16,3,stride=1,padding=0)
        self.relu2=nn.ReLU()
        self.pool2=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x=self.conv1(x)
        x=self.relu1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.relu2(x)
        x=self.pool2(x)
        x=x.view(-1,400)
        x=self.fc1(x)
        x=F.relu(x)
        x=self.fc2(x)
        x=F.relu(x)
        x=self.fc3(x)
        return x


net = Net()


optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

train_loss = []
precision=0

for epoch in range(10):

    for i in range(600):
        x=train_X[i*100:i*100+100]
        y=train_y[i*100:i*100+100]
        x = x.reshape(-1,1,28,28)
        x = torch.from_numpy(x)        # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)        # (batch_size, 10) one-hot labels

        out = net(x)

        loss = F.mse_loss(out, y)         # MSE between the prediction and the one-hot label

        optimizer.zero_grad()             # clear gradients left over from the previous step
        loss.backward()                   # backpropagate to compute the gradients
        optimizer.step()                  # apply the update to the network parameters
        train_loss.append(loss.item())

        if i % 10 == 0:

            print(epoch, i, np.mean(train_loss))
            train_loss=[]
        if epoch > 4 and i % 50 == 0:
            # from epoch 5 onward, every 50 batches randomly prune a further 1% of conv1's remaining weights
            module = net.conv1
            prune.random_unstructured(module, name='weight', amount=0.01)
            print(torch.sum(net.conv1.weight == 0))   # how many conv1 weights are now zero

# evaluate on the held-out test set (10,000 images)
test_X = test_X.reshape(-1, 1, 28, 28).astype('float32') / 255
test_y = to_categorical(test_y, 10)

total_correct = 0
for i in range(10000):
    x = torch.from_numpy(test_X[i]).view(1, 1, 28, 28)
    y = torch.from_numpy(test_y[i]).view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)                        # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()   # 1 if this sample is classified correctly, else 0
    total_correct += correct

acc = total_correct / 10000.0
print('test acc:', acc)
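
It is worth looking at what the pruning call actually did to the module. prune does not overwrite the parameter in place: the original values are moved to weight_orig, a binary buffer weight_mask is added, and weight becomes an attribute recomputed as weight_orig * weight_mask by a forward pre-hook. prune.remove folds the mask back in and makes the pruning permanent. A short check, meant to be run right after the script above:

print([name for name, _ in net.conv1.named_parameters()])   # contains 'weight_orig' (and 'bias'), no plain 'weight'
print([name for name, _ in net.conv1.named_buffers()])      # contains 'weight_mask'
print(list(net.conv1._forward_pre_hooks.values()))          # the hook that recomputes conv1.weight

prune.remove(net.conv1, 'weight')                            # make the pruning permanent
print([name for name, _ in net.conv1.named_parameters()])   # 'weight' is an ordinary parameter again, zeros included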


Pruning with l1_unstructured:

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune

path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()

train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)

# Build the network

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1=nn.Conv2d(1,32,5,stride=1,padding=0)
        self.relu1=nn.ReLU()
        self.pool1=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv2d(32,16,3,stride=1,padding=0)
        self.relu2=nn.ReLU()
        self.pool2=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x=self.conv1(x)
        x=self.relu1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.relu2(x)
        x=self.pool2(x)
        x=x.view(-1,400)
        x=self.fc1(x)
        x=F.relu(x)
        x=self.fc2(x)
        x=F.relu(x)
        x=self.fc3(x)
        return x


net = Net()


optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

train_loss = []
precision=0

for epoch in range(6):

    for i in range(600):
        x=train_X[i*100:i*100+100]
        y=train_y[i*100:i*100+100]
        x = x.reshape(-1,1,28,28)
        x = torch.from_numpy(x)        # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)        # (batch_size, 10) one-hot labels

        out = net(x)

        loss = F.mse_loss(out, y)         # MSE between the prediction and the one-hot label

        optimizer.zero_grad()             # clear gradients left over from the previous step
        loss.backward()                   # backpropagate to compute the gradients
        optimizer.step()                  # apply the update to the network parameters
        train_loss.append(loss.item())

        if i % 10 == 0:

            print(epoch, i, np.mean(train_loss))
            train_loss=[]
        if epoch >= 4 and i % 100 == 0:
            # from epoch 4 onward, every 100 batches prune a further 2% of conv1's remaining weights, smallest |w| first
            module = net.conv1
            prune.l1_unstructured(module, name='weight', amount=0.02)
            print(torch.sum(net.conv1.weight == 0))   # how many conv1 weights are now zero

# evaluate on the held-out test set (10,000 images)
test_X = test_X.reshape(-1, 1, 28, 28).astype('float32') / 255
test_y = to_categorical(test_y, 10)

total_correct = 0
for i in range(10000):
    x = torch.from_numpy(test_X[i]).view(1, 1, 28, 28)
    y = torch.from_numpy(test_y[i]).view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)                        # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()   # 1 if this sample is classified correctly, else 0
    total_correct += correct

acc = total_correct / 10000.0
print('test acc:', acc)

print(net.conv1.weight)
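
Compared with the random pruning above, l1_unstructured always removes the weights with the smallest absolute value first, which usually costs less accuracy at the same sparsity. A toy illustration (the layer and the hand-set weights below are made up):

import torch
from torch import nn
import torch.nn.utils.prune as prune

lin = nn.Linear(4, 1)
with torch.no_grad():
    lin.weight.copy_(torch.tensor([[0.1, -2.0, 0.05, 3.0]]))
prune.l1_unstructured(lin, name='weight', amount=0.5)   # prune 50%, i.e. the 2 entries with the smallest |w|
print(lin.weight)                                       # 0.1 and 0.05 are zeroed, -2.0 and 3.0 survive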

Structured pruning along dim 0 (output channels), using the L2 norm:

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune

path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()

train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)

# Build the network

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1=nn.Conv2d(1,32,5,stride=1,padding=0)
        self.relu1=nn.ReLU()
        self.pool1=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv2d(32,16,3,stride=1,padding=0)
        self.relu2=nn.ReLU()
        self.pool2=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x=self.conv1(x)
        x=self.relu1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.relu2(x)
        x=self.pool2(x)
        x=x.view(-1,400)
        x=self.fc1(x)
        x=F.relu(x)
        x=self.fc2(x)
        x=F.relu(x)
        x=self.fc3(x)
        return x


net = Net()


optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

train_loss = []
precision=0

for epoch in range(6):

    for i in range(600):
        x=train_X[i*100:i*100+100]
        y=train_y[i*100:i*100+100]
        x = x.reshape(-1,1,28,28)
        x = torch.from_numpy(x)        # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)        # (batch_size, 10) one-hot labels

        out = net(x)

        loss = F.mse_loss(out, y)         # MSE between the prediction and the one-hot label

        optimizer.zero_grad()             # clear gradients left over from the previous step
        loss.backward()                   # backpropagate to compute the gradients
        optimizer.step()                  # apply the update to the network parameters
        train_loss.append(loss.item())

        if i % 10 == 0:

            print(epoch, i, np.mean(train_loss))
            train_loss=[]
        if epoch >= 4 and i % 100 == 0:
            # from epoch 4 onward, every 100 batches remove one more conv1 output channel (smallest L2 norm)
            module = net.conv1
            prune.ln_structured(module, name="weight", amount=1, n=2, dim=0)
            print(torch.sum(net.conv1.weight == 0))   # how many conv1 weights are now zero

# evaluate on the held-out test set (10,000 images)
test_X = test_X.reshape(-1, 1, 28, 28).astype('float32') / 255
test_y = to_categorical(test_y, 10)

total_correct = 0
for i in range(10000):
    x = torch.from_numpy(test_X[i]).view(1, 1, 28, 28)
    y = torch.from_numpy(test_y[i]).view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)                        # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()   # 1 if this sample is classified correctly, else 0
    total_correct += correct

acc = total_correct / 10000.0
print('test acc:', acc)

print(net.conv1.weight)
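
To confirm that whole filters (output channels) were removed rather than scattered individual weights, the mask left on conv1 can be inspected: a channel is pruned exactly when its entire mask slice is zero. A short check, assuming the script above has just run:

mask = net.conv1.weight_mask                        # binary mask added by ln_structured
channel_sums = mask.sum(dim=(1, 2, 3))              # per-output-channel count of surviving weights
print((channel_sums == 0).nonzero().flatten())      # indices of the pruned output channels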

Pruning several parameter groups at once:

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune

path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()

train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)

# Build the network

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1=nn.Conv2d(1,32,5,stride=1,padding=0)
        self.relu1=nn.ReLU()
        self.pool1=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv2d(32,16,3,stride=1,padding=0)
        self.relu2=nn.ReLU()
        self.pool2=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x=self.conv1(x)
        x=self.relu1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.relu2(x)
        x=self.pool2(x)
        x=x.view(-1,400)
        x=self.fc1(x)
        x=F.relu(x)
        x=self.fc2(x)
        x=F.relu(x)
        x=self.fc3(x)
        return x


net = Net()


optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

train_loss = []
precision=0

for epoch in range(6):

    for i in range(600):
        x=train_X[i*100:i*100+100]
        y=train_y[i*100:i*100+100]
        x = x.reshape(-1,1,28,28)
        x = torch.from_numpy(x)        # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)        # (batch_size, 10) one-hot labels

        out = net(x)

        loss = F.mse_loss(out, y)         # MSE between the prediction and the one-hot label

        optimizer.zero_grad()             # clear gradients left over from the previous step
        loss.backward()                   # backpropagate to compute the gradients
        optimizer.step()                  # apply the update to the network parameters
        train_loss.append(loss.item())

        if i % 10 == 0:

            print(epoch, i, np.mean(train_loss))
            train_loss=[]
        if epoch>=4 and i%100==0:
            for name, module in net.named_modules():
                # prune 2% of the connections in every Conv2d layer
                if isinstance(module, torch.nn.Conv2d):
                    prune.l1_unstructured(module, name='weight', amount=0.02)
                # prune 4% of the connections in every Linear layer
                elif isinstance(module, torch.nn.Linear):
                    prune.l1_unstructured(module, name='weight', amount=0.04)
            print(torch.sum(net.conv1.weight == 0))
            print(torch.sum(net.fc1.weight ==0))

# evaluate on the held-out test set (10,000 images)
test_X = test_X.reshape(-1, 1, 28, 28).astype('float32') / 255
test_y = to_categorical(test_y, 10)

total_correct = 0
for i in range(10000):
    x = torch.from_numpy(test_X[i]).view(1, 1, 28, 28)
    y = torch.from_numpy(test_y[i]).view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)                        # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()   # 1 if this sample is classified correctly, else 0
    total_correct += correct

acc = total_correct / 10000.0
print('test acc:', acc)

print(net.conv1.weight)
print(net.fc1.weight)
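
Because the pruning call runs several times during training, PyTorch does not throw the old masks away: successive prunings of the same parameter are combined into a prune.PruningContainer, and the forward pre-hook keeps the whole history. A small check, assuming the script above has just run:

for hook in net.conv1._forward_pre_hooks.values():
    if isinstance(hook, prune.PruningContainer):
        print([type(p).__name__ for p in hook])   # one entry per l1_unstructured call applied to conv1.weight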

Global pruning:

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune

path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()

train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)

# Build the network

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1=nn.Conv2d(1,32,5,stride=1,padding=0)
        self.relu1=nn.ReLU()
        self.pool1=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv2d(32,16,3,stride=1,padding=0)
        self.relu2=nn.ReLU()
        self.pool2=nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x=self.conv1(x)
        x=self.relu1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.relu2(x)
        x=self.pool2(x)
        x=x.view(-1,400)
        x=self.fc1(x)
        x=F.relu(x)
        x=self.fc2(x)
        x=F.relu(x)
        x=self.fc3(x)
        return x


net = Net()


optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

train_loss = []
precision=0

for epoch in range(6):

    for i in range(600):
        x=train_X[i*100:i*100+100]
        y=train_y[i*100:i*100+100]
        x = x.reshape(-1,1,28,28)
        x = torch.from_numpy(x)        # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)        # (batch_size, 10) one-hot labels

        out = net(x)

        loss = F.mse_loss(out, y)         # MSE between the prediction and the one-hot label

        optimizer.zero_grad()             # clear gradients left over from the previous step
        loss.backward()                   # backpropagate to compute the gradients
        optimizer.step()                  # apply the update to the network parameters
        train_loss.append(loss.item())

        if i % 10 == 0:

            print(epoch, i, np.mean(train_loss))
            train_loss=[]
        if epoch >= 4 and i % 100 == 0:
            # from epoch 4 onward, every 100 batches prune 2% more weights, ranked by |w| across all listed layers
            parameters_to_prune = (
                (net.conv1, 'weight'),
                (net.conv2, 'weight'),
                (net.fc1, 'weight'),
                (net.fc2, 'weight'),
                (net.fc3, 'weight'),
            )

            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=0.02,
            )

            print(
                "Sparsity in conv1.weight: {:.2f}%".format(
                    100. * float(torch.sum(net.conv1.weight == 0))
                    / float(net.conv1.weight.nelement())
                )
            )
            print(
                "Sparsity in conv2.weight: {:.2f}%".format(
                    100. * float(torch.sum(net.conv2.weight == 0))
                    / float(net.conv2.weight.nelement())
                )
            )
            print(
                "Sparsity in fc1.weight: {:.2f}%".format(
                    100. * float(torch.sum(net.fc1.weight == 0))
                    / float(net.fc1.weight.nelement())
                )
            )
            print(
                "Sparsity in fc2.weight: {:.2f}%".format(
                    100. * float(torch.sum(net.fc2.weight == 0))
                    / float(net.fc2.weight.nelement())
                )
            )
            print(
                "Sparsity in fc3.weight: {:.2f}%".format(
                    100. * float(torch.sum(net.fc3.weight == 0))
                    / float(net.fc3.weight.nelement())
                )
            )
            print(
                "Global sparsity: {:.2f}%".format(
                    100. * float(
                        torch.sum(net.conv1.weight == 0)
                        + torch.sum(net.conv2.weight == 0)
                        + torch.sum(net.fc1.weight == 0)
                        + torch.sum(net.fc2.weight == 0)
                        + torch.sum(net.fc3.weight == 0)
                    )
                    / float(
                        net.conv1.weight.nelement()
                        + net.conv2.weight.nelement()
                        + net.fc1.weight.nelement()
                        + net.fc2.weight.nelement()
                        + net.fc3.weight.nelement()
                    )
                )
            )

# evaluate on the held-out test set (10,000 images)
test_X = test_X.reshape(-1, 1, 28, 28).astype('float32') / 255
test_y = to_categorical(test_y, 10)

total_correct = 0
for i in range(10000):
    x = torch.from_numpy(test_X[i]).view(1, 1, 28, 28)
    y = torch.from_numpy(test_y[i]).view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)                        # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()   # 1 if this sample is classified correctly, else 0
    total_correct += correct

acc = total_correct / 10000.0
print('test acc:', acc)

print(net.conv1.weight)
print(net.fc1.weight)
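
Before saving a globally pruned model it is usually convenient to make the pruning permanent with prune.remove, so the state_dict contains plain weight tensors (with the zeros baked in) instead of weight_orig / weight_mask pairs. A sketch, assuming the script above has just run; the output filename is only an example:

for module, name in [(net.conv1, 'weight'), (net.conv2, 'weight'),
                     (net.fc1, 'weight'), (net.fc2, 'weight'), (net.fc3, 'weight')]:
    prune.remove(module, name)                    # fold each mask into the corresponding weight
torch.save(net.state_dict(), 'pruned_net.pth')    # example path; the file is not smaller, because
                                                  # the zeros are still stored densely

If the goal is an actually smaller file, the pruned tensors would additionally have to be stored in a sparse format, for example via Tensor.to_sparse().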
