# 李理：三层卷积网络和vgg的实现

### 5.2 cell3 实现最原始的卷积层的forward部分

N, C, H, W = x.shape
F, _, HH, WW = w.shape
stride = conv_param['stride']
H_out = 1 + (H + 2 * pad - HH) / stride
W_out = 1 + (W + 2 * pad - WW) / stride
out = np.zeros((N,F,H_out,W_out))

for n in range(N):
for c in range(C):

for n in range(N):
for i in range(H_out):
for j in range(W_out):
current_x_matrix = x_pad[n, :, i * stride: i * stride + HH, j * stride:j * stride + WW]
for f in range(F):
current_filter = w[f]
out[n,f,i,j] = np.sum(current_x_matrix*current_filter)

out[n,:,i,j] = out[n,:,i,j]+b

#### 5.2.6 第8-11行

In [19]: x=np.array([[1,2],[3,4],[5,6]])

In [20]: x
Out[20]:
array([[1, 2],
[3, 4],
[5, 6]])

In [21]: y=np.pad(x,(1,1),'constant', constant_values=(0,0))

In [22]: y
Out[22]:
array([[0, 0, 0, 0],
[0, 1, 2, 0],
[0, 3, 4, 0],
[0, 5, 6, 0],
[0, 0, 0, 0]])

In [23]: y=np.pad(x,(1,0),'constant', constant_values=(0,0))

In [24]: y
Out[24]:
array([[0, 0, 0],
[0, 1, 2],
[0, 3, 4],
[0, 5, 6]])

#### 5.2.7 第12-19行

current_x_matrix = x_pad[n,:, i*stride: i*stride+HH, j*stride:j*stride+WW]

current_filter = w[f]

out[n,f,i,j]+=b[f]

  for n in range(N):
for f in range(F):
current_filter = w[f]
for i in range(H_out):
for j in range(W_out):
current_x_matrix = x_pad[n, :, i * stride: i * stride + HH, j * stride:j * stride + WW]
out[n, f, i, j] = np.sum(current_x_matrix * current_filter)
out[n, f, i, j] = out[n, f, i, j] + b[f]

### 5.3 cell4

w[0, 0, :, :] = [[0, 0, 0], [0, 0.3, 0], [0, 0, 0]]
w[0, 1, :, :] = [[0, 0, 0], [0, 0.6, 0], [0, 0, 0]]
w[0, 2, :, :] = [[0, 0, 0], [0, 0.1, 0], [0, 0, 0]]

w[1, 0, :, :] =0
w[1, 1, :, :] =0
w[1, 2, :, :] = [[1, 2, 1], [0, 0, 0], [-1, -2, -1]]

### 5.4 cell5 实现conv_backward_naive

  x, w, b, conv_param = cache
stride = conv_param['stride']
N, C, H, W = x.shape
F, _, HH, WW = w.shape
_,_,H_out,W_out = dout.shape

for n in range(N):
for c in range(C):

db = np.zeros((F))

dw = np.zeros(w.shape)

for n in range(N):
for i in range(H_out):
for j in range(W_out):
current_x_matrix = x_pad[n, :, i * stride: i * stride + HH, j * stride:j * stride + WW]
for f in range(F):
dw[f] = dw[f] + dout[n,f,i,j]* current_x_matrix
dx_pad[n,:, i*stride: i*stride+HH, j*stride:j*stride+WW] += w[f]*dout[n,f,i,j]
db = db + dout[n,:,i,j]
dx = dx_pad[:,:,pad:H+pad,pad:W+pad]

    #forward
current_x_matrix = x_pad[n, :, i * stride: i * stride + HH, j * stride:j * stride + WW]
out[n,f,i,j] = np.sum(current_x_matrix* w[f])
#backward
dw[f] += dout[n,f,i,j]*current_x_matrix
dx_pad[....]+=dout * w[f]

### 5.5 cell6 实现max_pool_forward_naive

  N, C, H, W = x.shape
pool_height = pool_param['pool_height']
pool_width = pool_param['pool_width']
stride = pool_param['stride']
H_out = 1 + (H - pool_height) / stride
W_out = 1 + (W - pool_width) / stride
out = np.zeros((N,C,H_out,W_out))

for n in range(N):
for c in range(C):
for h in range(H_out):
for w in range(W_out):
out[n,c,h,w] = np.max(x[n,c, h*stride:h*stride+pool_height, w*stride:w*stride+pool_width])

max_pool的forward非常简单，就是在对应的局部感知域里选择最大的那个数就行。

### 5.6 cell7 实现max_pool_backward_naive

  x, pool_param = cache
pool_height = pool_param['pool_height']
pool_width = pool_param['pool_width']
stride = pool_param['stride']
N, C, H_out, W_out = dout.shape

dx = np.zeros(x.shape)

for n in range(N):
for c in range(C):
for h in range(H_out):
for w in range(W_out):
current_matrix = x[n, c, h * stride:h * stride + pool_height, w * stride:w * stride + pool_width]
max_idx = np.unravel_index(np.argmax(current_matrix),current_matrix.shape)
dx[n, c, h * stride + max_idx[0], w * stride + max_idx[1]] += dout[n, c, h, w]

backward也很简单，就是最大的局部感知域最大的那个dx是1，其余的是0。为了提高效率，其实forward阶段是可以”记下“最大的那个下标，这里是重新计算的。

In [3]: x
Out[3]:
array([[1, 2, 3],
[4, 5, 6]])

In [5]: x.argmax()
Out[5]: 5

In [6]: ind = np.unravel_index(np.argmax(x),x.shape)

In [7]: ind
Out[7]: (1, 2)

### 5.7 cell8-9

pooling的快速版本和原始版本比较

### 5.9 cell12 三层的卷积神经网络

conv - relu - 2x2 max pool - affine - relu - affine - softmax

#### 5.9.1 init函数

  def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7,
hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0,
dtype=np.float32):
"""
初始化一个新的网络

输入:
- input_dim: 三元组 (C, H, W) 给定输入的Channel数，Height和Width
- num_filters: 卷积层的filter的个数（feature map）
- filter_size: filter的width和height，这里假设是一样的。
- hidden_dim: 全连接层hidden unit的个数
- num_classes: 输出的类别数
- weight_scale: 初始化weight高斯分布的标准差

- reg: L2正则化系数
- dtype: 浮点数的类型
"""
self.params = {}
self.reg = reg
self.dtype = dtype

############################################################################
# TODO: Initialize weights and biases for the three-layer convolutional    #
# network. Weights should be initialized from a Gaussian with standard     #
# deviation equal to weight_scale; biases should be initialized to zero.   #
# All weights and biases should be stored in the dictionary self.params.   #
# Store weights and biases for the convolutional layer using the keys 'W1' #
# and 'b1'; use keys 'W2' and 'b2' for the weights and biases of the       #
# hidden affine layer, and keys 'W3' and 'b3' for the weights and biases   #
# of the output affine layer.                                              #
############################################################################
C, H, W = input_dim
self.params['W1'] = np.random.normal(0, weight_scale, (num_filters, C, filter_size, filter_size))
self.params['b1'] = np.zeros(num_filters)

self.params['W2'] = np.random.normal(0, weight_scale, (num_filters*H/2*W/2, hidden_dim))
self.params['b2'] = np.zeros(hidden_dim)

self.params['W3'] = np.random.normal(0, weight_scale, (hidden_dim, num_classes))
self.params['b3'] = np.zeros(num_classes)

############################################################################
#                             END OF YOUR CODE                             #
############################################################################

for k, v in self.params.iteritems():
self.params[k] = v.astype(dtype)

init主要的代码就是初始化三层卷积网络的参数W和b。

• W1的shape是(num_filters, C, filter_size, filter_size)
• b1的shape是(num_filters)
• b2的shape是(hidden_dim)
• W3的shape是(hidden_dim, num_classes)
• b3是(num_classes)

#### 5.9.2 loss函数

1. forward部分

    conv_out, conv_cache = conv_forward_fast(X, W1, b1, conv_param)
relu1_out, relu1_cache = relu_forward(conv_out)
pool_out, pool_cache = max_pool_forward_fast(relu1_out, pool_param)
affine_relu_out, affine_relu_cache = affine_relu_forward(pool_out, W2, b2)
affine2_out, affine2_cache = affine_forward(affine_relu_out, W3, b3)
scores = affine2_out

• 第一行是进行卷积，同时要保存cache，后面backward会用到
• 第二行是relu
• 第三行是max_pool
• 第四行是affine_relu，把affine和relu同时做了，当然分开也是可以的。
• 第五行是affine
    if y is None:
return scores

2. backwoard

    loss, dscores = softmax_loss(scores, y)
loss += 0.5 * self.reg*(np.sum(self.params['W1']* self.params['W1'])
+ np.sum(self.params['W2']* self.params['W2'])
+ np.sum(self.params['W3']* self.params['W3']))

affine2_dx, affine2_dw, affine2_db = affine_backward(dscores, affine2_cache)
grads['W3'] = affine2_dw + self.reg * self.params['W3']

affine1_dx, affine1_dw, affine1_db = affine_relu_backward(affine2_dx, affine_relu_cache)

grads['W2'] = affine1_dw + self.reg * self.params['W2']

pool_dx = max_pool_backward_fast(affine1_dx, pool_cache)
relu_dx = relu_backward(pool_dx, relu1_cache)
conv_dx, conv_dw, conv_db = conv_backward_fast(relu_dx, conv_cache)
grads['W1'] = conv_dw + self.reg * self.params['W1']
grads['b1'] = conv_db

affine2_out, affine2_cache = affine_forward(affine_relu_out, W3, b3)

affine2_dx, affine2_dw, affine2_db = affine_backward(dscores, affine2_cache)

out, cache=xxx_forward(x,y,z)
dx,dy,dz=xxx_backward(dout, cache)

### 5.10 cell13-14 sanity check和gradient check

Initial loss (no regularization):  2.30258612067
Initial loss (with regularization):  2.50896273286

W1 max relative error: 9.816730e-05
W2 max relative error: 3.816233e-03
W3 max relative error: 2.890462e-05
b1 max relative error: 6.426752e-05
b2 max relative error: 1.000000e+00
b3 max relative error: 1.013546e-09

### 5.11 cell15-16 拟合少量数据

(Epoch 9 / 10) train acc: 0.790000; val_acc: 0.205000
(Iteration 19 / 20) loss: 0.659042
(Iteration 20 / 20) loss: 0.712001
(Epoch 10 / 10) train acc: 0.820000; val_acc: 0.225000

### 5.12 cell17

(Iteration 941 / 980) loss: 1.359960
(Iteration 961 / 980) loss: 1.461109
(Epoch 1 / 1) train acc: 0.476000; val_acc: 0.470000

### 5.14 cell19 Spatial Batch Normalization

#### 5.14.1 forward

def spatial_batchnorm_forward(x, gamma, beta, bn_param):

Inputs:
- x: 输入数据shape (N, C, H, W)
- gamma: scale参数 shape (C,)
- beta: 平移参数 shape (C,)
- bn_param: Dictionary包括:
- mode: 'train' 或者 'test'; 必须有的
- eps: 保持数值计算稳定的一个很小的常量
- momentum: 计算running mean/variance的常量，前面也讲过。
如果momentum=0 那么每次都丢弃原来的值，只用当前最新值。
momentum=1 表示只用原来的值。默认0.9，大部分情况下不用修改
- running_mean: 数组 shape (D,) 保存当前的均值
- running_var 数组 shape (D,) 保存当前的方差

Returns a tuple of:
- out: 输出数据 shape (N, C, H, W)
- cache: 用于backward的cache
"""
out, cache = None, None

#############################################################################
# TODO: Implement the forward pass for spatial batch normalization.         #
#                                                                           #
# HINT: You can implement spatial batch normalization using the vanilla     #
# version of batch normalization defined above. Your implementation should  #
# be very short; ours is less than five lines.                              #
#############################################################################
N, C, H, W = x.shape
temp_output, cache = batchnorm_forward(x.transpose(0,2,3,1).reshape((N*H*W,C)), gamma, beta, bn_param)
out = temp_output.reshape(N,H,W,C).transpose(0,3,1,2)

#############################################################################
#                             END OF YOUR CODE                              #
#############################################################################

return out, cache

1. 通过x.shape获得输入的N, C, H, W代表batchSize，Channel数，Height和Width
2. 把(N, C, H, W)的4维tensor变成(N H W,C)的2维tensor。因为要把第二维C放到最后，所以首先transponse(0,2,3,1)把第二维放到第四维，然后原来的第三和四维分别变成第二和三维。然后在reshape成二维的(N H W, C)。这样就直接调用之前的batchnorm_forward。
transpose(0,2,3,1)的意思就是：把原来的第0维放到新的第0维【 不变】，把原来的第2维放到现在的第1维，把原来的第3维放到现在的第2维，把原来的第1维放到第3维。【主要这一段我说的时候下标是从0开始的了】
3. 计算完成后我们需要把它恢复成(N, C, H, W)的4维tensor

Before spatial batch normalization:
Shape:  (2, 3, 4, 5)
Means:  [ 10.55377221  10.73790598   9.53943534]
Stds:  [ 3.78632253  3.62325432  3.74675181]
After spatial batch normalization:
Shape:  (2, 3, 4, 5)
Means:  [  5.66213743e-16  -1.38777878e-16   7.43849426e-16]
Stds:  [ 0.99999965  0.99999962  0.99999964]
After spatial batch normalization (nontrivial gamma, beta):
Shape:  (2, 3, 4, 5)
Means:  [ 6.  7.  8.]
Stds:  [ 2.99999895  3.99999848  4.99999822]

#### 5.14.2 backward

  N,C,H,W = dout.shape
dx_temp, dgamma, dbeta = batchnorm_backward_alt(dout.transpose(0,2,3,1).reshape((N*H*W,C)),cache)
dx = dx_temp.reshape(N,H,W,C).transpose(0,3,1,2)

dx error:  1.24124210224e-07
dgamma error:  1.440787364e-12
dbeta error:  1.19492399319e-11

### 5.15 实现一个validation数据上准确率超过65%的网络

1. learning_rate非常重要，刚开始要大，之后用lr_decay让它变小。如果发现开始loss下降很慢，那么可以调大这个参数。如果loss时而变大时而变小【当然偶尔反复是正常的】，那么可能是learning_rate过大了。
3. 如果训练准确率和验证准确率差距过大，说明模型过拟合了，可以增大L2正则化参数reg，另外使用dropout也是可以缓解过拟合的。
4. batch norm非常有用，尽量使用
5. 越深的网络效果越好，当然要求的参数也越多，计算也越慢。后面我们会介绍一些使得我们可以训练更深网络的方法，比如著名的152层的ResNet以及参数很少的Inception系列算法，这些方法是最近一两年在ImageNet上名列前茅。

1. VGG简介

VGG其实也没有什么的新东西，就是使用3 × 3的卷积和pooling实现比较深(16-19)层【当然ResNet出来后就不敢说自己深了】的网络结构。
VGG的结构如下图：

class VGGlikeConvNet(object):
"""
A flexible convolutional network with the following architecture:
[(CONV-SBN-RELU)*A-POOL]*B - (FC-BN-RELU)*K - SOFTMAX

The network operates on minibatches of data that have shape (N, C, H, W)
consisting of N images, each with height H and width W and with C input
channels.
"""
def __init__(self, input_dim=(3, 32, 32), num_filters=[64, 128, 256, 512, 512], filter_size=3,
hidden_dims=[1024, 1024], num_classes=10, weight_scale=1e-2, reg=1e-3,
As=[2,2,3,3,3],use_batchnorm=True,
dropout=0, dtype=np.float32, seed=None):

"""
Inputs:
- input_dim: Tuple (C, H, W) giving size of input data
- num_filters: A list of integers giving the filters to use in each "MAIN" convolutional layer
- filter_size: Size of filters to use in the convolutional layer
- hidden_dim: A list of integers giving the size of each hidden layer.
- num_classes: Number of scores to produce from the final affine layer.
- weight_scale: Scalar giving standard deviation for random initialization
of weights.
- reg: Scalar giving L2 regularization strength
- As: Numbers of "SUB" convolution-layer replications in each num_filter
- dtype: numpy datatype to use for computation.
"""
self.params = {}
self.reg = reg
self.dtype = dtype
self.use_dropout = dropout > 0
self.use_batchnorm = use_batchnorm
self.filter_size=filter_size
self.hidden_dims=hidden_dims
C, H, W = input_dim
self.num_filters=num_filters
self.As=As

# With batch normalization we need to keep track of running means and
# variances, so we need to pass a special bn_param object to each batch
# normalization layer. You should pass self.bn_params[0] to the forward pass
# of the first batch normalization layer, self.bn_params[1] to the forward
# pass of the second batch normalization layer, etc.
self.bn_params = {}

for i in range(1, len(num_filters)+1):
num_filter=num_filters[i-1]

for j in range(1, As[i-1]+1):
#debug
ss=str(i)+","+str(j)
print (H,W,C, num_filter,  filter_size, filter_size)
self.params['W' + ss] = np.random.normal(0, weight_scale,
(num_filter, C, filter_size, filter_size))
self.params['b' + ss] = np.zeros(num_filter)

C=num_filter
if self.use_batchnorm:
self.params['beta' + ss] = np.zeros(num_filter)
self.params['gamma'+ ss]=np.ones(num_filter)
self.bn_params[ss]={'mode': 'train'}
#max-pooling size/=2
H/=2
W/=2

# full connected layers
for i in range(1, len(hidden_dims)+1):
layer_input_dim = C*H*W if i == 1 else hidden_dims[i - 2]
layer_output_dim = hidden_dims[i - 1]
print (layer_input_dim, layer_output_dim)
self.params['W' + str(i)] = np.random.normal(0, weight_scale, (layer_input_dim, layer_output_dim))

self.params['b' + str(i)] = np.zeros(layer_output_dim)

if self.use_batchnorm:
self.params['beta' + str(i)] = np.zeros(layer_output_dim)
self.params['gamma' + str(i)] = np.ones(layer_output_dim)
self.bn_params[str(i)]={'mode': 'train'}
# softmax layer
softmax_input_dim=hidden_dims[-1]
softmax_output_dim=num_classes
print (softmax_input_dim, softmax_output_dim)
self.params['W_softmax'] = np.random.normal(0, weight_scale, (softmax_input_dim, softmax_output_dim))
self.params['b_softmax'] = np.zeros(softmax_output_dim)
self.dropout_param = {}
if self.use_dropout:
self.dropout_param = {'mode': 'train', 'p': dropout}
if seed is not None:
self.dropout_param['seed'] = seed

# Cast all parameters to the correct datatype
for k, v in self.params.iteritems():
self.params[k] = v.astype(dtype)

def loss(self, X, y=None):
"""
Compute loss and gradient for the fully-connected net.

Input / output: Same as TwoLayerNet above.
"""
X = X.astype(self.dtype)
mode = 'test' if y is None else 'train'

# Set train/test mode for batchnorm params and dropout param since they
# behave differently during training and testing.
if self.dropout_param is not None:
self.dropout_param['mode'] = mode
if self.use_batchnorm:
for bn_param in self.bn_params:
self.bn_params[bn_param]['mode'] = mode

scores = None

conv_caches={}
relu_caches={}
bn_caches={}
affine_relu_caches={}
affine_bn_relu_caches={}
dropout_caches={}
pool_caches={}
conv_param = {'stride': 1, 'pad': (self.filter_size - 1) / 2}

# pass pool_param to the forward pass for the max-pooling layer
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
current_input = X

# conv layers
for i in range(1, len(self.num_filters)+1):
for j in range(1, self.As[i-1]+1):
ss=str(i) + "," +str(j)
keyW = 'W' + ss
keyb = 'b' + ss
if not self.use_batchnorm:
conv_out, conv_cache = conv_forward_fast(current_input, self.params[keyW], self.params[keyb], conv_param)
relu_out, relu_cache = relu_forward(conv_out)
conv_caches[ss]=conv_cache
relu_caches[ss]=relu_cache
current_input=relu_out
else:
key_gamma = 'gamma' + ss
key_beta = 'beta' + ss
conv_out, conv_cache = conv_forward_fast(current_input, self.params[keyW], self.params[keyb], conv_param)
bn_out, bn_cache=spatial_batchnorm_forward(conv_out, self.params[key_gamma], self.params[key_beta], self.bn_params[ss])
relu_out, relu_cache = relu_forward(bn_out)
conv_caches[ss] = conv_cache
relu_caches[ss] = relu_cache
bn_caches[ss] = bn_cache
current_input = relu_out
pool_out, pool_cache = max_pool_forward_fast(current_input, pool_param)
pool_caches[str(i)]=pool_cache
current_input=pool_out

# full connected layers
for i in range(1, len(self.hidden_dims) + 1):
keyW = 'W' + str(i)
keyb = 'b' + str(i)

if not self.use_batchnorm:
current_input, affine_relu_caches[i] = affine_relu_forward(current_input, self.params[keyW], self.params[keyb])

else:
key_gamma = 'gamma' + str(i)
key_beta = 'beta' + str(i)
current_input, affine_bn_relu_caches[i] = affine_bn_relu_forward(current_input, self.params[keyW],
self.params[keyb],
self.params[key_gamma], self.params[key_beta],
self.bn_params[str(i)])

if self.use_dropout:
current_input, dropout_caches[i] = dropout_forward(current_input, self.dropout_param)

# softmax
keyW = 'W_softmax'
keyb = 'b_softmax'
affine_out, affine_cache = affine_forward(current_input, self.params[keyW], self.params[keyb])

scores=affine_out

# If test mode return early
if mode == 'test':
return scores

loss, dscores = softmax_loss(scores, y)

# last layer:
affine_dx, affine_dw, affine_db = affine_backward(dscores, affine_cache)

grads['W_softmax'] = affine_dw + self.reg * self.params['W_softmax']

loss += 0.5 * self.reg * (np.sum(self.params['W_softmax'] * self.params['W_softmax']))

# full connected layers
for i in range(len(self.hidden_dims), 0, -1):
if self.use_dropout:
affine_dx = dropout_backward(affine_dx, dropout_caches[i])

if not self.use_batchnorm:
affine_dx, affine_dw, affine_db = affine_relu_backward(affine_dx, affine_relu_caches[i])
else:
affine_dx, affine_dw, affine_db, dgamma, dbeta = affine_bn_relu_backward(affine_dx, affine_bn_relu_caches[i])

keyW = 'W' + str(i)
keyb = 'b' + str(i)
loss += 0.5 * self.reg * (np.sum(self.params[keyW] * self.params[keyW]))
grads[keyW] = affine_dw + self.reg * self.params[keyW]

# conv layers
conv_dx=affine_dx
for i in range(len(self.num_filters), 0, -1):
dpool_out=conv_dx
conv_dx=max_pool_backward_fast(dpool_out, pool_caches[str(i)])

for j in range(self.As[i-1],0,-1):
ss=str(i) + "," +str(j)
keyW = 'W' + ss
keyb = 'b' + ss
if not self.use_batchnorm:
drelu_out=conv_dx
relu_cache=relu_caches[ss]
conv_cache=conv_caches[ss]
dconv_out=relu_backward(drelu_out, relu_cache)
conv_dx, conv_dw, conv_db=conv_backward_fast(dconv_out, conv_cache)

loss += 0.5 * self.reg * (np.sum(self.params[keyW] * self.params[keyW]))
grads[keyW] = conv_dw + self.reg * self.params[keyW]

else:
key_gamma = 'gamma' + ss
key_beta = 'beta' + ss

drelu_out = conv_dx
relu_cache = relu_caches[ss]
conv_cache = conv_caches[ss]
bn_cache=bn_caches[ss]

dbn_out = relu_backward(drelu_out, relu_cache)
dconv_out, dgamma, dbeta=spatial_batchnorm_backward(dbn_out, bn_cache)

conv_dx, conv_dw, conv_db = conv_backward_fast(dconv_out, conv_cache)

loss += 0.5 * self.reg * (np.sum(self.params[keyW] * self.params[keyW]))
grads[keyW] = conv_dw + self.reg * self.params[keyW]

############################################################################
#                             END OF YOUR CODE                             #
############################################################################

return loss, grads

2. 使用VGG完成作业

model = VGGlikeConvNet(input_dim=(3, 32, 32), num_filters=[64, 64, 128, 256], filter_size=3,
hidden_dims=[64, 64], num_classes=10, weight_scale=1e-2, reg=1e-3, As=[2,2,3,3],
dropout=0.2,
dtype=np.float32)

solver=Solver(model, data,
num_epochs=10, batch_size=50,
lr_decay=0.95,
optim_config={'learning_rate': 5e-4},
verbose=True, print_every=20)

solver.train()

(Iteration 9741 / 9800) loss: 0.733453
(Iteration 9761 / 9800) loss: 0.645659
(Iteration 9781 / 9800) loss: 0.564387
(Epoch 10 / 10) train acc: 0.927000; val_acc: 0.854000

#### 深度学习经典卷积神经网络之VGGNet

2017-06-09 11:31:35

#### 深度网络VGG理解

2017-02-21 14:12:01

#### VGGNet网络结构

2018-02-05 19:32:37

#### CS231n笔记7-ConvNets Typical Architecture与VGGNet

2016-05-18 11:09:43

#### VGG网络结构分析

2018-01-17 21:40:55

2016-01-02 11:18:40

#### 【深度学习】VGG-16网络结构

2017-12-18 21:05:32

#### 【DL笔记】VGG网络详解及代码实现

2017-08-10 15:25:02

#### [caffe]深度学习之图像分类模型VGG解读

2015-07-01 19:26:54

#### 牛津大学视觉几何组VGG卷积神经网络实践教程VGG Convolutional Neural Networks Practical

2016-04-01 13:58:32