CS231n - Assignment2 Tensorflow

This assignment is quite considerate: the IPython notebook includes a short tutorial that walks through the basics of TensorFlow, plus links to guides for the commonly used APIs. Nice! If you can't reach those links, no problem; let me share an API lookup tool, DevDocs, whose offline data is a lifesaver for people like me with no internet at home. Whenever an unfamiliar API comes up while working on the assignment, just look it up.

Assignment code: my_github


Without further ado, on to the assignment.

First, the simple_model function that the assignment already provides.

To see why flattening gives a vector of length 5408, the calculation is written out in the comments; also note the use of one_hot encoding. (A quick numerical check of the 5408 figure follows the code block.)

# clear old variables
tf.reset_default_graph()

# setup input (e.g. the data that changes every batch)
# The first dim is None, and gets set automatically based on batch size fed in
X = tf.placeholder(tf.float32, [None, 32, 32, 3])
y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)

def simple_model(X,y):
    # define our weights (e.g. init_two_layer_convnet)
    
    # setup variables
    # the conv kernel is 7x7x3, and there are 32 filters
    # output size with 'VALID' padding: ceil((32-7+1)/stride) x ceil((32-7+1)/stride) x 32
    # with stride=2:
    # 13*13*32 = 5408
    Wconv1 = tf.get_variable("Wconv1", shape=[7, 7, 3, 32])
    bconv1 = tf.get_variable("bconv1", shape=[32])
    W1 = tf.get_variable("W1", shape=[5408, 10])
    b1 = tf.get_variable("b1", shape=[10])

    # define our graph (e.g. two_layer_convnet)
    a1 = tf.nn.conv2d(X, Wconv1, strides=[1,2,2,1], padding='VALID') + bconv1
    h1 = tf.nn.relu(a1)
    #13*13*32 -->  1*5408
    h1_flat = tf.reshape(h1,[-1,5408])
    y_out = tf.matmul(h1_flat,W1) + b1
    return y_out

y_out = simple_model(X,y)

# define our loss
"""""
tf.one_hot(indices, depth)
将输入转换为one_hot张量
label1:[0 1 2 3 4]
after one_hot:
[[1 0 0 0 0]
[0 1 0 0 0]
[0 0 1 0 0]
[0 0 0 1 0]
[0 0 0 0 1]]
"""""
total_loss = tf.losses.hinge_loss(tf.one_hot(y,10),logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# define our optimizer
optimizer = tf.train.AdamOptimizer(5e-4) # select optimizer and set learning rate
train_step = optimizer.minimize(mean_loss)
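
As a quick check on that 5408, here is a tiny sketch of the 'VALID' convolution output-size formula (my own illustration, not part of the assignment code; conv_output_size is a hypothetical helper):

import math

def conv_output_size(input_size, filter_size, stride):
    # 'VALID' padding in TF: ceil((input - filter + 1) / stride)
    return math.ceil((input_size - filter_size + 1) / stride)

side = conv_output_size(32, 7, 2)   # 13
print(side, side * side * 32)       # 13 5408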

Next, a walkthrough of run_model.

def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    # have tensorflow compute accuracy
    # tf.argmax returns, for each row of predict, the index of the largest value
    correct_prediction = tf.equal(tf.argmax(predict,1), y)
    # tf.cast converts the input to the given dtype
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # shuffle indices
    train_indicies = np.arange(Xd.shape[0])
    np.random.shuffle(train_indicies)

    training_now = training is not None
    
    # setting up variables we want to compute (and optimizing)
    # if we have a training function, add that to things we compute
    variables = [mean_loss,correct_prediction,accuracy]
    if training_now:
        variables[-1] = training
    
    # counter 
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        # math.ceil returns the smallest integer greater than or equal to its argument
        for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
            # generate indices for the batch
            start_idx = (i*batch_size)%Xd.shape[0]
            idx = train_indicies[start_idx:start_idx+batch_size]
            
            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now }
            # get batch size
            # in the last iteration the actual batch size may be smaller than the configured batch_size
            actual_batch_size = yd[idx].shape[0]
            
            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            
            #variables = [mean_loss,correct_prediction,accuracy]
            #mean_loss = tf.reduce_mean(total_loss)
            #correct_prediction = tf.equal(tf.argmax(predict,1), y)
            #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)
            
            # aggregate performance stats
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)
            
            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
            iter_cnt += 1
        total_correct = correct/Xd.shape[0]
        total_loss = np.sum(losses)/Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss,total_correct

with tf.Session() as sess:
    with tf.device("/gpu:0"): #"/cpu:0" or "/gpu:0" 
        sess.run(tf.global_variables_initializer())
        print('Training')
        run_model(sess,y_out,mean_loss,X_train,y_train,1,64,100,train_step,True)
        print('Validation')
        run_model(sess,y_out,mean_loss,X_val,y_val,1,64)

The parts you have to write yourself can basically follow simple_model; the only difference is that, as required, a few other APIs are used.

cross_entropy: cross-entropy is almost always used together with softmax, so TF simply fuses the two into a single API;

RMSProp: the common optimizers are all implemented under tf.train.

mean_loss = None
optimizer = None
total_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf.one_hot(y,10), logits=y_out)

mean_loss = tf.reduce_mean(total_loss)
optimizer = tf.train.RMSPropOptimizer(1e-3)
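
In case the fused API feels like a black box, here is a minimal sketch of what it computes per example, using only standard TF 1.x ops (illustration only, meant to be dropped into the same graph as the code above):

# illustration: softmax followed by cross-entropy, written out by hand
log_probs = tf.nn.log_softmax(y_out)                               # numerically stable log(softmax(logits))
manual_loss = -tf.reduce_sum(tf.one_hot(y, 10) * log_probs, axis=1)
# tf.reduce_mean(manual_loss) should match mean_loss above up to floating-point error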

Batch_Normalization: in TF this requires an extra dependency. The reason is that tf.layers.batch_normalization updates its moving mean and variance through ops it places in the tf.GraphKeys.UPDATE_OPS collection; those ops are not run automatically, so the train step has to be wrapped in a control dependency on them.

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss)
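
If you're curious what that dependency actually contains: assuming the model uses tf.layers.batch_normalization, the collection holds the assign ops that update each layer's moving mean and variance, and printing it is a quick way to confirm:

# quick check (illustration only): these are the moving mean/variance update ops
# created by tf.layers.batch_normalization
print(tf.get_collection(tf.GraphKeys.UPDATE_OPS))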

The next task is to train a network on CIFAR-10 that reaches >= 70% accuracy on the validation set.

The first hint in the assignment points at one weakness of the network above: the 7x7 filter size. Naturally I replaced it with three 3x3 convolutions; anyone who has made it this far already knows why, so I won't belabor it. The second hint is to add an SVM on top of the fully connected layer. Another thing worth noting is Global Average Pooling: use average pooling to shrink each input feature map down to 1x1, then stack those values into a single vector that feeds the classifier. The main point is to cut the parameter count of the affine layer; by comparison, global average pooling has no parameters at all and is cheaper to compute (a minimal sketch follows below). Since adding a different classifier would also mean modifying run_model, I ended up not writing it. Together with the powerful ResNet and DenseNet, these are all worth a try if you have the time.
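For reference, global average pooling itself is a one-liner in TF; here is a minimal sketch (my own illustration, not part of my submitted code), assuming a feature map h of shape [N, H, W, C]:

# global average pooling: average over the spatial dimensions
# h: [N, H, W, C]  ->  gap: [N, C]
gap = tf.reduce_mean(h, axis=[1, 2])
# gap can then feed a small classifier, e.g. scores = tf.matmul(gap, W_cls) + b_cls
# (W_cls with shape [C, 10] and b_cls with shape [10] are hypothetical variables)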

def my_model(X,y,is_training):
    conv11 = tf.nn.conv2d(X, Wconv11, strides=[1,1,1,1],padding='SAME') + bconv11
    h11 = tf.nn.relu(conv11)
    conv12 = tf.nn.conv2d(h11, Wconv12, strides=[1,1,1,1],padding='SAME') + bconv12
    h12 = tf.nn.relu(conv12)
    conv13 = tf.nn.conv2d(h12, Wconv13, strides=[1,1,1,1],padding='SAME') + bconv13
    h_batch1 = tf.layers.batch_normalization(conv13, training=is_training)
    h1 = tf.nn.relu(h_batch1)
    h_pool1 = tf.nn.max_pool(h1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    print("h_pool1",h_pool1.shape)
    
    conv21 = tf.nn.conv2d(h_pool1, Wconv21, strides=[1, 1, 1, 1], padding='SAME') + bconv21
    h21 = tf.nn.relu(conv21)
    conv22 = tf.nn.conv2d(h21, Wconv22, strides=[1,1,1,1], padding='SAME') + bconv22
    h22 = tf.nn.relu(conv22)
    conv23 = tf.nn.conv2d(h22, Wconv23, strides=[1,1,1,1], padding='SAME') + bconv23
    h_batch23 = tf.layers.batch_normalization(conv23, training=is_training)
    h2 = tf.nn.relu(h_batch23)
    h_pool2 = tf.nn.max_pool(h2, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    print("h_pool2",h_pool2.shape)
    
    conv31 = tf.nn.conv2d(h_pool2, Wconv31, strides=[1,1,1,1],padding='SAME') + bconv31
    h31 = tf.nn.relu(conv31)
    conv32 = tf.nn.conv2d(h31, Wconv32, strides=[1,1,1,1], padding='SAME') + bconv32
    h32 = tf.nn.relu(conv32)
    conv33 = tf.nn.conv2d(h32, Wconv33, strides=[1,1,1,1], padding='SAME') + bconv33
    h_batch3 = tf.layers.batch_normalization(conv33, training=is_training)
    h3 = tf.nn.relu(h_batch3)
    print("h3",h3.shape)
    h_pool3 = tf.nn.max_pool(h3, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    print("h_pool3",h_pool3.shape)
    
    conv_flat = tf.reshape(h_pool3, [-1,80])
    affine = tf.matmul(conv_flat, W1) + b1
    h_batch4 = tf.layers.batch_normalization(affine, training=is_training)
    h4 = tf.nn.relu(h_batch4)
    y_out = tf.matmul(h4, W2) + b2
    return y_out
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)
global_step = tf.Variable(0, trainable=False)  # not trainable; incremented by optimizer.minimize below

Wconv11 = tf.get_variable("Wconv11", shape=[3, 3, 3, 30])
bconv11 = tf.get_variable("bconv11", [30])
Wconv12 = tf.get_variable("Wconv12", shape=[3, 3, 30, 30])
bconv12 = tf.get_variable("bconv12", [30])
Wconv13 = tf.get_variable("Wconv13", shape=[3, 3, 30, 30])
bconv13 = tf.get_variable("bconv13", [30])
#16*16*30

Wconv21 = tf.get_variable("Wconv21", shape=[3, 3, 30, 50])
bconv21 = tf.get_variable("bconv21", [50])
Wconv22 = tf.get_variable("Wconv22", shape=[3, 3, 50, 50])
bconv22 = tf.get_variable("bconv22", [50])
Wconv23 = tf.get_variable("Wconv23", shape=[3, 3, 50, 50])
bconv23 = tf.get_variable("bconv23", [50])
#4*4*50

Wconv31 = tf.get_variable("Wconv31", shape=[3, 3, 50, 80])
bconv31 = tf.get_variable("bconv31", [80])
Wconv32 = tf.get_variable("Wconv32", shape=[3, 3, 80, 80])
bconv32 = tf.get_variable("bconv32", [80])
Wconv33 = tf.get_variable("Wconv33", shape=[3, 3, 80, 80])
bconv33 = tf.get_variable("bconv33", [80])
#1*1*80

W1 = tf.get_variable("W1", shape=[80, 512])
b1 = tf.get_variable("b1", shape=[512])
W2 = tf.get_variable("W2", shape=[512, 10])
b2 = tf.get_variable("b2", shape=[10])
y_out = my_model(X,y,is_training)
mean_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(y,10),logits=y_out))
mean_loss += 0.001 * (tf.nn.l2_loss(Wconv11) + tf.nn.l2_loss(Wconv12) + tf.nn.l2_loss(Wconv13) + 
                      tf.nn.l2_loss(Wconv21) + tf.nn.l2_loss(Wconv22) + tf.nn.l2_loss(Wconv23) + 
                      tf.nn.l2_loss(Wconv31) + tf.nn.l2_loss(Wconv32) + tf.nn.l2_loss(Wconv33) + 
                      tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2))
#tf.train.exponential_decay(): exponentially decayed learning rate; here it decays by a factor of 0.9 every 100 iterations
#decayed_learning_rate = learning_rate * decay_rate^(global_step/decay_steps)
learning_rate = tf.train.exponential_decay(1e-3, global_step, 100, 0.9, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)

# batch normalization in tensorflow requires this extra dependency
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss, global_step=global_step)  # pass global_step so it increments and the decay takes effect

Finally, the result after 10 epochs. Judging from the printed log, more iterations would give higher accuracy, but that isn't very interesting; trying a different architecture might bring a pleasant surprise.

One last question: would more convolution filters necessarily be better? I don't think so; if the extra filters only add redundant features, recognition may even get worse. Redundant features add no information about the data, yet they still affect the classification confidence, and this distortion can degrade the model. Take logistic regression as an example: in the extreme case where every feature is duplicated, the log-odds doubles, so the model's confidence in its classification is inflated even though no new information was added.
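A tiny numerical sketch of that argument, with made-up weights and one made-up example:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

w = np.array([0.5, -0.2])                  # weights for the original features (made-up)
x = np.array([2.0, 1.0])                   # one example (made-up)
print(sigmoid(w @ x))                      # ~0.69

# duplicate every feature; if the copies end up with the same weights,
# the log-odds doubles and the predicted probability is pushed toward 1
w_dup = np.concatenate([w, w])
x_dup = np.concatenate([x, x])
print(sigmoid(w_dup @ x_dup))              # ~0.83, more "confident" with no new information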
