用tensorflow一步步实现NAS
使用数据和前期准备
我们使用MNIST数据集,训练集有55000个样本,测试集有10000个样本。
模型
这个模型分为两个部分:一部分为控制器,一部分为我们要优化的网络。控制器是一个带有NAS单元、用特殊强化学习方法训练的RNN(基于TensorFlow实现),用于训练并获得奖励。我们想用奖励最大化网络的精度,并训练控制器来提高精度。控制器产生动作去修改CNN,包括修改每层的filters、kernel_size、pool_size、dropout_rate。
实现
针对该控制器,建立了一种基于NASCell的策略网络方法。该网络将当前状态和最大搜索层数作为输入,并且输出新的操作更新网络。
def policy_network(state, max_layers):
    """Controller policy network built around a NASCell RNN.

    Args:
        state: float32 tensor of shape [batch, 4*max_layers] holding the
            current architecture hyperparameters (4 values per layer).
        max_layers: maximum number of CNN layers being searched over.

    Returns:
        Tensor of shape [batch, 1, 4*max_layers] — the cell output at the
        last timestep, which encodes the next action (new hyperparameters).
    """
    with tf.name_scope("policy_network"):
        # One NASCell unit per hyperparameter: 4 per layer.
        # Bug fix: tf.contrib.rnn.NASNet does not exist; the cell is NASCell.
        nas_cell = tf.contrib.rnn.NASCell(4*max_layers)
        # Bug fix: original `state.expand_dims[state,-1]` is invalid Python;
        # dynamic_rnn needs a rank-3 input [batch, time, features].
        outputs, state = tf.nn.dynamic_rnn(nas_cell, tf.expand_dims(state, -1), dtype=tf.float32)
        bias = tf.Variable([0.05]*4*max_layers)
        outputs = tf.nn.bias_add(outputs, bias)
        # Keep only the last timestep, e.g. for
        # x = [[[1,2,3,4],[2,1,2,2]], [[2,2,2,2],[3,3,2,2]]]
        # x[:, -1:, :] -> [[[2,1,2,2]], [[3,3,2,2]]]
        return outputs[:, -1:, :]
为了使我们的参数可以微调,我们将代码放到Reinforce类中 要实例化这个类,我们需要传递以下参数 : sess and optimizer----首先分别初始化这两个参数 policy_network----初始化方法 max_layers----最大层数 division_rate----每个神经元的正态分布值在-1.0到1.0之间 reg_param----正则化参数 exploration----随机产生动作的可能性
class Reinforce():
    """REINFORCE policy-gradient trainer for the NAS controller.

    Args:
        sess: a tf.Session used for all graph execution.
        optimizer: a tf.train Optimizer used to update the policy network.
        policy_network: callable (states, max_layers) -> action logits.
        max_layers: maximum number of CNN layers to search over.
        global_step: tf.Variable counting applied gradient steps.
        division_rate: scale factor mapping cell outputs (~[-1, 1]) to
            integer hyperparameter values.
        reg_param: L2 regularization coefficient.
        discount_factor: reward discount (stored but unused in this snippet).
        exploration: probability of emitting a random action instead of the
            network's prediction.
    """
    def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
                 division_rate=100.0,
                 reg_param=0.001,
                 discount_factor=0.99,
                 exploration=0.3):
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network
        self.division_rate = division_rate
        self.reg_param = reg_param
        self.discount_factor = discount_factor
        self.max_layers = max_layers
        self.global_step = global_step
        # Bug fix: `exploration` was accepted but never stored, so
        # get_action's `self.exploration` would raise AttributeError.
        self.exploration = exploration
        # Rolling buffers of visited states and obtained rewards.
        self.reward_buffer = []
        self.state_buffer = []
        self.create_variables()
        var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.sess.run(tf.variables_initializer(var_lists))
当然,我们还必须创建变量和placeholders,包括logits和gradient。为此,我们编写一个方法create_variables,在计算初始梯度后,我们启动梯度下降法。
def create_variables(self):
    """Build placeholders, the policy graph, the loss, and the reward-scaled
    gradient-descent train op. Called once from __init__."""
    with tf.name_scope("model_inputs"):
        # Raw state: 4 hyperparameters per layer, flattened.
        self.states = tf.placeholder(tf.float32, [None, self.max_layers*4], name="states")
    with tf.name_scope("predict_actions"):
        # Initialize the policy network.
        with tf.variable_scope("policy_network"):
            self.policy_outputs = self.policy_network(self.states, self.max_layers)
        self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
        # Scale all elements proportionally, then truncate to integers so the
        # scores become concrete hyperparameter values.
        self.predicted_action = tf.cast(tf.scalar_mul(self.division_rate, self.action_scores), tf.int32, name="predicted_action")
    policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")
    with tf.name_scope("compute_gradients"):
        self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")
        # Re-run the policy network with shared weights (reuse=True) to get
        # the logits used in the loss.
        with tf.variable_scope("policy_network", reuse=True):
            self.logprobs = self.policy_network(self.states, self.max_layers)
            print("self.logprobs", self.logprobs)
        # NOTE(review): using the *input state* as the cross-entropy label is
        # unusual — here action == state in this sample, so the network is
        # pushed to reproduce its own input; confirm this is intended.
        self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logprobs[:, -1, :], labels=self.states)
        self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
        # L2 regularization over all policy-network weights.
        self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables])
        self.loss = self.pg_loss + self.reg_param * self.reg_loss
        self.gradients = self.optimizer.compute_gradients(self.loss)
        # REINFORCE: scale each gradient by the (discounted) reward.
        for i, (grad, var) in enumerate(self.gradients):
            if grad is not None:
                self.gradients[i] = (grad * self.discounted_rewards, var)
    with tf.name_scope("train_policy_network"):
        self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)
强化学习的实现: 首先梯度值乘以折扣,定义了变量之后,我们应该在tensorflow图中初始化它,在__init__的末尾,每一个动作取决于之前的状态,但是有时候为了更有效地训练,我们可以生成随机动作来避免局部最小值。
def get_action(self, state):
    """Return the next action for `state`.

    With probability `exploration` a random action is sampled to escape
    local minima; otherwise the policy network's prediction is used.

    Bug fix: the original began with an unconditional
    `return self.sess.run(...)`, making the exploration branch unreachable.
    """
    # NOTE(review): __init__ as published never stored `exploration`;
    # fall back to its signature default so this fix stands alone.
    exploration = getattr(self, "exploration", 0.3)
    if random.random() < exploration:
        # Random hyperparameters in [1, 35), 4 per layer.
        return np.array([[random.sample(range(1, 35), 4*self.max_layers)]])
    else:
        return self.sess.run(self.predicted_action, {self.states: state})
在每个周期中,我们的网络能够产生一个动作,获得奖励然后采取一个训练步骤。训练步骤包括store_rollout以及train_step。
def storeRollout(self, state, reward):
    """Record one (state, reward) rollout in the replay buffers.

    Only state[0] is stored: states arrive wrapped as [[s0]] / batch of 1.
    """
    self.reward_buffer.append(reward)
    self.state_buffer.append(state[0])

# Bug fix: the training loop calls `store_rollout` (snake_case), which did
# not exist. Backward-compatible alias keeps both spellings working.
store_rollout = storeRollout
def train_step(self, steps_count):
    """Run one policy-gradient update over the last `steps_count` rollouts.

    Args:
        steps_count: how many of the most recent buffered (state, reward)
            pairs to train on.

    Returns:
        The scalar loss value from this update.
    """
    # States are rescaled back to the network's ~[-1, 1] output range.
    states = np.array(self.state_buffer[-steps_count:])/self.division_rate
    # Bug fix: was `rewars = ...` while `rewards` was fed below -> NameError.
    rewards = self.reward_buffer[-steps_count:]
    _, ls = self.sess.run([self.train_op, self.loss],
                          {self.states: states,
                           self.discounted_rewards: rewards})
    return ls
正如上述方法提及的,我们需要为每一个动作/状态定义一个奖励。这是通过生成一个新的CNN网络,每个动作都有新的架构,对其进行培训并评估其准确性来实现的。由于这个过程产生了很多CNN网络,让我们为它写一个管理器:
import tensorflow as tf
from cnn import CNN
class NetManager():
    """Builds, trains and evaluates one CNN per controller action, and turns
    the resulting test accuracy into a reward signal.

    Args:
        num_input: flattened input size (784 for MNIST).
        num_classes: number of output classes (10 for MNIST).
        learning_rate: Adam learning rate for the child CNN.
        mnist: dataset object exposing .train/.test with next_batch().
        max_step_per_action: training iterations per candidate CNN.
        bathc_size: minibatch size. NOTE(review): name is a typo of
            `batch_size`, kept to avoid breaking keyword callers.
        dropout_rate: keep-probability for the dense dropout layer.
    """
    def __init__(self, num_input, num_classes,
                 learning_rate, mnist,
                 max_step_per_action=5500*3,
                 bathc_size=100,
                 dropout_rate=0.85):
        self.num_input = num_input
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.mnist = mnist
        self.max_step_per_action = max_step_per_action
        self.bathc_size = bathc_size
        self.dropout_rate = dropout_rate

    def get_reward(self, action, step, pre_acc):
        """Train a CNN described by `action` and return (reward, accuracy).

        Args:
            action: nested array [[flat_hyperparams]]; 4 values per layer
                (filter_size, n_filters, pool_ksize, dropout_rate).
            step: episode index, used to isolate each experiment's graph
                state in its own container.
            pre_acc: accuracy achieved by the previous architecture.

        Returns:
            (acc, acc) when accuracy improved by more than 0.01 over
            pre_acc, else (0.01, acc).
        """
        # Unpack the flat vector into per-layer 4-tuples.
        action = [action[0][0][x:x+4] for x in range(0, len(action[0][0]), 4)]
        cnn_drop_rate = [c[3] for c in action]
        # Fresh graph + named container so each experiment is isolated.
        with tf.Graph().as_default() as g:
            with g.container('experiment'+str(step)):
                model = CNN(self.num_input, self.num_classes, action)
                loss_op = tf.reduce_mean(model.loss)
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                train_op = optimizer.minimize(loss_op)
                with tf.Session() as train_sess:
                    init = tf.global_variables_initializer()
                    train_sess.run(init)
                    # Bug fix: the loop variable was also named `step`,
                    # shadowing the episode index parameter above.
                    for train_iter in range(self.max_step_per_action):
                        batch_x, batch_y = self.mnist.train.next_batch(self.bathc_size)
                        feed = {model.X: batch_x,
                                model.Y: batch_y,
                                model.dropout_keep_prob: self.dropout_rate,
                                model.cnn_dropout_rates: cnn_drop_rate}
                        _ = train_sess.run(train_op, feed_dict=feed)
                        if train_iter % 100 == 0:
                            # Calculate batch loss and accuracy (dropout off).
                            loss, acc = train_sess.run(
                                [loss_op, model.accuracy],
                                feed_dict={model.X: batch_x,
                                           model.Y: batch_y,
                                           model.dropout_keep_prob: 1.0,
                                           model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})
                            print("Step " + str(train_iter) + ", Minibatch Loss= " + "{:.4f}".format(loss) + ", Current accuracy= " + "{:.3f}".format(acc))
                    # Final evaluation on the full 10000-example test set.
                    batch_x, batch_y = self.mnist.test.next_batch(10000)
                    loss, acc = train_sess.run(
                        [loss_op, model.accuracy],
                        feed_dict={model.X: batch_x,
                                   model.Y: batch_y,
                                   model.dropout_keep_prob: 1.0,
                                   model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})
                    print("!!!!!!acc:", acc, pre_acc)
                    if acc - pre_acc <= 0.01:
                        return acc, acc
                    else:
                        return 0.01, acc
然后我们在“action”中为每一层创建了带有超参数的batch,并为每一层创建了cnn_drop_rate(即dropout率列表)。这里我们用CNN类定义卷积神经模型。它可以是任何能够通过某种操作生成神经模型的类。
import tensorflow as tf
class CNN():
    """Child CNN assembled from controller-chosen hyperparameters.

    Args:
        num_input: flattened input size (e.g. 784).
        num_classes: number of output classes.
        cnn_config: list of per-layer 4-tuples
            (filter_size, n_filters, pool_ksize, dropout_rate); the dropout
            rates are fed separately via `cnn_dropout_rates`.
    """
    def __init__(self, num_input, num_classes, cnn_config):
        cnn = [c[0] for c in cnn_config]
        cnn_num_filters = [c[1] for c in cnn_config]
        max_pool_ksize = [c[2] for c in cnn_config]
        # Bug fix: keyword was misspelled `ame=` in the original.
        self.X = tf.placeholder(tf.float32, [None, num_input], name="input_X")
        # NOTE(review): int32 one-hot labels are later passed as `labels=` to
        # softmax_cross_entropy_with_logits, which expects floats — confirm.
        self.Y = tf.placeholder(tf.int32, [None, num_classes], name="input_Y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, [], name="dense_dropout_keep_prob")
        self.cnn_dropout_rates = tf.placeholder(tf.float32, [len(cnn), ], name="cnn_dropout_keep_prob")
        Y = self.Y
        # conv1d needs a channel axis: [batch, num_input, 1].
        X = tf.expand_dims(self.X, -1)
        pool_out = X
        with tf.name_scope("Conv_part"):
            # One conv -> max-pool -> dropout stage per configured layer.
            for idd, filter_size in enumerate(cnn):
                with tf.name_scope("L"+str(idd)):
                    conv_out = tf.layers.conv1d(
                        pool_out,
                        filters=cnn_num_filters[idd],
                        kernel_size=(int(filter_size)),
                        strides=1,
                        padding="SAME",
                        name="conv_out_"+str(idd),
                        activation=tf.nn.relu,
                        kernel_initializer=tf.contrib.layers.xavier_initializer(),
                        bias_initializer=tf.zeros_initializer)
                    pool_out = tf.layers.max_pooling1d(
                        conv_out,
                        pool_size=(int(max_pool_ksize[idd])),
                        strides=1,
                        padding='SAME',
                        name="max_pool_"+str(idd))
                    pool_out = tf.nn.dropout(pool_out, self.cnn_dropout_rates[idd])
            flatten_pred_out = tf.contrib.layers.flatten(pool_out)
            self.logits = tf.layers.dense(flatten_pred_out, num_classes)
        self.prediction = tf.nn.softmax(self.logits, name="prediction")
        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=Y, name="loss")
        correct_pred = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="accuracy")
我们在TF图中创建一个分开的容器以防混乱。 在创建了一个新的CNN模型以后我们能够训练它并且得到一个奖励。 正如定义的,奖励提高了所有测试数据集的准确性。对于MNIST来说有10000个例子。 准备好所有东西以后我们开始训练MNIST。首先我们将优化层数的体系结构。设置层数的最大数为2。我们也可以将值设的更大,但是值越大将需要更多的计算力。
def train(mnist, max_layers):
    """Run the NAS loop: the controller proposes architectures, NetManager
    trains/evaluates them, and REINFORCE updates the controller.

    Args:
        mnist: MNIST dataset object (train/test with next_batch()).
        max_layers: maximum number of CNN layers to search over.
    """
    import datetime  # local import: article snippet; hoist to file top in real code

    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    # Bug fix: the decay's initial rate was hard-coded as 0.99, leaving
    # starter_learning_rate unused.
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    # Bug fix: was `args.max_layers`, but no `args` exists here — use the
    # function parameter.
    reinforce = Reinforce(sess, optimizer, policy_network, max_layers, global_step)
    net_manager = NetManager(num_input=784,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist)

    MAX_EPISODES = 250
    step = 0
    # Any non-zero first state works; this one speeds up the search.
    state = np.array([[10.0, 128.0, 1.0, 1.0]*max_layers], dtype=np.float32)
    pre_acc = 0.0
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("current action:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
        else:
            # Invalid (non-positive) hyperparameters are penalized.
            reward = -1.0
        # In our sample action is equal state.
        state = action[0]
        # Bug fix: the method is named storeRollout, not store_rollout.
        reinforce.storeRollout(state, reward)
        step += 1
        # Bug fix: MAX_STEPS was undefined; weights are updated after every
        # episode, i.e. the REINFORCE batch size is 1.
        ls = reinforce.train_step(1)
        log_str = "current time: "+str(datetime.datetime.now().time())+" episode: "+str(i_episode)+" loss: "+str(ls)+" last_state: "+str(state)+" last_reward: "+str(reward)
        print(log_str)
def main():
    """Entry point: load MNIST and launch the architecture search."""
    # Bug fix: `input_data` was never imported anywhere in the article.
    from tensorflow.examples.tutorials.mnist import input_data
    max_layers = 2
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train(mnist, max_layers)

if __name__ == '__main__':
    main()
我们不确定应该向策略网络提供什么输入。起初,我们在每个episode中向RNN输入全1.0的数组,但没有得到任何结果。然后,我们尝试在每个episode中提供一个新状态,结果得到了一个很好的架构。我们得出结论:第一个状态可以是任何非零数组;为了加快找到合适的架构,我们将第一个状态设置为 [[10.0, 128.0, 1.0, 1.0]*max_layers]。每个episode之后我们都会更新权重,否则我们的计算就没有用了——这就是为什么REINFORCE的“批次大小”为1。经过100次循环,得到如下架构:input layer: 784 nodes (MNIST image size); first convolution layer: 61x24; first max-pooling layer: 60; second convolution layer: 57x55; second max-pooling layer: 59; output layer: 10 nodes (number of classes for MNIST)。
测试结果
我们用在MNIST数据集中表现还不错的手工设计网络来对比我们的NASNet:input layer: 784 nodes (MNIST image size); first convolution layer: 5x32; first max-pooling layer: 2; second convolution layer: 5x64; second max-pooling layer: 2; output layer: 10 nodes (number of classes for MNIST)。All weights were initialized by the Xavier algorithm. 我们训练了10个epochs,得到NAS model的精确度是0.9987,手工设计网络的是0.963。