The previous post covered building a speech acoustic model with a DNN; this post walks through the same task with an LSTM. Without further ado, here is the code. Because of confidentiality requirements in our lab, only the key parts are provided, so comments and corrections are very welcome. First, the single-layer LSTM:
```python
# -*- coding: utf-8 -*-
# author: zhangwei

import tensorflow as tf
import numpy as np

n_inputs = 39          # 39-dim MFCC features per frame
n_steps = 1            # time steps per example
n_hidden_units = 256   # LSTM hidden size
n_classes = 219        # number of output states/classes
n_epoches = 100
learning_rate = 1e-3
batch_size = 128

filename_01 = '/home/zhangwei/data/train_mfcc_800000.txt'
filename_02 = '/home/zhangwei/data/train_label_800000.txt'
filename_03 = '/home/zhangwei/data/test_mfcc.txt'
filename_04 = '/home/zhangwei/data/test_label.txt'
X_train = np.loadtxt(filename_01)
Y_train = np.loadtxt(filename_02)
X_test = np.loadtxt(filename_03)
Y_test = np.loadtxt(filename_04)

x = tf.placeholder(tf.float32, [batch_size, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [batch_size, n_classes])

Weights = {'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
           'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))}
biases = {'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units])),
          'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))}

def RNN(x, Weights, biases):
    # Input projection: [batch * n_steps, n_inputs] -> [batch * n_steps, n_hidden_units]
    x = tf.reshape(x, shape=[-1, n_inputs])
    x_in = tf.add(tf.matmul(x, Weights['in']), biases['in'])
    x_in_end = tf.reshape(x_in, [-1, n_steps, n_hidden_units])
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, state = tf.nn.dynamic_rnn(lstm_cell, x_in_end, initial_state=init_state, time_major=False)
    # state is an LSTMStateTuple (c, h); state[1] is the final hidden state h
    results = tf.matmul(state[1], Weights['out']) + biases['out']
    return results

# prediction holds the raw logits; softmax is applied inside the loss below
prediction = RNN(x, Weights, biases)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss_op)
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # NOTE: minimal mini-batch training loop; the original post omits this
    # body for confidentiality, so this is a sketch that makes the script run.
    n_batches = len(X_train) // batch_size
    for i in range(n_epoches):
        for b in range(n_batches):
            xs = X_train[b * batch_size:(b + 1) * batch_size].reshape([batch_size, n_steps, n_inputs])
            ys = Y_train[b * batch_size:(b + 1) * batch_size]
            sess.run(train_op, feed_dict={x: xs, y: ys})
        loss, training_acc = sess.run([loss_op, accuracy], feed_dict={x: xs, y: ys})
        xt = X_test[:batch_size].reshape([batch_size, n_steps, n_inputs])
        yt = Y_test[:batch_size]
        test_acc = sess.run(accuracy, feed_dict={x: xt, y: yt})
        print('Iter : ' + str(i) + ' ; loss : ' + str(loss) + ' ; training accuracy : ' + str(training_acc) + ' ; test accuracy : ' + str(test_acc))
```
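A side note on `n_steps`: with `n_steps = 1` each example is a single MFCC frame, so the LSTM recurrence is never unrolled over time and the model behaves much like a feed-forward classifier. To let the LSTM see real temporal context, you can group consecutive frames into fixed-length windows before feeding them in. Below is a minimal sketch under that assumption; `make_sequences` is a hypothetical helper, not part of the original code:

```python
import numpy as np

def make_sequences(frames, labels, n_steps):
    """Hypothetical helper: group consecutive MFCC frames into windows.

    frames : [n_frames, n_inputs] per-frame feature array
    labels : [n_frames, n_classes] one-hot per-frame labels
    Returns (X, Y) where X is [n_seqs, n_steps, n_inputs] and Y is the
    label of the last frame in each window.
    """
    n_seqs = len(frames) // n_steps
    X = frames[:n_seqs * n_steps].reshape(n_seqs, n_steps, -1)
    # Use the label of the final frame of each window as the target
    Y = labels[n_steps - 1:n_seqs * n_steps:n_steps]
    return X, Y
```

With `n_steps > 1` the placeholder shapes above stay valid as written, since they are already declared as `[batch_size, n_steps, n_inputs]`; only the data preparation changes.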
Next is the code for the multi-layer version, a two-layer stacked LSTM with dropout:
```python
# -*- coding: utf-8 -*-
# author: zhangwei

import tensorflow as tf
import numpy as np

n_inputs = 39
n_steps = 1
n_hidden_units = 256
n_classes = 219
n_epoches = 100
learning_rate = 1e-3
batch_size = 128

filename_01 = '/home/zhangwei/data/train_mfcc_800000.txt'
filename_02 = '/home/zhangwei/data/train_label_800000.txt'
filename_03 = '/home/zhangwei/data/test_mfcc.txt'
filename_04 = '/home/zhangwei/data/test_label.txt'
X_train = np.loadtxt(filename_01)
Y_train = np.loadtxt(filename_02)
X_test = np.loadtxt(filename_03)
Y_test = np.loadtxt(filename_04)

with tf.name_scope('Input'):
    x = tf.placeholder(tf.float32, [batch_size, n_steps, n_inputs])
    y = tf.placeholder(tf.float32, [batch_size, n_classes])
    keep_prob = tf.placeholder(tf.float32)   # dropout keep probability

with tf.name_scope('Layer1'):
    def get_cell():
        m_cell = tf.nn.rnn_cell.LSTMCell(num_units=n_hidden_units, activation=tf.nn.tanh)
        return tf.nn.rnn_cell.DropoutWrapper(cell=m_cell, input_keep_prob=1.0, output_keep_prob=keep_prob)
    # Stack two LSTM layers
    cell = tf.nn.rnn_cell.MultiRNNCell([get_cell() for _ in range(2)])
    init_cell = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    output, _ = tf.nn.dynamic_rnn(cell=cell, inputs=x, initial_state=init_cell, dtype=tf.float32)

with tf.name_scope('Layer2'):
    W = tf.Variable(tf.truncated_normal([n_hidden_units, n_classes], stddev=0.01))
    b = tf.Variable(tf.zeros(n_classes))
    # With n_steps == 1 this reshape yields exactly one output vector per example
    output = tf.reshape(output, shape=[-1, n_hidden_units])
    logits = tf.matmul(output, W) + b
    prediction = tf.nn.softmax(logits)

with tf.name_scope('Loss'):
    # The cross-entropy must be computed on the raw logits, not on the
    # softmax output, otherwise softmax gets applied twice.
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))

with tf.name_scope('Train'):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)

with tf.name_scope('Accuracy'):
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # NOTE: minimal training loop sketch, as in the single-layer version;
    # the keep_prob value 0.5 is an illustrative choice, not from the post.
    n_batches = len(X_train) // batch_size
    for i in range(n_epoches):
        for b in range(n_batches):
            xs = X_train[b * batch_size:(b + 1) * batch_size].reshape([batch_size, n_steps, n_inputs])
            ys = Y_train[b * batch_size:(b + 1) * batch_size]
            sess.run(train_op, feed_dict={x: xs, y: ys, keep_prob: 0.5})
        # Disable dropout (keep_prob = 1.0) when evaluating
        loss, training_acc = sess.run([loss_op, accuracy], feed_dict={x: xs, y: ys, keep_prob: 1.0})
        xt = X_test[:batch_size].reshape([batch_size, n_steps, n_inputs])
        yt = Y_test[:batch_size]
        test_acc = sess.run(accuracy, feed_dict={x: xt, y: yt, keep_prob: 1.0})
        print('Iter : ' + str(i) + ' ; loss : ' + str(loss) + ' ; training accuracy : ' + str(training_acc) + ' ; test accuracy : ' + str(test_acc))
```
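Since this version already organizes the graph with `tf.name_scope`, it is easy to inspect in TensorBoard. A minimal sketch, assuming an arbitrary log directory (`/tmp/lstm_am_logs` is just an example path, not from the original code):

```python
# Inside the `with tf.Session() as sess:` block, after sess.run(init):
# write the graph definition so TensorBoard can render the name scopes
# (Input, Layer1, Layer2, Loss, Train, Accuracy) as collapsible nodes.
writer = tf.summary.FileWriter('/tmp/lstm_am_logs', sess.graph)
writer.close()
```

Then run `tensorboard --logdir=/tmp/lstm_am_logs` and open the Graphs tab to check that the layers are wired the way you expect.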
I am still new to deep learning, so criticism and corrections from experienced readers are very welcome!