基于深度学习方法的语音识别研究（二）

最新推荐文章于 2024-08-15 09:29:59 发布

Xwei1226

最新推荐文章于 2024-08-15 09:29:59 发布

阅读量1.8k

点赞数 1

分类专栏：语音识别-深度学习　文章标签：语音识别、声学模型

本文链接：https://blog.csdn.net/Xwei1226/article/details/80866314

版权

语音识别-深度学习专栏收录该内容

64 篇文章 37 订阅

订阅专栏

前一篇博客讲了如何利用DNN方法进行语音声学模型的研究，这篇博客讲解如何利用LSTM方法进行语音声学模型的研究。不多说，直接上代码。由于实验室保密的原因，这里只提供关键部分的代码（训练循环内部的具体实现已省略），欢迎小伙伴留言，也欢迎各位大神批评指正。首先是单层LSTM：

# -*- coding: utf-8 -*-
#author : zhangwei

import tensorflow as tf
import numpy as np

# Network dimensions: features per time step, time steps per sample,
# LSTM hidden width and number of output classes.
n_inputs, n_steps = 39, 1
n_hidden_units, n_classes = 256, 219

# Optimisation settings: number of passes of the outer training loop,
# samples per batch, and the Adam step size.
n_epoches, batch_size = 100, 128
learning_rate = 0.001

# n_all = batch_size * n_steps
# One-element float32 array holding 0; not referenced by the visible code.
a = np.zeros(1, dtype=np.float32)

# Locations of the training / test feature and label text files.
filename_01 = '/home/zhangwei/data/train_mfcc_800000.txt'
filename_02 = '/home/zhangwei/data/train_label_800000.txt'
filename_03 = '/home/zhangwei/data/test_mfcc.txt'
filename_04 = '/home/zhangwei/data/test_label.txt'

# Read every file completely into memory with np.loadtxt's default settings,
# in the order train features, train labels, test features, test labels.
X_train, Y_train, X_test, Y_test = map(
    np.loadtxt,
    (filename_01, filename_02, filename_03, filename_04),
)

# Graph inputs. Both placeholders have a fully fixed shape, so every feed
# must contain exactly `batch_size` samples.
x = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_steps, n_inputs])
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_classes])

# Trainable parameters of the input projection ('in') and the output
# projection ('out'). Weights start from a standard normal draw and biases
# from the constant 0.1.
Weights = {}
Weights['in'] = tf.Variable(tf.random_normal([n_inputs, n_hidden_units]))
Weights['out'] = tf.Variable(tf.random_normal([n_hidden_units, n_classes]))

biases = {}
biases['in'] = tf.Variable(tf.constant(0.1, shape=[n_hidden_units]))
biases['out'] = tf.Variable(tf.constant(0.1, shape=[n_classes]))

def RNN(x , Weights , biases):
    """Build a single-layer LSTM classifier and return its class scores.

    The input is flattened to rows of ``n_inputs`` features, linearly
    projected to ``n_hidden_units``, reshaped back into sequences of
    ``n_steps`` steps and run through one ``BasicLSTMCell``. The final hidden
    state is then projected to ``n_classes`` scores.

    Args:
        x: Input tensor whose elements can be reshaped to ``[-1, n_inputs]``.
        Weights: Dict with an ``'in'`` and an ``'out'`` weight matrix.
        biases: Dict with an ``'in'`` and an ``'out'`` bias vector.

    Returns:
        Tensor of unnormalised class scores (no softmax is applied here).
    """
    # Input projection, applied to every time step at once.
    flat_inputs = tf.reshape(x, shape=[-1, n_inputs])
    projected = tf.matmul(flat_inputs, Weights['in']) + biases['in']
    sequence = tf.reshape(projected, [-1, n_steps, n_hidden_units])

    # Recurrent layer; the zero state is built for the module-level batch_size.
    cell = tf.nn.rnn_cell.BasicLSTMCell(
        num_units=n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    zero_state = cell.zero_state(batch_size, dtype=tf.float32)
    _, final_state = tf.nn.dynamic_rnn(
        cell, sequence, initial_state=zero_state, time_major=False)

    # final_state is an LSTMStateTuple (c, h); classify from the hidden part h.
    return tf.add(tf.matmul(final_state.h, Weights['out']), biases['out'])

# Class scores (logits) for the current batch.
prediction = RNN(x, Weights, biases)

# Mean softmax cross-entropy between the logits and the targets fed to y.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction)
loss_op = tf.reduce_mean(per_example_loss)

# One Adam update step on the loss.
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)

# Fraction of samples whose highest-scoring class equals the arg-max of y.
predicted_class = tf.argmax(prediction, 1)
target_class = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Op that initialises every tf.Variable created above.
init = tf.global_variables_initializer()

# Run the training loop inside a session that is closed automatically.
with tf.Session() as sess:
    sess.run(init)
    for i in range(n_epoches):
        # NOTE(review): the per-epoch body is not included in this listing.
        # `loss`, `training_acc` and `test_acc`, which the print below reads,
        # are not assigned anywhere in the visible code, so running the script
        # as shown raises NameError. Presumably they come from sess.run calls
        # on train_op / loss_op / accuracy over mini-batches - confirm against
        # the full script.

        # Python 2 print statement: emits one progress line per epoch.
        print 'Iter : ', str(i) + ' ; loss : ' + str(loss) + ' ; trainging accuracy : ' + str(training_acc) + ' ; test accuracy : ' + str(test_acc)

下面是多层RNN（两层LSTM堆叠）的代码：

# -*- coding: utf-8 -*-
#author : zhangwei

import tensorflow as tf
import numpy as np

# Network dimensions: features per time step, time steps per sample,
# LSTM hidden width and number of output classes.
n_inputs, n_steps = 39, 1
n_hidden_units, n_classes = 256, 219

# Optimisation settings: number of passes of the outer training loop,
# samples per batch, and the Adam step size.
n_epoches, batch_size = 100, 128
learning_rate = 0.001

# Locations of the training / test feature and label text files.
filename_01 = '/home/zhangwei/data/train_mfcc_800000.txt'
filename_02 = '/home/zhangwei/data/train_label_800000.txt'
filename_03 = '/home/zhangwei/data/test_mfcc.txt'
filename_04 = '/home/zhangwei/data/test_label.txt'

# Read every file completely into memory with np.loadtxt's default settings,
# in the order train features, train labels, test features, test labels.
X_train, Y_train, X_test, Y_test = map(
    np.loadtxt,
    (filename_01, filename_02, filename_03, filename_04),
)

with tf.name_scope('Input'):
    # Fixed-shape inputs: every feed must contain exactly batch_size samples.
    x = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_steps, n_inputs])
    y = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_classes])
    # Dropout keep probability for the LSTM outputs; it has no default value,
    # so it has to be fed on every run that depends on it.
    keep_prob = tf.placeholder(dtype=tf.float32)

with tf.name_scope('Layer1'):
    def get_cell():
        """Return one tanh LSTM cell whose outputs pass through dropout.

        Only the cell outputs are dropped (keep probability ``keep_prob``);
        the inputs are kept unchanged (``input_keep_prob=1.0``).
        """
        lstm = tf.nn.rnn_cell.LSTMCell(num_units=n_hidden_units, activation=tf.nn.tanh)
        with_dropout = tf.nn.rnn_cell.DropoutWrapper(
            cell=lstm, input_keep_prob=1.0, output_keep_prob=keep_prob)
        return with_dropout

    # Two separately constructed cells stacked into one multi-layer cell.
    stacked_cells = [get_cell() for i in range(2)]
    cell = tf.nn.rnn_cell.MultiRNNCell(stacked_cells)

    # Start from an all-zero state sized for batch_size and unroll over x.
    init_cell = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    output, _ = tf.nn.dynamic_rnn(
        cell=cell, inputs=x, initial_state=init_cell, dtype=tf.float32)

with tf.name_scope('Layer2'):
    # Output projection parameters: small truncated-normal weights, zero bias.
    W = tf.Variable(tf.truncated_normal(shape=[n_hidden_units, n_classes], stddev=0.01))
    b = tf.Variable(tf.zeros(n_classes))

    # Collapse the LSTM outputs to rows of n_hidden_units features.
    output = tf.reshape(output, shape=[-1, n_hidden_units])

    # Raw class scores, then their softmax-normalised form.
    logist = tf.add(tf.matmul(output, W), b)
    prediction = tf.nn.softmax(logits=logist)

with tf.name_scope('Loss'):
    # Mean softmax cross-entropy between the class scores and the targets in y.
    # softmax_cross_entropy_with_logits applies softmax internally, so it must
    # receive the raw scores `logist`. Passing `prediction` (already the output
    # of tf.nn.softmax) would apply softmax twice and yield a wrong loss.
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logist , labels=y))

with tf.name_scope('Train'):
    # One Adam update step that minimises loss_op.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

with tf.name_scope('Accuracy'):
    # Fraction of samples whose most probable class equals the arg-max of y.
    predicted_class = tf.argmax(prediction, 1)
    target_class = tf.argmax(y, 1)
    correct_prediction = tf.equal(predicted_class, target_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Op that initialises every tf.Variable created above.
init = tf.global_variables_initializer()

# Run the training loop inside a session that is closed automatically.
with tf.Session() as sess:
    sess.run(init)
    for i in range(n_epoches):
        # NOTE(review): the per-epoch body is not included in this listing.
        # `loss`, `training_acc` and `test_acc`, which the print below reads,
        # are not assigned anywhere in the visible code, so running the script
        # as shown raises NameError. Presumably they come from sess.run calls
        # on train_op / loss_op / accuracy over mini-batches - confirm against
        # the full script. Any such run must also feed `keep_prob`, which is a
        # placeholder without a default value.

        # Python 2 print statement: emits one progress line per epoch.
        print 'Iter : ', str(i) + ' ; loss : ' + str(loss) + ' ; trainging accuracy : ' + str(training_acc) + ' ; test accuracy : ' + str(test_acc)