tensorflow ctc示例
#-*-coding:utf8-*-
'''
tensorflow1.4
'''
import tensorflow as tf
import numpy as np
import pdb
import random
def create_sparse(batch_size, dtype=np.int32, min_length=150, max_length=180,
                  num_labels=780):
    """Create random variable-length label sequences in sparse form.

    tf.nn.ctc_loss expects its labels as a sparse tensor; this helper returns
    the three components needed to build one (indices, values, dense shape).
    Each of the ``batch_size`` rows gets a random length drawn uniformly from
    ``[min_length, max_length]`` (both inclusive) and is filled with random
    label ids drawn uniformly from ``[0, num_labels - 1]``.

    Args:
        batch_size: Number of label sequences (rows) to generate.
        dtype: NumPy dtype of the returned label values.
        min_length: Smallest possible sequence length (inclusive).
        max_length: Largest possible sequence length (inclusive).
        num_labels: Number of distinct label ids; ids are 0..num_labels-1.

    Returns:
        A list ``[indices, values, shape]`` where
        ``indices`` is an int64 array of shape (N, 2) holding (row, column)
        positions, ``values`` is an array of N label ids with ``dtype``, and
        ``shape`` is the int64 dense shape ``[batch_size, longest_length]``.
        With no generated entries (e.g. ``batch_size == 0``) the result is an
        empty (0, 2) index array and a dense shape of ``[batch_size, 0]``.

    Raises:
        ValueError: Propagated from ``random.randint`` when
            ``min_length > max_length`` or ``num_labels < 1``.
    """
    indices = []
    values = []
    for row in range(batch_size):
        # NOTE: when a sequence reaches the maximum length, a caller that also
        # uses that maximum as the CTC sequence length may see
        # "No valid path found" (the original author observed this for 180).
        length = random.randint(min_length, max_length)
        indices.extend((row, col) for col in range(length))
        values.extend(random.randint(0, num_labels - 1) for _ in range(length))

    # reshape(-1, 2) keeps the (N, 2) layout even when nothing was generated.
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values, dtype=dtype)
    # Dense width is the longest generated sequence (largest column index + 1).
    longest = int(indices[:, 1].max()) + 1 if len(indices) else 0
    shape = np.asarray([batch_size, longest], dtype=np.int64)
    return [indices, values, shape]
if __name__ == '__main__':
    # Demo dimensions, named once so every tensor shape below stays consistent.
    BATCH_SIZE = 64     # number of (fake) images per batch
    MAX_TIME = 400      # image width, treated as the number of LSTM time steps
    FEATURE_SIZE = 60   # image height, treated as the per-step input size
    NUM_HIDDEN = 500    # LSTM output size
    NUM_CLASSES = 781   # think of it as 780 characters (ids 0..779) + 1 blank
    SEQ_LEN = 180       # maximum length of the variable-length sequences

    # Projection from LSTM outputs to per-class logits: (500, 781) and (781,).
    W = tf.Variable(
        tf.truncated_normal([NUM_HIDDEN, NUM_CLASSES], stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0., shape=[NUM_CLASSES]), name="b")
    global_step = tf.Variable(0, trainable=False)  # global step counter

    # Random test input in batch-major layout: (64, 400, 60).
    inputs = tf.random_normal(
        shape=[BATCH_SIZE, MAX_TIME, FEATURE_SIZE], dtype=tf.float32)

    # Random variable-length labels for the batch, as sparse components
    # [indices, values, dense_shape].
    output = create_sparse(BATCH_SIZE)
    # Sequence lengths are integer counts, so build them as int32 directly
    # instead of a float array.
    seq_len = np.full(BATCH_SIZE, SEQ_LEN, dtype=np.int32)
    labels = tf.SparseTensor(
        values=output[1], indices=output[0], dense_shape=output[2])

    cell = tf.nn.rnn_cell.LSTMCell(NUM_HIDDEN, state_is_tuple=True)
    # time_major=False: inputs are [batch_size, max_time, ...], so the RNN
    # output is (64, 400, 500).
    outputs1, _ = tf.nn.dynamic_rnn(
        cell, inputs, seq_len, dtype=tf.float32, time_major=False)
    outputs = tf.reshape(outputs1, [-1, NUM_HIDDEN])  # (64 * 400, 500)
    print(outputs.shape)

    logits0 = tf.matmul(outputs, W) + b
    logits1 = tf.reshape(logits0, [BATCH_SIZE, -1, NUM_CLASSES])
    # ctc_loss is called with time-major logits: (400, 64, 781).
    logits = tf.transpose(logits1, (1, 0, 2))
    loss = tf.nn.ctc_loss(labels, logits, seq_len)
    cost = tf.reduce_mean(loss)

    # The training op is built but never run; this demo only evaluates the
    # per-sample CTC loss once.
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=0.01, momentum=0.9).minimize(
            cost, global_step=global_step)

    # Optional decoding / accuracy, left disabled as in the original demo
    # (tf.nn.ctc_greedy_decoder is an alternative decoding strategy):
    # decoded, log_prob = tf.nn.ctc_beam_search_decoder(
    #     logits, seq_len, merge_repeated=False)
    # acc = tf.reduce_mean(
    #     tf.edit_distance(tf.cast(decoded[0], tf.int32), labels))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(loss))
修改参数导致维度不匹配时,容易出现 `Loss Error: InvalidArgumentError: Not enough time for target transition sequence`(标签序列长度超过了 sequence_length)或 `Loss Error: InvalidArgumentError: sequence_length(b) <= time`(sequence_length 超过了 logits 的时间步数)。用本程序修改相应参数即可复现上述错误提示。
输出结果:
(25600, 500)
2019-03-15 15:52:08.541202: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
[1181.6133 1120.6461 1123.5836 1158.2643 1203.3185 1174.5375 1140.1708
1149.3875 1148.829 1107.2148 1164.7463 1140.0444 1169.8032 1117.4838
1145.2615 1115.3611 1197.3125 1106.878 1109.8082 1174.4955 1155.2653
1170.8049 1106.192 1119.4747 1142.7593 1111.7495 1203.1704 1143.159
1147.6185 1110.9576 1170.2196 1098.4486 1166.7227 1124.9521 1189.5774
1140.4617 1103.0671 1150.5786 1121.7013 1143.7145 1131.8195 1122.3063
1111.8436 1114.2665 1152.203 1115.6321 1154.8701 1128.693 1109.0947
1148.9967 1113.7224 1127.9088 1151.6262 1161.533 1130.3254 1159.5051
1141.0709 1121.4249 1145.6445 1165.9967 1144.8002 1119.772 1183.2587
1121.2657]