tf.nn.embedding_lookup:
tf.nn.embedding_lookup() gathers the rows of `embeddings` selected by `input_ids`. For example, with input_ids=[1,3,5] it picks rows 1, 3, and 5 of `embeddings` and returns them stacked as a single tensor.
# --- Hyperparameters ---
num_steps=500
# NOTE(review): batch_size is defined but never used in this snippet.
batch_size=1024
# Number of clusters; using more clusters (25) than digit classes (10)
# lets several clusters capture different writing styles of one digit.
k=25
num_classes=10
# MNIST images are 28x28 = 784 pixels, flattened into one feature vector.
num_features=784
# TF1-style graph inputs: X holds batches of flattened images; Y holds
# the matching one-hot labels (used only for evaluation below, not for
# the unsupervised training itself).
X=tf.placeholder(tf.float32, shape=[None,num_features])
Y=tf.placeholder(tf.float32, shape=[None,num_classes])
# K-means graph builder (presumably tf.contrib.factorization.KMeans --
# the import is not visible in this chunk; TODO confirm) configured with
# cosine distance and mini-batch centroid updates.
kmeans=KMeans(inputs=X, num_clusters=k, distance_metric='cosine',use_mini_batch=True)
# Build the k-means training ops. Depending on the TF 1.x version,
# training_graph() returns either 6 or 7 values (newer releases insert
# the cluster-centers variable), so unpack by tuple length.
# Fix: the original if/else bodies had lost their indentation
# (SyntaxError); restored here.
training_graph = kmeans.training_graph()
if len(training_graph) > 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     init_op, train_op) = training_graph
# training_graph() wraps the per-sample cluster-index tensor in a tuple.
cluster_idx = cluster_idx[0]
# Mean point-to-centroid distance, used to monitor convergence.
avg_distance = tf.reduce_mean(scores)

# Start a session and initialize both the TF variables and the k-means
# centroids (init_op reads the data to choose the initial centers).
init_vars = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_vars, feed_dict={X: full_data_x})
sess.run(init_op, feed_dict={X: full_data_x})
# Train: run one k-means update per step over the full data set and
# report the average point-to-centroid distance periodically.
# Fixes: restored the lost loop-body indentation (SyntaxError in the
# original paste); replaced `for i in range(len(idx))` with enumerate
# and gave the two loops distinct index names instead of reusing `i`.
for step in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if step % 10 == 0 or step == 1:
        print('Step %i, Avg Distance %f' % (step, d))

# Accumulate per-cluster label totals: counts[c, j] is the number of
# training samples of digit j that were assigned to cluster c
# (labels are one-hot, so summing them counts occurrences).
counts = np.zeros(shape=(k, num_classes))
for sample_idx, cluster in enumerate(idx):
    counts[cluster] += mnist.train.labels[sample_idx]
# Map every cluster to the digit label that dominates it, then score
# that mapping on the MNIST test split.
majority_labels = [np.argmax(label_sums) for label_sums in counts]
majority_labels = tf.convert_to_tensor(majority_labels)
# embedding_lookup translates each sample's cluster id into the
# majority label of that cluster (row gather, as described up top).
cluster_label = tf.nn.embedding_lookup(majority_labels, cluster_idx)
matches = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(matches, tf.float32))
test_x, test_y = mnist.test.images, mnist.test.labels
print('Test Accuracy:', sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))