# 【续】–Tensorflow踩坑记之tf.metrics

## 精确率的计算公式

$Precision=\frac{truePositive}{truePositive+falsePositive}$

## 让我们先造点数据，传统算算看

import tensorflow as tf
import numpy as np

# Ground-truth labels: four batches of four binary entries each.
labels = np.array(
    [[1, 1, 1, 0],
     [1, 1, 1, 0],
     [1, 1, 1, 0],
     [1, 1, 1, 0]], dtype=np.uint8)

# Model predictions for the same four batches.
predictions = np.array(
    [[1, 0, 0, 0],
     [1, 1, 0, 0],
     [1, 1, 1, 0],
     [0, 1, 1, 1]], dtype=np.uint8)

n_batches = len(labels)

# First, calculate precision over the entire set of batches using the
# formula Precision = TP / (TP + FP).
pred_p = np.count_nonzero(predictions > 0)          # all predicted positives (TP + FP)
true_p = np.count_nonzero(labels * predictions > 0)  # correctly predicted positives (TP)
precision = true_p / pred_p
print("Precision :%1.4f" %(precision))


## 上述方法的问题

• 正确预测的正样本数量
• 预测样本中所有正样本的数量

## 所以我们要这么做

# Running totals shared by the helper functions below.
# (The original listed this initialisation twice; once is enough —
# the observable state is identical.)
N_TRUE_P = 0  # correctly-predicted positives accumulated so far (TP)
N_PRED_P = 0  # all predicted positives accumulated so far (TP + FP)

def reset_running_variables():
    """Reset both running totals (true positives and predicted positives) to zero.

    Must be called before accumulating a fresh set of batches, otherwise
    counts from earlier batches leak into the next precision score.
    """
    global N_TRUE_P, N_PRED_P
    N_TRUE_P = 0
    # BUG FIX: the original assigned a throwaway local `c = 0` here, so
    # N_PRED_P was never actually reset and precision drifted across resets.
    N_PRED_P = 0

def update_running_variables(labs, preds):
    """Fold one batch of labels/predictions into the running totals.

    `labs` and `preds` are element-aligned numpy arrays of 0/1 values.
    """
    global N_TRUE_P, N_PRED_P
    hits = (labs * preds) > 0   # positions predicted positive AND labelled positive
    positives = preds > 0       # every position predicted positive
    N_TRUE_P += hits.sum()
    N_PRED_P += positives.sum()

def calculate_precision():
    """Return the precision accumulated so far: TP / (TP + FP)."""
    global N_TRUE_P, N_PRED_P
    score = float(N_TRUE_P) / N_PRED_P
    return score

## 怎么用上面的函数呢？

### 样本整体准确率(直接计算)

# Overall precision: zero the totals once, accumulate every batch,
# then compute a single score over all of them.
reset_running_variables()

for batch_idx in range(n_batches):
    update_running_variables(labs=labels[batch_idx], preds=predictions[batch_idx])

precision = calculate_precision()
print("[NP] SCORE: %1.4f" %precision)

### 批次准确率(直接计算)

# Batch precision: reset before EVERY batch so each score reflects
# that batch alone rather than the running total.
for batch_idx in range(n_batches):
    reset_running_variables()
    update_running_variables(labs=labels[batch_idx], preds=predictions[batch_idx])
    prec = calculate_precision()
    print("- [NP] batch %d score: %1.4f" %(batch_idx, prec))
[NP] batch 0 score: 1.0000
[NP] batch 1 score: 1.0000
[NP] batch 2 score: 1.0000
[NP] batch 3 score: 0.6667

## 不要小瞧这两个变量和三个函数

### 放一个官方的解释

The precision function creates two local variables,
true_positives and false_positives, that are used to compute the precision. This value is ultimately returned as precision, an idempotent operation that simply divides true_positives by the sum of true_positives and false_positives.
For estimation of the metric over a stream of data, the function creates an update_op operation that updates these variables and returns the precision.

### 两个变量和 tf.metrics.precision()的关系

• true_positives – N_TRUE_P
• false_positives – N_PRED_P - N_TRUE_P

### 三个函数和头大的update_op

• precision–calculate_precision()
• update_op–update_running_variables()

### Overall precision using tensorflow

# Overall precision using tensorflow
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # Placeholders to take in batches of data
    tf_label = tf.placeholder(dtype=tf.int32, shape=[None])
    tf_prediction = tf.placeholder(dtype=tf.int32, shape=[None])

    # Define the metric and update operations
    tf_metric, tf_metric_update = tf.metrics.precision(tf_label,
                                                       tf_prediction,
                                                       name="my_metric")

    # Isolate the variables stored behind the scenes by the metric operation
    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="my_metric")

    # Define initializer to initialize/reset running variables
    running_vars_initializer = tf.variables_initializer(var_list=running_vars)

with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())

    # initialize/reset the running variables
    session.run(running_vars_initializer)

    for i in range(n_batches):
        # Update the running variables on new batch of samples
        feed_dict = {tf_label: labels[i], tf_prediction: predictions[i]}
        session.run(tf_metric_update, feed_dict=feed_dict)

    # Calculate the score once, after all batches have been consumed
    score = session.run(tf_metric)
    print("[TF] SCORE: %1.4f" %score)

[TF] SCORE: 0.8889

### Batch precision using tensorflow

# Batch precision using tensorflow
with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())

    for i in range(n_batches):
        # Reset the running variables so each batch is scored independently
        session.run(running_vars_initializer)

        # Update the running variables on new batch of samples
        feed_dict = {tf_label: labels[i], tf_prediction: predictions[i]}
        session.run(tf_metric_update, feed_dict=feed_dict)

        # Calculate the score on this batch
        score = session.run(tf_metric)
        print("[TF] batch %d score: %1.4f" %(i, score))

[TF] batch 0 score: 1.0000
[TF] batch 1 score: 1.0000
[TF] batch 2 score: 1.0000
[TF] batch 3 score: 0.6667

## 再次划重点

session.run(running_vars_initializer)

score = session.run(tf_metric)

## 还需要注意的点

_ , score = session.run([tf_metric_update,tf_metric],\
feed_dict=feed_dict)

## update_op究竟返回了什么捏

stackoverflow的一个回答

# `rel` holds the relevant items (labels); `rec` the recommended items.
rel = tf.placeholder(tf.int64, [1, 3])
rec = tf.constant([[7, 5, 10, 6, 3, 1, 8, 12, 31, 88]], tf.int64)
precision, update_op = tf.metrics.precision_at_k(rel, rec, 10)

sess = tf.Session()
sess.run(tf.local_variables_initializer())

# Grab the metric's hidden local variables (true_positives, false_positives).
stream_vars = list(tf.local_variables())

# Both counters still start at zero, so reading precision alone yields nan.
print("[PRECSION_1]: ",sess.run(precision, {rel:[[1,5,10]]})) # nan

# update_op folds this batch's labels/predictions into true_positives and
# false_positives, then returns the refreshed precision.
print("[UPDATE_OP_1]:",sess.run(update_op, {rel:[[1,5,10]]})) #0.2

# Inspect the accumulated true/false positive counts.
print("[STREAM_VARS_1]:",sess.run(stream_vars)) #[2.0, 8.0]

# Reading precision does NOT consume the feed — it only divides the stored
# counters, so it still reports the previous 0.2.
print("[PRECISION_1]:",sess.run(precision,{rel:[[1,10,15]]})) # 0.2

# A second update blends the new batch in, moving the score to 0.15.
print("[UPDATE_OP_2]:",sess.run(update_op,{rel:[[1,10,15]]})) #0.15

print("[STREAM_VARS_2]:",sess.run(stream_vars)) #[3.0, 17.0]

[PRECSION_1]: nan
[STREAM_VARS_1]: [0.0, 0.0, 0.0, 0.0, 2.0, 8.0]
[STREAM_VARS_2]: [0.0, 0.0, 0.0, 0.0, 3.0, 17.0]

## tf.metrics.precision_at_k

tf.metrics.precision_at_k(
labels,
predictions,
k,
class_id=None,
weights=None,
metrics_collections=None,
name=None
)

labels,predictions,k的输入形式是什么样的呢？

Precision@k = (Recommended items @k that are relevant) / (# Recommended items @k)

import tensorflow as tf
import numpy as np

# Labels: one class index per example (NOT one-hot), shape (batch, num_labels).
y_true = np.array([[2], [1], [0], [3], [0]]).astype(np.int64)
y_true = tf.identity(y_true)

# Predictions: raw logits, shape (batch, num_classes).
y_pred = np.array(
    [[0.1, 0.2, 0.6, 0.1],
     [0.8, 0.05, 0.1, 0.05],
     [0.3, 0.4, 0.1, 0.2],
     [0.6, 0.25, 0.1, 0.05],
     [0.1, 0.2, 0.6, 0.1]]).astype(np.float32)
y_pred = tf.identity(y_pred)

# Mean average precision at k=3 over the stream.
_, m_ap = tf.metrics.sparse_average_precision_at_k(y_true, y_pred, 3)

sess = tf.Session()
sess.run(tf.local_variables_initializer())

# Peek at the metric's internal local variables.
stream_vars = list(tf.local_variables())

tf_map = sess.run(m_ap)
print("TF_MAP",tf_map)

print("STREAM_VARS",(sess.run(stream_vars)))

# top_k mirrors how the metric ranks y_pred internally.
tmp_rank = tf.nn.top_k(y_pred,3)

print("TMP_RANK",sess.run(tmp_rank))

## 简单解释一下

• 首先y_true代表标签值（未经过one-hot），shape：(batch_size, num_labels) ,y_pred代表预测值（logit值） ，shape：(batch_size, num_classes)

• 其次，要注意的是tf.metrics.sparse_average_precision_at_k中会采用top_k根据不同的k值对y_pred进行排序操作 ，所以tmp_rank是为了帮助大家理解究竟y_pred在函数中进行了怎样的转换。

• 然后，stream_vars = [i for i in tf.local_variables()]这一行是为了帮助大噶理解 tf.metrics.sparse_average_precision_at_k创建的tf.local_varibles 实际输出值，进而可以更好地理解这个函数的用法。

• 具体看这个例子，当k=1时，只有第一个batch的预测输出是和标签匹配的 ，所以最终输出为：1/6 = 0.166666 ；当k=2时，除了第一个batch的预测输出，第三个batch的预测输出也是和标签匹配的，所以最终输出为：(1+(1/2))/6 = 0.25。

P.S: 在以后的tf版本里，tf.metrics.average_precision_at_k 将替代 tf.metrics.sparse_average_precision_at_k。