# Notes

mAP（mean Average Precision）是多模态检索的常用指标，也有人使用 $mAP@R$（只统计排名前 $R$ 的检索结果）。这里备份一份计算 mAP 或 mAP@R 的 Python 代码（代码中的参数 `k` 即 $R$），主要抄自 CVPR 2017 论文 DCMH（Deep Cross-Modal Hashing）作者释出的代码。

# Code

## fast version

• 原来那份代码（即下文 slow version）的 `calc_mAP` 中有些地方可以预处理：ground-truth 矩阵和距离排序都可以在循环外一次性算好，改掉后可以提速。
def calc_mAP(qF, rF, qL, rL, what=0, k=-1):
    """Compute mean average precision (mAP, or mAP@k) over a query set.

    Batched variant: the ground-truth matrix and the distance ranking of
    every query are computed once, outside the per-query loop.

    A retrieval item counts as relevant to a query when the dot product of
    their label rows is positive.

    Args:
        qF: query feature/hash matrix, one row per query.
        rF: retrieval feature/hash matrix, one row per retrieval item.
        qL: query label matrix, one row per query.
        rL: retrieval label matrix, one row per retrieval item.
        what: 0 ranks by cosine distance (`cos_dis`); any other value
            ranks by Hamming distance (`hamming_dis`).
        k: rank cut-off for mAP@k. A negative value (default -1) or a
            value larger than the number of retrieval items means the
            whole ranked list is used (mAP@all).

    Returns:
        The sum of the per-query average precisions divided by the total
        number of queries. Queries without any relevant item among their
        top k results contribute 0. Returns 0.0 for an empty query set.
    """
    n_query = qF.shape[0]
    if n_query == 0:
        # Nothing to average over; avoids dividing by zero below.
        return 0.0

    n_retrieval = rF.shape[0]
    # Any negative cut-off means "use everything"; slicing with a negative
    # k would otherwise silently drop the tail of the ranking.
    if k < 0 or k > n_retrieval:
        k = n_retrieval

    # Ground truth for all (query, item) pairs at once.
    relevant = np.dot(qL, rL.T) > 0

    # Distances for all queries in one shot, then sort each row ascending.
    if what == 0:
        dist = cos_dis(qF, rF)
    else:
        dist = hamming_dis(qF, rF)
    ranking = np.argsort(dist)

    ap_sum = 0.0
    for rel_row, order in zip(relevant, ranking):
        # 1-based ranks at which relevant items show up within the top k.
        hit_pos = np.flatnonzero(rel_row[order[:k]]) + 1.0
        if hit_pos.size == 0:
            # No relevant item retrieved: the query adds nothing to the sum
            # (and the mean below would be undefined).
            continue
        # hit_cnt[i] = number of relevant items seen up to the i-th hit, so
        # hit_cnt / hit_pos is the precision at each hit.
        hit_cnt = np.arange(1, hit_pos.size + 1)
        ap_sum += np.mean(hit_cnt / hit_pos)

    return ap_sum / n_query


## slow version

import numpy as np
from sklearn.preprocessing import normalize

# Cosine similarity
def cos_sim(f1, f2):
    """Return pairwise cosine similarity between rows, rescaled by 0.5 + 0.5 * cos.

    Args:
        f1: first feature matrix, one vector per row.
        f2: second feature matrix, one vector per row.

    Returns:
        Matrix whose (i, j) entry is ``0.5 + 0.5 * cos(f1[i], f2[j])``.
    """
    unit_rows_1 = normalize(f1, norm='l2', axis=1)
    unit_rows_2 = normalize(f2, norm='l2', axis=1)
    # Dot products of the L2-normalized rows are the raw cosines.
    raw_cos = np.dot(unit_rows_1, unit_rows_2.T)
    # Affine rescale: a raw cosine in [-1, 1] lands in [0, 1].
    return 0.5 + 0.5 * raw_cos

# Cosine distance
def cos_dis(f1, f2):
    """Return the pairwise cosine distance, defined as 1 - cos_sim(f1, f2).

    Args:
        f1: first feature matrix, one vector per row.
        f2: second feature matrix, one vector per row.

    Returns:
        ``1. - cos_sim(f1, f2)``; smaller values mean more similar rows.
    """
    similarity = cos_sim(f1, f2)
    return 1. - similarity

# Hamming distance
def hamming_dis(B1, B2):
    """Return the Hamming distance between hash codes in `B1` and `B2`.

    Computed as ``0.5 * (n_bits - <b1, b2>)``. This equals the number of
    differing bits when code entries are +1/-1 (as in the sample data of
    this file); other encodings are not handled here.

    Args:
        B1: hash code(s); a single code vector or a matrix of row codes.
        B2: hash code matrix, one code per row.

    Returns:
        Distances from each code in `B1` to every row of `B2`.
    """
    n_bits = B2.shape[1]
    inner = np.dot(B1, B2.T)
    return 0.5 * (n_bits - inner)

# mAP(@k)
def calc_mAP(qF, rF, qL, rL, what=0, k=-1):
    """Compute mean average precision (mAP, or mAP@k) over a query set.

    Per-query variant: ground truth and distances are computed one query
    at a time inside the loop.

    A retrieval item counts as relevant to a query when the dot product of
    their label rows is positive.

    Args:
        qF: query feature/hash matrix, one row per query.
        rF: retrieval feature/hash matrix, one row per retrieval item.
        qL: query label matrix, one row per query.
        rL: retrieval label matrix, one row per retrieval item.
        what: 0 ranks by cosine distance (`cos_dis`); any other value
            ranks by Hamming distance (`hamming_dis`).
        k: rank cut-off for mAP@k. A negative value (default -1) or a
            value larger than the number of retrieval items means the
            whole ranked list is used (mAP@all).

    Returns:
        The sum of the per-query average precisions divided by the total
        number of queries. Queries without any relevant item among their
        top k results contribute 0. Returns 0.0 for an empty query set.
    """
    n_query = qF.shape[0]
    if n_query == 0:
        # Nothing to average over; avoids dividing by zero below.
        return 0.0

    n_retrieval = rF.shape[0]
    # Any negative cut-off means "use everything"; slicing with a negative
    # k would otherwise silently drop the tail of the ranking.
    if k < 0 or k > n_retrieval:
        k = n_retrieval

    ap_sum = 0.0
    for it in range(n_query):
        # Ground truth: this query against every retrieval item.
        relevant = np.dot(qL[it, :], rL.T) > 0
        if not relevant.any():
            # No relevant item at all: skip the distance computation.
            continue

        if what == 0:
            # Real-valued features are ranked by cosine distance (1 vs all).
            dis = cos_dis(np.expand_dims(qF[it], axis=0), rF).reshape(-1)
        else:
            # Hash codes are ranked by Hamming distance.
            dis = hamming_dis(qF[it, :], rF)

        # Ascending distance, truncated to the top k results.
        top_k = np.argsort(dis)[:k]

        # 1-based ranks at which relevant items show up within the top k.
        hit_pos = np.flatnonzero(relevant[top_k]) + 1.0
        if hit_pos.size == 0:
            # No relevant item retrieved: the mean below would be undefined.
            continue
        # hit_cnt[i] = number of relevant items seen up to the i-th hit, so
        # hit_cnt / hit_pos is the precision at each hit.
        hit_cnt = np.arange(1, hit_pos.size + 1)
        ap_sum += np.mean(hit_cnt / hit_pos)

    return ap_sum / n_query


# Sample

• DCMH 作者释出的代码中自带一份样例，可以用它与原作者的代码对拍，检验 mAP@all 有没有写错，详见引用 [4]。
# Toy hash codes with +1/-1 entries: 4 queries, 4 bits each.
qB = np.array([
    [1, -1, 1, 1],
    [-1, -1, -1, 1],
    [1, 1, -1, 1],
    [1, 1, 1, -1],
])

# Toy hash codes for the 6 retrieval items, 4 bits each.
rB = np.array([
    [1, -1, 1, -1],
    [-1, -1, 1, -1],
    [-1, -1, 1, -1],
    [1, 1, -1, -1],
    [-1, 1, -1, -1],
    [1, 1, -1, 1],
])

# 0/1 label rows of the 4 queries (4 label columns; a row may set several).
query_L = np.array([
    [0, 1, 0, 0],
    [1, 1, 0, 0],
    [1, 0, 0, 1],
    [0, 1, 0, 1],
])

# 0/1 label rows of the 6 retrieval items, same label columns as query_L.
retrieval_L = np.array([
    [1, 0, 0, 1],
    [1, 1, 0, 0],
    [0, 1, 1, 0],
    [0, 0, 1, 0],
    [1, 0, 0, 0],
    [0, 0, 1, 0],
])