思路:
首先选定一个 IoU 阈值,例如 0.4。将所有候选窗口(bounding box)按照得分由高到低排序,选中得分最高的窗口,遍历计算其余每个窗口与该窗口的重叠面积比例(IoU);如果 IoU 大于阈值,则将该窗口删除。然后再从剩余窗口中选出得分最高的一个,重复上述过程,直至所有窗口都被处理完毕。
# python3
import numpy as np
def py_nms(dets, thresh):
    """Pure-Python non-maximum suppression baseline.

    Args:
        dets: (N, 5) array; each row is [x1, y1, x2, y2, score].
        thresh: IoU threshold; a box overlapping a kept box by more
            than this is suppressed.

    Returns:
        List of row indices into ``dets`` that survive suppression.
    """
    xs1, ys1 = dets[:, 0], dets[:, 1]
    xs2, ys2 = dets[:, 2], dets[:, 3]
    # +1 pixel convention: a box from x1 to x2 inclusive spans x2-x1+1 pixels.
    box_area = (xs2 - xs1 + 1) * (ys2 - ys1 + 1)
    # Candidate indices, highest score first.
    remaining = dets[:, 4].argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best = remaining[0]
        kept.append(best)
        rest = remaining[1:]
        # Intersection rectangle of the best box with every other candidate
        # (vectorized via NumPy broadcasting).
        ix1 = np.maximum(xs1[best], xs1[rest])
        iy1 = np.maximum(ys1[best], ys1[rest])
        ix2 = np.minimum(xs2[best], xs2[rest])
        iy2 = np.minimum(ys2[best], ys2[rest])
        # Non-overlapping boxes would give a negative width/height; clamp to 0.
        iw = np.maximum(0.0, ix2 - ix1 + 1)
        ih = np.maximum(0.0, iy2 - iy1 + 1)
        overlap = iw * ih
        # IoU = intersection / union.
        iou = overlap / (box_area[best] + box_area[rest] - overlap)
        # Keep only the candidates that do not overlap the best box too much.
        remaining = rest[iou <= thresh]
    return kept
# test
if __name__ == "__main__":
dets = np.array([[30, 20, 230, 200, 1],
[50, 50, 260, 220, 0.9],
[210, 30, 420, 5, 0.8],
[430, 280, 460, 360, 0.7]])
thresh = 0.35
keep_dets = py_nms(dets, thresh)
print(keep_dets)
print(dets[keep_dets])
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
## k-means demo below assumes k = 4
data = pd.read_table('C:/Users/red/Desktop/a1/testSet.txt', header=None, names=['x', 'y'])
## the file has no header row; read_table splits columns on tabs
x = data['x']
y = data['y']
# Top panel: the raw, unclustered points.
plt.subplot(2,1,1)
plt.scatter(x, y)
def distance(data, centers):
    """Euclidean distance from every sample to every center.

    Args:
        data: (n, d) samples — a pandas DataFrame or array-like.
        centers: (k, d) cluster centers.

    Returns:
        (n, k) float ndarray; entry [i, j] is the distance from sample i
        to center j.
    """
    pts = np.asarray(data, dtype=float)      # (n, d)
    cen = np.asarray(centers, dtype=float)   # (k, d)
    # Broadcast to an (n, k, d) difference tensor and reduce over the
    # feature axis. This replaces the original O(n*k) Python loops with
    # per-row `.iloc` lookups by a single vectorized expression while
    # producing identical values.
    diff = pts[:, None, :] - cen[None, :, :]
    return np.sqrt((diff ** 2).sum(axis=2))
def near_center(data, centers):
    """Assign each sample the index of its nearest center.

    Args:
        data: (n, d) samples.
        centers: (k, d) cluster centers.

    Returns:
        (n,) int array of cluster labels (argmin over the k distances).
    """
    # distance() yields an (n, k) matrix; the closest center per row is
    # the column index of the row minimum.
    return np.argmin(distance(data, centers), axis=1)
def kmeans(data, k):
    """Lloyd's k-means with a fixed budget of 10 iterations.

    Args:
        data: (n, 2) samples (pandas DataFrame with columns 'x', 'y').
        k: number of clusters.

    Returns:
        Tuple of (centers, near_cen): the final (k, 2) centers and the
        final (n,) cluster label per sample.
    """
    # step 1: random initial centers drawn from [-5, 5) on a 0.1 grid
    centers = np.random.choice(np.arange(-5, 5, 0.1), (k, 2))
    print(centers)
    for _ in range(10):  # fixed iteration count, no convergence test
        # step 2: assign every point to its nearest center
        near_cen = near_center(data, centers)
        # step 3: move each center to the mean of its members
        for ci in range(k):
            members = data[near_cen == ci]
            # BUG FIX: an empty cluster used to set its center to the mean
            # of zero rows (NaN), poisoning all later iterations. Keep the
            # previous center position instead.
            if len(members) > 0:
                centers[ci] = members.mean()
    return centers, near_cen
# Run k-means with k = 4 on the points loaded above.
centers, near_cen = kmeans(data, 4)
print(near_cen)
# Bottom panel: points colored by cluster label, centers as big red stars.
plt.subplot(2,1,2)
plt.scatter(x, y, c=near_cen)
plt.scatter(centers[:, 0], centers[:, 1], marker='*', s=500, c='r')
plt.show()
# ===== 线性回归 (linear regression) =====
from numpy import *
import matplotlib.pyplot as plt
def loadDataSet(fileName):
    """Load a tab-separated data file.

    Every column but the last is a feature; the last column is the label.

    Args:
        fileName: path to the tab-separated text file.

    Returns:
        (dataMat, labelMat): list of feature rows (lists of float) and
        the parallel list of float labels.
    """
    dataMat = []
    labelMat = []
    # BUG FIX: the original opened the file twice (once just to count
    # columns) and never closed either handle. A single context-managed
    # pass does both jobs and guarantees the file is closed.
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            if len(curLine) < 2:
                # skip blank/malformed lines, which used to crash float('')
                continue
            dataMat.append([float(v) for v in curLine[:-1]])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegres(xArr, yArr):
    """Ordinary least-squares regression: w = (X^T X)^-1 X^T y.

    Args:
        xArr: list of feature rows (each row typically starts with the
            bias term 1.0).
        yArr: list of target values.

    Returns:
        Column matrix of fitted weights ws.

    Raises:
        ValueError: if X^T X is singular and cannot be inverted.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    xTx = xMat.T * xMat
    # BUG FIX: the original called .I unconditionally; on a singular
    # matrix that fails with an opaque LinAlgError (or returns garbage
    # via pinv on some paths). Fail fast with a clear message instead.
    if linalg.det(xTx) == 0.0:
        raise ValueError("standRegres: X^T X is singular, cannot invert")
    ws = xTx.I * (xMat.T * yMat)  # w = (X^T X)^-1 X^T y
    return ws
# Demo: fit a line to ex0.txt and plot the data with the fitted line.
a, b = loadDataSet('ex0.txt')
ws = standRegres(a, b)
# BUG FIX: `print ws` is Python 2 syntax and is a SyntaxError under the
# Python 3 this file targets; use the print() function.
print(ws)
x = arange(0, 1, 0.01)
# raw points: second feature vs. label, red circles
plt.plot([i[1] for i in a], b, 'or')
# fitted line y = w0 + w1 * x, green
plt.plot(x, float(ws[0]) + float(ws[1]) * x, 'g')
plt.show()