手推公式,现场造轮子
kmeans
#coding=utf-8
def distance(pt1, pt2):
    """Return the squared Euclidean distance between two points.

    No square root is taken; the result is the sum of squared
    per-coordinate differences, which orders candidates the same way the
    true Euclidean distance does.

    Args:
        pt1: Sequence of numeric coordinates; its length sets how many
            dimensions are compared.
        pt2: Sequence of numeric coordinates with at least ``len(pt1)``
            entries.

    Returns:
        The sum of ``(pt1[i] - pt2[i]) ** 2`` over every index of ``pt1``
        (``0`` when ``pt1`` is empty).

    Raises:
        IndexError: If ``pt2`` has fewer coordinates than ``pt1``.
    """
    # Index by len(pt1) rather than zip() so a too-short pt2 fails loudly
    # instead of being silently truncated.
    return sum((pt1[i] - pt2[i]) ** 2 for i in range(len(pt1)))
# Build the samples: every integer grid point [i, j] with 0 <= i, j < 10.
pts = [[i, j] for i in range(10) for j in range(10)]

# Initial centers.
centers = [[0, 2], [3, 3]]
k = len(centers)
m = len(centers[0])  # number of coordinates per point

# Iterate: alternate between assigning labels and recomputing centers.
print(centers)
for step in range(5):
    # Assign each sample the index of its nearest center.
    labels = []
    for pt in pts:
        best_idx = -1
        # Infinity instead of a fixed large number, so the first center
        # always wins the initial comparison regardless of data scale.
        best_dis = float("inf")
        # Use a dedicated index name; the outer loop variable is not reused.
        for idx, center in enumerate(centers):
            dis = distance(pt, center)
            if dis < best_dis:
                best_idx = idx
                best_dis = dis
        labels.append(best_idx)

    # Group the samples by their assigned label.
    cts = [[] for _ in range(k)]
    for pt, label in zip(pts, labels):
        cts[label].append(pt)

    # Recompute each center as the per-dimension mean of its samples.
    new_centers = []
    for label in range(k):
        members = cts[label]
        if not members:
            # An empty cluster has no mean; keep its previous center
            # rather than dividing by zero.
            new_centers.append(centers[label])
            continue
        n_samples = len(members)
        new_centers.append(
            [sum(sample[dim] for sample in members) / n_samples
             for dim in range(m)]
        )
    centers = new_centers
    print(centers)
print(labels)
logistic regression
主要背这几段代码
- 交叉熵
loss = -(y @ np.log(y_hat) + (1 - y) @ np.log(1 - y_hat)) / n
- y_hat 计算
就是 sigmoid:y_hat = 1 / (1 + np.exp(-z)),其中 z = X @ w + b