1.7考试,愁啊,看一些基础算法将就着过吧,而且跨年总不能看着基础物理过不是,那不是个新年的好兆头。日更,直到更完逻辑回归,softmax回归,支持向量机,随机森林,线性回归(包括牛顿法),K-means算法(包括K-means++),基于图的推荐算法,最后回顾一下之前写过的PCA-LDA,以及做游戏决策用到的基于矩阵分解的推荐算法以及libFM,算是给2018画上一个圆满的句号吧。
==================================1.1=====================================
一、逻辑回归
import numpy as np
from data import load_train_data,save_model
from display import plot_xy
def sigmoid(x):
    """Element-wise logistic sigmoid, numerically safe for extreme inputs.

    Clipping the argument to [-500, 500] prevents `np.exp` overflow
    RuntimeWarnings for large-magnitude negative x; at |x| = 500 the
    sigmoid has already saturated to 0/1 within float64 precision, so
    the returned values are unchanged.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
def train(x, label, steps, a):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        x: (n_samples, n_features) feature matrix (np.mat).
        label: (n_samples, 1) column of 0/1 targets (np.mat).
        steps: number of full-batch gradient iterations.
        a: learning rate.

    Returns:
        (n_features, 1) weight column vector (np.mat).
    """
    n_features = np.shape(x)[1]
    w = np.mat(np.ones((n_features, 1)))
    for step in range(steps):
        predictions = sigmoid(x * w)
        residual = label - predictions
        if step % 50 == 0:
            # Periodic progress report on the training loss.
            print("step" + str(step) + ", train error rate = " + str(loss(predictions, label)))
        # Gradient of the log-likelihood: x^T (label - sigmoid(x w)).
        w = w + a * x.T * residual
    return w
def loss(y, label):
    """Mean cross-entropy between predictions ``y`` and 0/1 ``label``.

    Both arguments are (n, 1) np.mat columns. A sample whose prediction
    has saturated to exactly 0 or 1 is skipped entirely, guarding
    against log(0); the divisor stays n regardless.
    """
    n = np.shape(y)[0]
    total = 0
    for idx in range(n):
        p = y[idx, 0]
        t = label[idx, 0]
        if p > 0 and (1 - p) > 0:
            total -= t * np.log(p) + (1 - t) * np.log(1 - p)
    return total / n
if __name__ == '__main__':
    # Load the training set, fit logistic-regression weights with
    # gradient ascent, and plot the points with the decision boundary.
    print("load train data")
    x, label = load_train_data("train_data.txt")
    w = train(x, label, 1000, 0.01)
    # save_model("weight", w)
    plot_xy(x.getA(), w, label.getA().ravel())
训练结果:
分类图如下:
二、Softmax回归
import numpy as np
from data import load_train_data,save_model
from display import plot_xy
def train(x, label, n_classes, steps, a):
    """Fit softmax-regression weights by batch gradient ascent.

    Args:
        x: (n_samples, n_features) feature matrix (np.mat).
        label: (n_samples, 1) integer class indices (np.mat).
        n_classes: number of distinct classes.
        steps: number of full-batch gradient iterations.
        a: learning rate.

    Returns:
        (n_features, n_classes) weight matrix (np.mat).
    """
    x_n, dim = np.shape(x)
    w = np.mat(np.ones((dim, n_classes)))
    for i in range(steps):
        scores = x * w
        # Subtract the per-row max before exponentiating: softmax is
        # shift-invariant, so probabilities (and the reported loss) are
        # unchanged, but np.exp can no longer overflow for large scores.
        y = np.exp(scores - scores.max(axis=1).repeat(n_classes, axis=1))
        if i % 50 == 0:
            print("step" + str(i) + ", train error rate = " + str(loss(y, label)))
        # neg_p = -softmax(scores); adding 1 at each sample's true class
        # turns it into the log-likelihood gradient (indicator - prob).
        neg_e_sum = -y.sum(axis=1).repeat(n_classes, axis=1)
        neg_p = y / neg_e_sum
        for k in range(x_n):
            neg_p[k, label[k, 0]] += 1
        w = w + (a / x_n) * x.T * neg_p
    return w
def loss(y, label):
    """Mean negative log-likelihood for unnormalized class scores.

    ``y`` is an (n, n_classes) np.mat of per-row unnormalized
    (exponentiated) class scores; each row is normalized here. Rows
    whose true-class probability is not strictly positive are skipped
    to avoid log(0); the divisor stays n regardless.
    """
    n = np.shape(y)[0]
    total = 0.0
    for row in range(n):
        p_true = y[row, label[row, 0]] / np.sum(y[row, :])
        if p_true > 0:
            total -= np.log(p_true)
    return total / n
if __name__ == '__main__':
    # Load the multi-class training set, fit softmax-regression weights,
    # and plot the resulting decision regions.
    print("load train data")
    x, label, k = load_train_data("train_data2.txt")
    print("training")
    w = train(x, label, k, 6000, 0.2)
    # print("save model")
    # save_model("weight2", w)
    print("plot")
    plot_xy(x.getA(), w, label.getA().ravel())
结果如下:
===============================1.2===================================
三、支持向量机SVM
import numpy as np
from data import load_train_data
#from display import plot_xy
def per_kernel_value(x,x_i,fun):
x_n = np.shape(x)[0]
kernel_value = np.mat(np.zeros((x_n,1)))
if fun[0]=='rbf':
sigma = f