为了加深对机器学习算法的理解，并熟悉 Python、pandas 和 scikit-learn，现在自己动手实现一下主要的机器学习算法，程序记录如下：
Logistic 回归算法的实现程序：
from numpy import *
from sklearn import preprocessing
from sklearn import cross_validation
def load_data_set(path='testSet.txt'):
    """Load a tab-separated two-feature data set from a text file.

    Each non-blank line must hold three tab-separated fields: two float
    features followed by an integer label.

    Args:
        path: File to read. Defaults to 'testSet.txt' in the current
            working directory.

    Returns:
        A ``(data_set, label)`` tuple. ``data_set`` is a list of
        ``[1.0, x1, x2]`` rows (the leading 1.0 is a constant bias column)
        and ``label`` is the list of integer labels in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError, IndexError: If a non-blank line is malformed.
    """
    data_set = []
    label = []
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(path) as data_file:
        for line in data_file:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            fields = stripped.split('\t')
            data_set.append([1.0, float(fields[0]), float(fields[1])])
            label.append(int(fields[2]))
    return data_set, label
def batch_update_params(data_set, label, max_iter=500, alpha=0.001):
    """Fit logistic-regression weights with full-batch gradient ascent.

    Every iteration scores all samples with the current weights (via this
    file's ``sign`` helper, which computes the logistic function) and moves
    the weights along ``(y - y_hat) * X``.

    Args:
        data_set: Sequence of m equal-length numeric feature rows.
        label: Sequence of m target values, one per row of ``data_set``.
        max_iter: Number of full-batch updates to perform (default 500).
        alpha: Step size applied to each update (default 0.001).

    Returns:
        A 1 x n ``numpy.matrix`` of weights, n being the row length.
    """
    fea_len = len(data_set[0])
    # asmatrix is what the removed numpy alias `mat` pointed to; using it
    # directly keeps the code working on NumPy >= 2.0.
    data_mat = asmatrix(data_set)        # m x n
    y = asmatrix(label)                  # 1 x m
    w = asmatrix(ones((1, fea_len)))     # 1 x n, all weights start at 1
    for _ in range(max_iter):
        y_hat = sign(data_mat, w)        # 1 x m predicted probabilities
        error = y - y_hat                # 1 x m residuals
        # (1 x m) * (m x n) is a matrix product -> 1 x n gradient step.
        w = w + alpha * error * data_mat
    return w
def stog_update_params(data_set, label, alpha=0.001):
    """Fit logistic-regression weights with stochastic gradient ascent.

    Makes a single pass over the samples in order, updating the weights
    after each one using only that sample's prediction error. Predictions
    come from this file's ``sign`` helper (the logistic function).

    Args:
        data_set: Sequence of m equal-length numeric feature rows.
        label: Sequence of m target values, one per row of ``data_set``.
        alpha: Step size applied to each per-sample update (default 0.001).

    Returns:
        A 1 x n ``numpy.matrix`` of weights, n being the row length.
    """
    # asmatrix is what the removed numpy alias `mat` pointed to; using it
    # directly keeps the code working on NumPy >= 2.0.
    data_mat = asmatrix(data_set)        # m x n
    y = asmatrix(label)                  # 1 x m
    m, n = shape(data_mat)
    w = asmatrix(ones((1, n)))           # 1 x n, all weights start at 1
    for i in range(m):
        curr_data = data_mat[i]          # 1 x n row for sample i
        # Score only the current sample. Scoring the whole data matrix
        # yields a 1 x m residual that cannot be multiplied with the
        # 1 x n sample row below.
        y_hat = sign(curr_data, w)[0, 0]
        error = y[0, i] - y_hat          # scalar residual for sample i
        w = w + alpha * error * curr_data
    return w
def sign(data_mat, w):
    """Return the logistic sigmoid of ``w . data_mat^T``.

    Despite its name this is not the signum function; note that this
    definition shadows ``numpy.sign`` brought in by ``from numpy import *``.

    Args:
        data_mat: m x n samples (``numpy.matrix``, ndarray or nested lists).
        w: 1 x n weights (``numpy.matrix`` or ndarray).

    Returns:
        ``1 / (1 + exp(-(w . data_mat^T)))``, one value per sample row.
    """
    # dot() is a true matrix product for ndarrays as well as numpy.matrix;
    # the `*` operator is only a matrix product when an operand is a matrix.
    scores = dot(w, transpose(data_mat))
    return 1.0 / (1 + exp(-scores))
注意点：需要先利用 NumPy 的 mat 函数（NumPy 2.0 起已移除，请改用等价的 asmatrix）将列表数据转换成矩阵，之后再进行运算；对矩阵类型而言，`*` 表示矩阵乘法而不是逐元素相乘。