import numpy as np
from matplotlib import pyplot as plt
def load_dataset():
    """Read testSet.txt and return (features, labels).

    Each feature row is [1.0, x1, x2]; the leading 1.0 acts as the bias
    (intercept) term.  Labels are parsed as ints.
    """
    features, labels = [], []
    with open('testSet.txt', 'r') as fh:
        for raw in fh:
            fields = raw.strip().split()
            features.append([1.0, float(fields[0]), float(fields[1])])
            labels.append(int(fields[2]))
    return features, labels
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); works on scalars and ndarrays."""
    denom = 1.0 + np.exp(-x)
    return 1.0 / denom
def grad_ascent(data_list, label_list):
    """Full-batch gradient ascent for logistic regression.

    Runs 500 fixed-step updates over the whole training set and returns
    the learned (n, 1) weight column (np.matrix).
    """
    step = 0.001
    cycles = 500
    x_mat = np.matrix(data_list)
    y_mat = np.matrix(label_list).transpose()
    _, n_features = x_mat.shape
    weight = np.ones((n_features, 1))
    for _ in range(cycles):
        prediction = sigmoid(x_mat * weight)
        residual = y_mat - prediction
        # gradient of the log-likelihood is X^T (y - h)
        weight = weight + step * x_mat.transpose() * residual
    return weight
def plot_best_fit(weight):
    """Scatter the two classes of testSet.txt and overlay the decision line.

    The line is w0 + w1*x1 + w2*x2 = 0 solved for x2, using the supplied
    (3, 1) weight column.
    """
    data_list, label_list = load_dataset()
    points = np.array(data_list)
    pos_x, pos_y, neg_x, neg_y = [], [], [], []
    for idx, label in enumerate(label_list):
        if label == 1:
            pos_x.append(points[idx, 1])
            pos_y.append(points[idx, 2])
        else:
            neg_x.append(points[idx, 1])
            neg_y.append(points[idx, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(pos_x, pos_y, s=10, c='red', marker='s')
    ax.scatter(neg_x, neg_y, s=10, c='green')
    xs = np.arange(-3.0, 3.0, 0.1)
    # decision boundary: solve w0 + w1*x + w2*y = 0 for y
    ys = (-weight[0, 0] - weight[1, 0] * xs) / weight[2, 0]
    ax.plot(xs, ys)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def random_grad_ascent(data_list, label_list):
    """Stochastic gradient ascent: one weight update per training sample.

    FIX: the previous body repeated the *full-batch* update m times —
    byte-for-byte the grad_ascent inner loop — so nothing stochastic or
    per-sample ever happened.  This version updates the weights once per
    row, which is what the function name promises.

    Returns the learned (n, 1) weight column.
    """
    data_mat = np.matrix(data_list)
    label_mat = np.matrix(label_list).transpose()
    m, n = data_mat.shape
    alpha = 0.01
    weight = np.ones((n, 1))
    for i in range(m):
        # logistic response for sample i only (sigmoid inlined so the
        # block is self-contained)
        h = 1.0 / (1.0 + np.exp(-(data_mat[i] * weight)))
        error = label_mat[i] - h
        weight = weight + alpha * data_mat[i].transpose() * error
    return weight
def random_grad_ascent1(data_list, label_list, num=150):
    """Improved stochastic gradient ascent with a decaying step size.

    Each of the `num` passes visits every row exactly once, in random
    order (sampling without replacement), and alpha decays with both the
    pass index and the step index but never reaches zero.

    FIXES vs the previous body:
    * `data_index` was a `range`, so the sampling-without-replacement
      `del` had been commented out (ranges don't support item deletion);
      it is now a list and the `del` is restored.
    * the label was indexed with `rand_index` while the sample used
      `data_index[rand_index]` — once deletion is active those diverge,
      pairing a row with the wrong label.  Both now use the same row id.

    Returns the learned (n, 1) weight column.
    """
    data_mat = np.matrix(data_list)
    label_mat = np.matrix(label_list).transpose()
    m, n = data_mat.shape
    weight = np.ones((n, 1))
    for i in range(num):
        # candidate row ids for this pass; shrinks as rows get used
        data_index = list(range(m))
        for j in range(m):
            # decaying but never-zero step size
            alpha = 4 / (1.0 + i + j) + 0.01
            rand_index = int(np.random.uniform(0, len(data_index)))
            row = data_index[rand_index]
            # logistic response for the sampled row (sigmoid inlined so
            # the block is self-contained)
            h = 1.0 / (1.0 + np.exp(-(data_mat[row] * weight)))
            error = label_mat[row] - h
            weight = weight + alpha * data_mat[row].transpose() * error
            # sample without replacement within this pass
            del data_index[rand_index]
    return weight
def classify_vector(x, weight):
    """Classify one sample: 1.0 when sigmoid(x . weight) exceeds 0.5, else 0.0."""
    score = sigmoid(sum(x * weight))
    if score > 0.5:
        return 1.0
    return 0.0
def colic_test():
    """Train on horseColicTraining.txt, score horseColicTest.txt.

    Each data file holds 21 tab-separated features followed by the label
    in column 21.  Returns the test-set error rate as a float.
    """
    train_set, train_label = [], []
    with open('horseColicTraining.txt', 'r') as f:
        for line in f:
            fields = line.strip().split("\t")
            train_set.append([float(v) for v in fields[:21]])
            train_label.append(float(fields[21]))
    train_weight = random_grad_ascent1(data_list=train_set, label_list=train_label)

    error_count = 0
    num_test_vec = 0.0
    with open('horseColicTest.txt', 'r') as f:
        for line in f:
            num_test_vec += 1.0
            fields = line.strip().split("\t")
            sample = np.array([float(v) for v in fields[:21]])
            if int(classify_vector(sample, train_weight)) != int(fields[21]):
                error_count += 1
    return error_count / num_test_vec
def multi_test():
    """Run colic_test ten times and print the cumulative error statistics."""
    num_test = 10
    error_sum = 0.0
    for _ in range(num_test):
        error_sum += colic_test()
    print(f"num_test={num_test}, error_sum={error_sum},error_rate={error_sum / num_test}")
if __name__ == '__main__':
    # Demo: train the 2-D toy set with each of the three optimizers,
    # plotting every decision boundary, then run the horse-colic benchmark.
    data_list, label_list = load_dataset()
    w_batch = grad_ascent(data_list, label_list)
    plot_best_fit(w_batch)
    w_sgd = random_grad_ascent(data_list, label_list)
    plot_best_fit(w_sgd)
    w_sgd_decay = random_grad_ascent1(data_list, label_list, num=150)
    plot_best_fit(w_sgd_decay)
    multi_test()
# Other logistic-regression examples (including implementations based on
# mainstream machine-learning frameworks) can be found at: