'''生成数据,三列变量 参考给出的笔记思路'''
import numpy as np
from sklearn import preprocessing
def generate_data(data_size_1, data_size_2):
    """Draw a shuffled two-class sample with three Gaussian features.

    Class 0 contributes ``data_size_1`` rows whose three features are drawn
    from normal distributions centred at 5, 4 and 7 with standard deviation 1.
    Class 1 contributes ``data_size_2`` rows centred at 10, 8 and 12 with
    standard deviation 2.

    Args:
        data_size_1: Number of samples labelled 0.
        data_size_2: Number of samples labelled 1.

    Returns:
        A tuple ``(x, y)``: ``x`` has one row per sample and three columns,
        ``y`` holds the matching 0/1 labels. Rows are randomly permuted, with
        the same permutation applied to both arrays.
    """
    # Draw order (class 0 features, then class 1 features, then the
    # permutation) determines the output under a fixed np.random seed.
    class0_features = [
        np.random.normal(loc=center, scale=1.0, size=data_size_1)
        for center in (5.0, 4.0, 7.0)
    ]
    class0_labels = [0] * data_size_1

    class1_features = [
        np.random.normal(loc=center, scale=2.0, size=data_size_2)
        for center in (10.0, 8.0, 12.0)
    ]
    class1_labels = [1] * data_size_2

    # Stack each feature of both classes into a single column vector.
    columns = [
        np.concatenate((first, second), axis=0).reshape(-1, 1)
        for first, second in zip(class0_features, class1_features)
    ]
    features = np.hstack(columns)
    labels = np.concatenate((class0_labels, class1_labels), axis=0)

    # Shuffle so the two classes are interleaved before any later split.
    order = np.random.permutation(data_size_1 + data_size_2)
    return features[order], labels[order]
'''定义数据集分割函数'''
def train_test_split(x, y, train_ratio=0.6):
    """Split features and labels into a leading train part and a trailing test part.

    No shuffling is done here; the split is purely positional, so the caller
    is responsible for randomising the row order beforehand.

    Args:
        x: Sliceable sequence of samples.
        y: Sliceable sequence of labels; its length determines the split point.
        train_ratio: Fraction of samples (between 0 and 1 inclusive) assigned
            to the training part. Defaults to 0.6.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``train_ratio`` is outside the range [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            f"train_ratio must be between 0 and 1, got {train_ratio!r}"
        )
    # Truncate towards zero so the training part never exceeds the ratio.
    split_index = int(len(y) * train_ratio)
    return x[:split_index], y[:split_index], x[split_index:], y[split_index:]
'''标准化数据'''
def standard_data(data):
    """Return ``data`` standardized by ``sklearn.preprocessing.scale``.

    Args:
        data: Array-like values to standardize.

    Returns:
        The result of ``preprocessing.scale(data)`` with its default settings.
    """
    return preprocessing.scale(data)
'''预测函数'''
def predict(xdata, w, b):
    """Predict 0/1 class labels with a linear model followed by a sigmoid.

    Args:
        xdata: Sample matrix (one row per sample) or a single sample vector.
        w: Weight vector applied to the features via ``xdata.dot(w)``.
        b: Bias added to every linear score.

    Returns:
        A float array of 0.0/1.0 labels with the same shape as
        ``xdata.dot(w) + b``. A sample is labelled 1 only when its sigmoid
        probability is strictly greater than 0.5.
    """
    linear_score = np.asarray(xdata).dot(w) + b
    probability = 1.0 / (1.0 + np.exp(-linear_score))
    # Vectorized threshold: probabilities of exactly 0.5 fall into class 0.
    return np.where(probability <= 0.5, 0.0, 1.0)
'''评分函数'''
def accu(y_test, y_predict):
    """Compute the fraction of positions where prediction and truth agree.

    Args:
        y_test: Indexable sequence of true labels; its length sets how many
            positions are compared.
        y_predict: Indexable sequence of predicted labels.

    Returns:
        The mean of the per-position match indicators (1 for equal, 0 otherwise).
    """
    match_flags = []
    for position in range(len(y_test)):
        is_match = y_test[position] == y_predict[position]
        match_flags.append(1 if is_match else 0)
    return np.mean(match_flags)
'''逻辑回归函数'''
def logisfunction(learing_rate, max_ite, xdata, ylabel):
    """Fit logistic-regression parameters with batch gradient descent.

    Weights and bias start from standard-normal random values and are updated
    ``max_ite`` times using the gradient of the mean log-loss over all samples.

    Args:
        learing_rate: Step size applied to each gradient update.
        max_ite: Number of gradient-descent iterations to run.
        xdata: Sample matrix with one row per sample and one column per feature.
        ylabel: 0/1 target labels, one per row of ``xdata``.

    Returns:
        A tuple ``(w, b)`` with the learned weight vector and bias.
    """
    xdata = np.asarray(xdata, dtype=float)
    ylabel = np.asarray(ylabel, dtype=float)
    sample_count = len(ylabel)

    w = np.random.normal(loc=0.0, scale=1.0, size=xdata.shape[1])
    b = np.random.normal(loc=0.0, scale=1.0)

    for _ in range(max_ite):
        linear_score = xdata.dot(w) + b
        y_sigmoid = 1.0 / (1.0 + np.exp(-linear_score))
        # The log-loss gradient uses the predicted probabilities themselves.
        # Thresholding them to 0/1 first would give a perceptron-style update
        # that vanishes as soon as every training sample is classified correctly.
        residual = y_sigmoid - ylabel
        dw = residual.dot(xdata) / sample_count
        db = np.mean(residual)
        w = w - learing_rate * dw
        b = b - learing_rate * db
    return w, b
# Build a shuffled sample of 500 points per class and split it 60/40.
x, y = generate_data(500, 500)
x_train, y_train, x_test, y_test = train_test_split(x, y)

# Standardize the features only. The labels must stay 0/1: they are class
# targets compared against sigmoid outputs, not continuous values to rescale.
# The scaler is fitted on the training split and reused for the test split so
# both are expressed in the same units the model was trained on.
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

w, b = logisfunction(0.1, 500, x_train, y_train)

# Keep the predictions and report how many held-out labels they match.
y_predict = predict(x_test, w, b)
print("test accuracy:", accu(y_test, y_predict))
# Classification result when X is 2-dimensional: