3逻辑回归二分类
题目:根据学生的两门考试成绩,预测该学生是否会被大学录取
数据集:ex2data1.txt
- 数据可视化
# Load the two exam scores and the "accepted" label from the CSV file.
data = pd.read_csv(
    "ex2data1.txt",
    sep=",",
    names=["exam1", "exam2", "accepted"],
)
print(data.head())

# Scatter plot of the exam scores, one series per value of "accepted".
fig, ax = plt.subplots()
rejected = data[data["accepted"] == 0]
admitted = data[data["accepted"] == 1]
ax.scatter(rejected["exam1"], rejected["exam2"], c="r", marker="x", label="y=0")
ax.scatter(admitted["exam1"], admitted["exam2"], c="b", marker="o", label="y=1")
ax.legend()
ax.set(xlabel="exam1", ylabel="exam2")
plt.show()
- 数据集拆分(同以前,封装成一个函数)
def get_xy(data):
    """Split a DataFrame into a design matrix and a label column vector.

    A leading column of ones (the intercept term) is prepended to the
    feature columns, and the last column of ``data`` is used as the label.
    The input frame itself is left untouched, so the function can safely
    be called more than once on the same frame.

    Args:
        data: DataFrame whose last column is the label and whose remaining
            columns are the features.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds the ones column
        followed by the features, ``y`` is the label reshaped to a column
        with one row per sample.
    """
    # Work on a copy: inserting into the caller's frame would leave a
    # permanent "ones" column behind and make a second call raise
    # ValueError because the column already exists.
    frame = data.copy()
    frame.insert(0, "ones", 1)
    X = frame.iloc[:, :-1].values
    y = frame.iloc[:, -1].values.reshape(len(frame), 1)
    return X, y
# Build the design matrix and the label column from the loaded frame.
X, y = get_xy(data)
for arr in (X, y):
    print(arr.shape)  # expected: (100, 3), then (100, 1)
- sigmoid函数
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which always lies in
    ``(0, 1]``, so large negative inputs no longer overflow inside
    ``np.exp`` the way the direct formula does.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``z``: a NumPy float for scalar input, otherwise an
        array with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 it is the
    # algebraically equal exp(z) / (1 + exp(z)).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]
4 cost function
def cost_function(X, y, theta):
    """Return the mean logistic-regression (cross-entropy) cost.

    With ``z = X @ theta`` the per-sample cost
    ``-[y * log(s(z)) + (1 - y) * log(1 - s(z))]`` (``s`` being the
    sigmoid) simplifies to ``log(1 + exp(z)) - y * z``. Evaluating that
    form with ``np.logaddexp`` keeps the result finite even when the
    sigmoid would saturate to exactly 0 or 1, where the direct formula
    yields ``inf`` or ``nan``.

    Args:
        X: Design matrix with one row per sample.
        y: Labels (0 or 1) with the same shape as ``X @ theta``.
        theta: Parameter vector, multiplied as ``X @ theta``.

    Returns:
        The summed per-sample cost divided by ``len(X)``.
    """
    z = X @ theta
    # logaddexp(0, z) == log(exp(0) + exp(z)) == log(1 + exp(z)), computed
    # without overflowing for large |z|.
    per_sample = np.logaddexp(0, z) - y * z
    return np.sum(per_sample) / len(X)
# All-zero starting parameters as a 3x1 column vector.
theta = np.zeros(shape=(3, 1))
# Cost at the starting point; the original run reported about 0.693.
cost_init = cost_function(X=X, y=y, theta=theta)
print(cost_init)
- 梯度下降
def gradient_descent(X, y, theta, alpha, iters):
    """Fit logistic-regression parameters with batch gradient descent.

    Each step moves ``theta`` against the gradient
    ``X.T @ (sigmoid(X @ theta) - y) / len(X)`` scaled by ``alpha`` and
    records the cost at the updated parameters.

    Args:
        X: Design matrix with one row per sample.
        y: Labels with the same shape as ``X @ theta``.
        theta: Initial parameters; the passed array is not modified.
        alpha: Learning rate.
        iters: Number of update steps to perform.

    Returns:
        A tuple ``(theta, costs)``: the parameters after the last step and
        the list of costs measured after every step.
    """
    # (alpha / m) * X.T is the same in every iteration, so build it once.
    scaled_xt = (alpha / len(X)) * X.T
    costs = []
    for i in range(iters):
        A = sigmoid(X @ theta)
        theta = theta - scaled_xt @ (A - y)
        cost = cost_function(X, y, theta)
        costs.append(cost)
        # Report only every 10000th cost (the current value, not the whole
        # history) to keep the output short.
        if i % 10000 == 0:
            print(cost)
    return theta, costs
# Learning rate and number of update steps for the training run.
alpha, iters = 0.004, 200000
theta_final, costs = gradient_descent(X, y, theta, alpha=alpha, iters=iters)
- 判断正确率
def predict(X, theta):
    """Classify each row of ``X`` as 1 or 0 using a 0.5 threshold.

    Args:
        X: Design matrix with one row per sample.
        theta: Parameter column vector, multiplied as ``X @ theta``.

    Returns:
        A flat list of ints, one per sample: 1 where the predicted
        probability ``sigmoid(X @ theta)`` is at least 0.5, otherwise 0.
    """
    prob = sigmoid(X @ theta)
    # Threshold the whole array at once instead of looping over rows and
    # relying on the truth value of one-element arrays.
    return np.where(prob >= 0.5, 1, 0).ravel().tolist()
# Turn the predicted labels into a column so they line up with y.
y_ = np.array(predict(X, theta_final))
y_pre = y_.reshape(-1, 1)
# Accuracy is the share of samples whose prediction equals the label.
acc = (y_pre == y).mean()
print(acc)
正确率达到了0.91
- 绘制决策边界
# On the boundary theta0 + theta1 * exam1 + theta2 * exam2 = 0, hence
# exam2 = -(theta0 / theta2) - (theta1 / theta2) * exam1.
theta0, theta1, theta2 = (theta_final[k, 0] for k in range(3))
coef1 = -theta0 / theta2
coef2 = -theta1 / theta2
x = np.linspace(20, 100, 100)  # 100 evenly spaced points from 20 to 100
f = coef1 + coef2 * x

# Same scatter plot as before, with the boundary line drawn on top.
fig, ax = plt.subplots()
for value, colour, symbol, text in ((0, "r", "x", "y=0"), (1, "b", "o", "y=1")):
    group = data[data["accepted"] == value]
    ax.scatter(group["exam1"], group["exam2"], c=colour, marker=symbol, label=text)
ax.legend()
ax.set(xlabel="exam1", ylabel="exam2")
ax.plot(x, f, c="g")
plt.show()