4. 逻辑回归(线性不可分)
题目:设想你是工厂的生产主管,你有一些芯片在两次测试中的测试结果。对于这两次测试,你想决定是否芯片要被接受或抛弃
数据集:ex2data2.txt
- 数据可视化
# Load the chip test data: two test scores plus a binary accept/reject label.
data = pd.read_csv("ex2data2.txt", sep=",", names=["test1", "test2", "accepted"])
print(data.head())

# Scatter-plot the two classes with distinct colors and markers.
fig, ax = plt.subplots()
for cls, color, marker, legend_name in [(0, "r", "x", "y=0"), (1, "b", "o", "y=1")]:
    subset = data[data["accepted"] == cls]
    ax.scatter(subset["test1"], subset["test2"], c=color, marker=marker, label=legend_name)
ax.legend()
ax.set(xlabel="test1", ylabel="test2")
plt.show()
- 特征映射
如果样本线性不可分,简单的逻辑回归无法拟合。原始特征只有 x1、x2 时,可以用多项式创建更多的特征:x1、x2、x1x2、x1²、x2²、…、x1ⁿx2ⁿ。用更多的特征进行逻辑回归时,得到的分割线可以是任意高阶函数的形状。
def feature_mapping(x1, x2, power):
    """Expand two features into all polynomial terms up to the given degree.

    Produces every term x1^(i-j) * x2^j for i = 0..power, j = 0..i, so the
    first column (F00) is the all-ones bias feature.

    Returns a DataFrame whose columns are named "F{i-j}{j}" in generation order.
    """
    terms = {
        "F{}{}".format(i - j, j): np.power(x1, i - j) * np.power(x2, j)
        for i in np.arange(power + 1)
        for j in np.arange(i + 1)
    }
    return pd.DataFrame(terms)
# Expand the two raw scores into degree-6 polynomial features (28 columns).
x1 = data["test1"]
x2 = data["test2"]
data2 = feature_mapping(x1, x2, 6)
print(data2.head())
- 正则化损失函数
不惩罚第一项,j从1开始
# Design matrix from the mapped features; F00 already serves as the bias column.
X = data2.values  # shape (118, 28)
# Labels as a column vector so matrix arithmetic below broadcasts correctly.
y = data.iloc[:, -1].values.reshape(-1, 1)  # shape (118, 1)
def sigmoid(z):
    """Logistic function: maps any real input into the open interval (0, 1)."""
    return 1.0 / (1.0 + np.exp(-z))
def cost_function(X, y, theta, lamda):
    """Regularized logistic-regression cost (cross-entropy + L2 penalty).

    The bias weight theta[0] is deliberately excluded from the penalty,
    so the regularization sum starts at theta[1].
    """
    m = len(X)
    h = sigmoid(X @ theta)
    log_likelihood = y * np.log(h) + (1 - y) * np.log(1 - h)
    penalty = (lamda / (2 * m)) * np.sum(np.power(theta[1:], 2))
    return -np.sum(log_likelihood) / m + penalty
# Sanity check: with theta = 0 every prediction is 0.5, so the expected
# initial cost is ln(2) ≈ 0.693.
theta = np.zeros((28, 1))
lamda = 1
cost_init = cost_function(X, y, theta, lamda)
print(cost_init)  # ~0.693
- 梯度下降函数
def gradient_descent(X, y, theta, alpha, iters, lamda):
    """Batch gradient descent for L2-regularized logistic regression.

    Parameters
    ----------
    X : (m, n) design matrix; first column is the bias feature.
    y : (m, 1) binary labels.
    theta : (n, 1) initial weight vector.
    alpha : learning rate.
    iters : number of iterations.
    lamda : regularization strength (theta[0] is not penalized).

    Returns
    -------
    costs : list of the cost after each iteration.
    theta : learned (n, 1) weight vector.
    """
    m = len(X)
    costs = []
    for i in range(iters):
        # Regularization part of the gradient: (lamda/m) * theta,
        # with a 0 inserted so the bias term is not penalized.
        reg = theta[1:] * (lamda / m)
        reg = np.insert(reg, 0, values=0, axis=0)  # back to (n, 1)
        grad = X.T @ (sigmoid(X @ theta) - y) / m + reg
        # BUG FIX: the regularization term must also be scaled by alpha —
        # the whole gradient is stepped with a single learning rate
        # (theta := theta - alpha * grad_J). Previously `reg` was subtracted
        # without alpha, over-shrinking the weights.
        theta = theta - alpha * grad
        cost = cost_function(X, y, theta, lamda)
        costs.append(cost)
        # Log progress occasionally: print the latest cost only,
        # not the entire accumulated history (previous print(costs) dumped
        # the whole list every 10000 iterations).
        if i % 10000 == 0:
            print(cost)
    return costs, theta
# Hyper-parameters and training run.
alpha = 0.001
iters = 200000
lamda = 0.001
costs, theta_final = gradient_descent(X, y, theta, alpha, iters, lamda)
print(costs)  # final cost around 0.469
- 准确率
def predict(X, theta):
    """Return hard 0/1 predictions: 1 when the predicted probability >= 0.5."""
    # Logistic probability computed inline: 1 / (1 + e^(-X @ theta)).
    probabilities = 1 / (1 + np.exp(-(X @ theta)))
    return [1 if p >= 0.5 else 0 for p in probabilities]
# Training-set accuracy: fraction of hard predictions matching the labels.
y_pre = np.array(predict(X, theta_final)).reshape(-1, 1)
acc = np.mean(y_pre == y)
print(acc)  # about 0.83
- 决策边界
# Decision boundary: evaluate theta . phi(x) over a grid and draw the
# contour where it crosses zero (i.e. predicted probability = 0.5).
grid = np.linspace(-1.2, 1.2, 200)
xx, yy = np.meshgrid(grid, grid)
mapped = feature_mapping(xx.ravel(), yy.ravel(), 6).values
zz = (mapped @ theta_final).reshape(xx.shape)

fig, ax = plt.subplots()
for cls, color, marker, legend_name in [(0, "r", "x", "y=0"), (1, "b", "o", "y=1")]:
    subset = data[data["accepted"] == cls]
    ax.scatter(subset["test1"], subset["test2"], c=color, marker=marker, label=legend_name)
ax.legend()
ax.set(xlabel="test1", ylabel="test2")
plt.contour(xx, yy, zz, 0)
plt.show()
改变lamda可改变图形效果