遇到的错误:
ValueError Traceback (most recent call last)
in
49
50 # 结果可视化
---> 51 plot_decision_boundary(log_reg, axis=[0, 100, 0, 100])
52 plt.scatter(data_X[data_y == 0, 0], data_X[data_y == 0, 1], color='red')
53 plt.scatter(data_X[data_y == 1, 0], data_X[data_y == 1, 1], color='blue')
in plot_decision_boundary(model, axis)
28 X_new = np.c_[x0.ravel(), x1.ravel()]
29 #X_new = PolynomialFeatures(degree=2).fit_transform(X_new)
---> 30 y_predict = model.predict(X_new)
31
32 zz = y_predict.reshape(x0.shape)
d:\120\python\python37\lib\site-packages\sklearn\linear_model\_base.py in predict(self, X)
291 Predicted class label per sample.
292 """
--> 293 scores = self.decision_function(X)
294 if len(scores.shape) == 1:
295 indices = (scores > 0).astype(np.int)
d:\120\python\python37\lib\site-packages\sklearn\linear_model\_base.py in decision_function(self, X)
271 if X.shape[1] != n_features:
272 raise ValueError("X has %d features per sample; expecting %d"
--> 273 % (X.shape[1], n_features))
274
275 scores = safe_sparse_dot(X, self.coef_.T,
ValueError: X has 2 features per sample; expecting 6
错误原因:
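模型是用二次多项式特征训练的(PolynomialFeatures(degree=2) 会把 2 个原始特征扩展成 6 列:1、x1、x2、x1²、x1·x2、x2²),但在绘制决策边界的时候,没有对输入做同样的多项式处理,所以 predict 收到的是 2 列而不是 6 列。需要修改的位置在后面的代码中用注释标记出来了。
绘制决策边界时,输入 X_new 是网格上每个点的两个坐标(横坐标和纵坐标),必须先经过与训练数据相同的多项式变换,再传给 model.predict。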
源代码:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib setup: a font that can render Chinese labels, and a minus sign
# that displays correctly alongside it.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Data format: score 1, score 2, admitted flag (1 = admitted, 0 = not admitted).
# Load the data set and separate the two score columns from the label column.
data = np.loadtxt('ex2data1.txt', delimiter=',')
data_X, data_y = data[:, :2], data[:, 2]
# Helper that draws the decision regions of a fitted classifier.
def plot_decision_boundary(model, axis, *, degree=2, max_points_per_axis=1000):
    """Fill the plane with the class ``model`` predicts at each grid point.

    Args:
        model: Fitted classifier exposing ``predict``.
        axis: Sequence ``[x_min, x_max, y_min, y_max]`` bounding the region.
        degree: Degree of the polynomial expansion applied to the grid points
            before prediction. It must match the expansion the model was
            trained on; otherwise ``predict`` raises a feature-count
            ``ValueError``. Pass ``None`` when the model consumes the two raw
            coordinates directly (a plain model, or a pipeline that expands
            the features itself).
        max_points_per_axis: Upper bound on the number of grid samples per
            axis; ``None`` disables the bound. The grid asks for 100 samples
            per axis unit, so a 0..100 range would otherwise produce a
            10000 x 10000 grid (10**8 points) and exhaust memory.
    """
    x_min, x_max, y_min, y_max = axis[:4]

    def _samples(low, high):
        # 100 samples per unit of axis length, capped to keep the grid small.
        count = int((high - low) * 100)
        if max_points_per_axis is not None:
            count = min(count, max_points_per_axis)
        return count

    x0, x1 = np.meshgrid(
        np.linspace(x_min, x_max, _samples(x_min, x_max)),
        np.linspace(y_min, y_max, _samples(y_min, y_max)),
    )

    # Each row is one grid point: (horizontal coordinate, vertical coordinate).
    X_new = np.c_[x0.ravel(), x1.ravel()]

    # Key step: the grid must go through the same feature expansion as the
    # training data before it is handed to the model.
    if degree is not None:
        X_new = PolynomialFeatures(degree=degree).fit_transform(X_new)

    zz = model.predict(X_new).reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
# Split the data into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, random_state=666)

# Expand both splits with ONE transformer: fit it on the training split and
# reuse it on the test split, instead of fitting a second transformer on the
# test data.
poly = PolynomialFeatures(degree=2)
X2 = poly.fit_transform(X_train)
X_test2 = poly.transform(X_test)

# Train the model on the expanded features.
log_reg = LogisticRegression(max_iter=10000)
log_reg.fit(X2, y_train)

# Visualise the decision regions together with the raw samples.
plot_decision_boundary(log_reg, axis=[0, 100, 0, 100])
plt.scatter(data_X[data_y == 0, 0], data_X[data_y == 0, 1], color='red')
plt.scatter(data_X[data_y == 1, 0], data_X[data_y == 1, 1], color='blue')
plt.xlabel('成绩1')
plt.ylabel('成绩2')
plt.title('两门课程成绩与是否录取的关系')
plt.show()

# Evaluate: accuracy on the training split, then on the test split.
print(log_reg.score(X2, y_train))
print(log_reg.score(X_test2, y_test))
输出结果:
运行速度可能比较慢。
只使用原始特征(不做多项式扩展)的逻辑回归的部分代码:
可以和上面的多项式逻辑回归对照着看。
# Split the data into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    data_X, data_y, random_state=666
)
# Train directly on the two raw features (no polynomial expansion).
log_reg = LogisticRegression().fit(X_train, y_train)
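第二种多项式逻辑回归的代码(使用 Pipeline):
这种写法把多项式变换放进了 Pipeline,模型可以直接接收原始的两个特征,所以决策边界函数里不需要(也不能)再对 X_new 做多项式处理。
使用前需要先导入 StandardScaler 和 Pipeline:from sklearn.preprocessing import StandardScaler 以及 from sklearn.pipeline import Pipeline。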
def PolynomialLogisticRegression(degree):
    """Build an unfitted polynomial logistic-regression pipeline.

    The pipeline chains three steps, in order: polynomial feature expansion
    (``'poly'``), standardisation (``'std_scaler'``) and logistic regression
    (``'log_reg'``).

    Args:
        degree: Degree passed to ``PolynomialFeatures``.

    Returns:
        The assembled, not yet fitted ``Pipeline``.
    """
    # Imported locally: these two names are not among the imports visible at
    # the top of the file, so the function would otherwise raise NameError.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('log_reg', LogisticRegression()),
    ])
# Build the degree-2 pipeline and fit it on the raw (unexpanded) training data.
log_reg = PolynomialLogisticRegression(degree=2).fit(X_train, y_train)
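上面这种写法的准确率会低一些,原因是用 StandardScaler 对数据做了标准化;去掉 ('std_scaler', StandardScaler()) 这一步之后,就和第一种做法基本一致了(注意第一种还设置了 max_iter=10000,去掉标准化后可能也需要调大 max_iter 才能收敛)。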