Days 4-6 are all about logistic regression.
The original code did not include the code for the last two figures, so I tried writing it myself to draw the two plots.
# modified from the code in 100-Days-of-ML-Code
# Day 4- 6: Logistic Regression
# Step 1: Data Preprocessing
# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LogisticRegression
import matplotlib as mpl
import matplotlib.patches as mpatches
# importing dataset
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [2,3]].values
Y = dataset.iloc[:, 4].values
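# Quick sanity check (added; assumes the standard Social_Network_Ads layout,
# i.e. columns 2 and 3 are Age and EstimatedSalary, column 4 is Purchased)
print(dataset.head())
print('X shape:', X.shape, ' Y shape:', Y.shape)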
# splitting the dataset into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
# keep the labels as 1-D arrays of shape (n_samples,), which is what scikit-learn expects
print('X_train:\n', X_train.shape)
print('Y_train:\n', Y_train.shape)
# Step 2: Fitting the model - feature scaling and logistic regression in one pipeline
lr = Pipeline([('sc', StandardScaler()),
               ('poly', PolynomialFeatures(degree=1)),
               ('clf', LogisticRegression())])
lr.fit(X_train, Y_train)
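# Optional sketch (not in the original): raising the polynomial degree lets the
# same pipeline learn a curved decision boundary; degree=2 is only an illustration.
lr_poly = Pipeline([('sc', StandardScaler()),
                    ('poly', PolynomialFeatures(degree=2)),
                    ('clf', LogisticRegression())])
lr_poly.fit(X_train, Y_train)
print('degree-2 pipeline train accuracy: %.2f%%' % (100 * lr_poly.score(X_train, Y_train)))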
# Step 3: Prediction
# predicting the test set result
y_pred = lr.predict(X_test)
print(y_pred)
print('accuracy: %.2f%%' % (100 * np.mean(y_pred == Y_test)))
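# Cross-check (added): sklearn's accuracy_score and the pipeline's score method
# should agree with the manual accuracy above.
from sklearn.metrics import accuracy_score
print('accuracy_score: %.2f%%' % (100 * accuracy_score(Y_test, y_pred)))
print('pipeline score: %.2f%%' % (100 * lr.score(X_test, Y_test)))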
# Step 4: evaluating the prediction
# making the confusion matrix
from sklearn.metrics import confusion_matrix
confusionMat = confusion_matrix(Y_test, y_pred)
print('confusionMat:\n', confusionMat)
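# Added note: with labels ordered [0, 1] the matrix reads [[TN, FP], [FN, TP]],
# so precision and recall for the 'buy' class follow directly
# (assumes the classifier predicted at least one positive).
tn, fp, fn, tp = confusionMat.ravel()
print('precision: %.2f  recall: %.2f' % (tp / (tp + fp), tp / (tp + fn)))
from sklearn.metrics import classification_report
print(classification_report(Y_test, y_pred))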
np.set_printoptions(suppress=True, linewidth=100, edgeitems=20)
# visualization
N, M = 200, 200  # number of sampling points along each axis
age_min, age_max = X[:, 0].min(), X[:, 0].max()
salary_min, salary_max = X[:, 1].min(), X[:, 1].max()
t1 = np.linspace(age_min, age_max, N)
t2 = np.linspace(salary_min, salary_max, M)
age, salary = np.meshgrid(t1, t2)  # build the grid of sampling points
x_test = np.stack((age.flat, salary.flat), axis=1)  # each row is one (age, salary) grid point
# print('x_test:\n', x_test)
y_hat = lr.predict(x_test)
print('y_hat shape:', y_hat.shape)
print('y_hat:\n', y_hat)
y_hat = y_hat.reshape(age.shape)  # reshape the predictions back to the grid shape
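# Optional addition: predict_proba gives P(buy) on the same grid; its 0.5 level set
# is the decision boundary and could be overlaid on the plots below with
# plt.contour(age, salary, p_hat, [0.5]).
p_hat = lr.predict_proba(x_test)[:, 1].reshape(age.shape)
print('P(buy) on the grid ranges from %.3f to %.3f' % (p_hat.min(), p_hat.max()))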
cm_light = mpl.colors.ListedColormap(['#FF8080', '#A0A0FF'])  # background regions: no buy / buy
cm_dark = mpl.colors.ListedColormap(['g', 'b'])                # sample points: no buy / buy
plt.figure(facecolor='w')
print('age , salary, y_hat: ', age.shape, salary.shape, y_hat.shape)
plt.pcolormesh(age, salary, y_hat, cmap=cm_light)  # draw the predicted class regions
# print('x[:, 0] , x[:, 1], y_hat: ', X[:, 0].shape, X[:, 1].shape, np.expand_dims(Y, axis=1).shape)
# visualization of the training set
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, edgecolors='k', s=50, cmap=cm_dark)  # plot the training samples
plt.xlabel('age', fontsize=14)
plt.ylabel('salary', fontsize=14)
plt.xlim(age_min, age_max)
plt.ylim(salary_min, salary_max)
plt.grid()
patchs = [mpatches.Patch(color='#FF8080', label='no buy'),
          mpatches.Patch(color='#A0A0FF', label='buy')]
plt.legend(handles=patchs, fancybox=True, framealpha=0.8)
plt.title('Buy or Not - Logistic Regression (Train)', fontsize=18)
plt.show()
# visualization of the test set
plt.figure(facecolor='w')
plt.pcolormesh(age, salary, y_hat, cmap=cm_light)  # draw the predicted class regions
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred, edgecolors='k', s=50, cmap=cm_dark)  # plot the test samples, colored by predicted label
plt.xlabel('age', fontsize=14)
plt.ylabel('salary', fontsize=14)
plt.xlim(age_min, age_max)
plt.ylim(salary_min, salary_max)
plt.grid()
patchs = [mpatches.Patch(color='#FF8080', label='no buy'),
          mpatches.Patch(color='#A0A0FF', label='buy')]
plt.legend(handles=patchs, fancybox=True, framealpha=0.8)
plt.title('Buy or Not - Logistic Regression (Test)', fontsize=18)
plt.show()
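# A final usage sketch (the raw values here are made up, not taken from the dataset):
# the fitted pipeline scales and classifies new (age, salary) pairs directly.
sample = np.array([[30, 87000]])
print('prediction for age=30, salary=87000:', lr.predict(sample)[0],
      ' P(buy)=%.3f' % lr.predict_proba(sample)[0, 1])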