Days 4-6 are all about logistic regression.
The original code did not include the code for the last two figures, so I tried writing it myself to draw the two plots.
# modified from the code in 100-Days-of-ML-Code
# Day 4- 6: Logistic Regression
# Step 1: Data Preprocessing
# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LogisticRegression
import matplotlib as mpl
import matplotlib.patches as mpatches
# importing dataset
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [2,3]].values
Y = dataset.iloc[:, 4].values
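# Quick sanity check (added; assumes the standard Social_Network_Ads layout,
# i.e. columns 2 and 3 are Age and EstimatedSalary, column 4 is Purchased)
print(dataset.head())
print('X shape:', X.shape, ' Y shape:', Y.shape)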
# splitting the dataset into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
# keep the labels as 1-D arrays of shape (n_samples,), which is what scikit-learn expects
print('X_train:\n', X_train.shape)
print('Y_train:\n', Y_train.shape)
# Step 2: Fitting the model - feature scaling and logistic regression in one pipeline
lr = Pipeline([('sc', StandardScaler()),
               ('poly', PolynomialFeatures(degree=1)),
               ('clf', LogisticRegression())])
lr.fit(X_train, Y_train)
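# Optional sketch (not in the original): raising the polynomial degree lets the
# same pipeline learn a curved decision boundary; degree=2 is only an illustration.
lr_poly = Pipeline([('sc', StandardScaler()),
                    ('poly', PolynomialFeatures(degree=2)),
                    ('clf', LogisticRegression())])
lr_poly.fit(X_train, Y_train)
print('degree-2 pipeline train accuracy: %.2f%%' % (100 * lr_poly.score(X_train, Y_train)))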
# Step 3: Prediction
# predicting the test set result
y_pred = lr.predict(X_test)
print(y_pred)
print('accuracy: %.2f%%' % (100 * np.mean(y_pred == Y_test)))
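# Cross-check (added): sklearn's accuracy_score and the pipeline's score method
# should agree with the manual accuracy above.
from sklearn.metrics import accuracy_score
print('accuracy_score: %.2f%%' % (100 * accuracy_score(Y_test, y_pred)))
print('pipeline score: %.2f%%' % (100 * lr.score(X_test, Y_test)))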
# Step 4: evaluating the prediction
# making the confusion matrix
from sklearn.metrics import confusion_matrix
confusionMat = confusion_matrix(Y_test, y_pred)
print('confusionMat:\n', confusionMat)
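# Added note: with labels ordered [0, 1] the matrix reads [[TN, FP], [FN, TP]],
# so precision and recall for the 'buy' class follow directly
# (assumes the classifier predicted at least one positive).
tn, fp, fn, tp = confusionMat.ravel()
print('precision: %.2f  recall: %.2f' % (tp / (tp + fp), tp / (tp + fn)))
from sklearn.metrics import classification_report
print(classification_report(Y_test, y_pred))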
np.set_printoptions(suppress=True, linewidth=100, edgeitems=20)
# visualization
N, M = 200, 200  # number of sampling points along each axis
age_min, age_max = X[:, 0].min(), X[:, 0].max()
salary_min, salary_max = X[:, 1].min(), X[:, 1].max()
t1 = np.linspace(age_min, age_max, N)
t2 = np.linspace(salary_min, salary_max, M)
age, salary = np.meshgrid(t1, t2)  # build the grid of sampling points
x_test = np.stack((age.flat, salary.flat), axis=1)  # each row is one (age, salary) grid point
# print('x_test:\n', x_test)
y_hat = lr.predict(x_test)
print('y_hat shape:', y_hat.shape)
print('y_hat:\n', y_hat)
y_hat = y_hat.reshape(age.shape)  # reshape the predictions back to the grid shape
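# Optional addition: predict_proba gives P(buy) on the same grid; its 0.5 level set
# is the decision boundary and could be overlaid on the plots below with
# plt.contour(age, salary, p_hat, [0.5]).
p_hat = lr.predict_proba(x_test)[:, 1].reshape(age.shape)
print('P(buy) on the grid ranges from %.3f to %.3f' % (p_hat.min(), p_hat.max()))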
cm_light = mpl.colors.ListedColormap(['#FF8080', '#A0A0FF'])  # background regions: no buy / buy
cm_dark = mpl.colors.ListedColormap(['g', 'b'])                # sample points: no buy / buy
plt.figure(facecolor='w')
print('age , salary, y_hat: ', age.shape, salary.shape, y_hat.shape)
plt.pcolormesh(age, salary, y_hat, cmap=cm_light)  # draw the predicted class regions
# print('x[:, 0] , x[:, 1], y_hat: ', X[:, 0].shape, X[:, 1].shape, np.expand_dims(Y, axis=1).shape)
# visualization of the training set
plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, edgecolors='k', s=50, cmap=cm_dark)  # plot the training samples
plt.xlabel('age', fontsize=14)
plt.ylabel('salary', fontsize=14)
plt.xlim(age_min, age_max)
plt.ylim(salary_min, salary_max)
plt.grid()
patchs = [mpatches.Patch(color='#FF8080', label='no buy'),
          mpatches.Patch(color='#A0A0FF', label='buy')]
plt.legend(handles=patchs, fancybox=True, framealpha=0.8)
plt.title('Buy or Not - Logistic Regression (Train)', fontsize=18)
plt.show()
# visualization of the test set
plt.figure(facecolor='w')
plt.pcolormesh(age, salary, y_hat, cmap=cm_light)  # draw the predicted class regions
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred, edgecolors='k', s=50, cmap=cm_dark)  # plot the test samples, colored by predicted label
plt.xlabel('age', fontsize=14)
plt.ylabel('salary', fontsize=14)
plt.xlim(age_min, age_max)
plt.ylim(salary_min, salary_max)
plt.grid()
patchs = [mpatches.Patch(color='#FF8080', label='no buy'),
          mpatches.Patch(color='#A0A0FF', label='buy')]
plt.legend(handles=patchs, fancybox=True, framealpha=0.8)
plt.title('Buy or Not - Logistic Regression (Test)', fontsize=18)
plt.show()
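# A final usage sketch (the raw values here are made up, not taken from the dataset):
# the fitted pipeline scales and classifies new (age, salary) pairs directly.
sample = np.array([[30, 87000]])
print('prediction for age=30, salary=87000:', lr.predict(sample)[0],
      ' P(buy)=%.3f' % lr.predict_proba(sample)[0, 1])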