# Step 1: 库函数导入 (import libraries)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
# Step 2: 数据读取/载入 (read / load the data)
# Load the iris dataset shipped with scikit-learn and split it into the
# label vector and a feature table with named columns.
data = load_iris()
iris_target = data.target
iris_features = pd.DataFrame(data=data.data, columns=data.feature_names)

# Quick inspection of the data. `info()` prints by itself; the remaining
# results are printed explicitly because a bare expression statement is
# only displayed by an interactive notebook and is discarded in a script.
iris_features.info()
print(iris_features.head())
print(iris_target)
# Number of samples per class label.
print(pd.Series(iris_target).value_counts())
print(iris_features.describe())
# Step 4: 可视化描述 (visual description)
# Work on a copy so that adding the label column does not modify
# `iris_features` itself.
iris_all = iris_features.copy()
iris_all["target"] = iris_target

# Pairwise scatter plots of all features, coloured by class label, with
# histograms on the diagonal.
sns.pairplot(data=iris_all, diag_kind='hist', hue='target')
plt.show()

# One box plot per feature, grouped by class label. The three statements
# below form the loop body so that every feature gets its own titled figure.
for col in iris_features.columns:
    sns.boxplot(x='target', y=col, saturation=0.5, palette='pastel', data=iris_all)
    plt.title(col)
    plt.show()
# Step 5: 利用逻辑回归模型在二分类上进行训练和预测 (train and predict with logistic regression on a binary problem)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Keep only the first 100 samples to obtain a two-class problem
# (the iris rows are ordered by class, 50 samples per class).
iris_features_part = iris_features.iloc[:100]
iris_target_part = iris_target[:100]

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    iris_features_part, iris_target_part, test_size=0.2, random_state=2020
)

# Fit the logistic regression model on the training split.
clf = LogisticRegression(random_state=0, solver='lbfgs')
clf.fit(x_train, y_train)

# Learned parameters: one weight per feature plus the intercept.
print("逻辑回归的权重为:",clf.coef_)
print("逻辑回归的截距为:",clf.intercept_)

# Predict on both splits so training and test accuracy can be compared.
train_predict = clf.predict(x_train)
test_predict = clf.predict(x_test)

print("训练集预测和真实值的准确度:",metrics.accuracy_score(y_true=y_train, y_pred=train_predict))
print("测试集预测和真实值的准确度:",metrics.accuracy_score(y_true=y_test, y_pred=test_predict))

# confusion_matrix expects (y_true, y_pred): rows are the true labels and
# columns the predicted labels. Passing the predictions first would yield
# the transposed matrix, so the arguments are given by keyword.
confusion_matrix_result = metrics.confusion_matrix(y_true=y_test, y_pred=test_predict)
print('混淆矩阵结果为:\n',confusion_matrix_result)
# Step 6: 利用逻辑回归模型在多分类上进行训练和预测 (train and predict with logistic regression on a multi-class problem)
# Use the complete dataset (all class labels) this time, again holding out
# 20% of the samples for testing with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    iris_features, iris_target, test_size=0.2, random_state=2020
)

# Fit a fresh logistic regression model on the multi-class training split.
clf = LogisticRegression(random_state=0, solver='lbfgs')
clf.fit(x_train, y_train)

# Learned weights and intercepts of the fitted model.
print("逻辑回归的权重为:\n",clf.coef_)
print("逻辑回归的截距为:\n",clf.intercept_)

# Hard class predictions for both splits.
train_predict = clf.predict(x_train)
test_predict = clf.predict(x_test)

# Per-class probability estimates for both splits; kept as module-level
# names so that code following this section can inspect them.
train_predict_proba = clf.predict_proba(x_train)
test_predict_proba = clf.predict_proba(x_test)

print("训练集预测和真实值的准确度:",metrics.accuracy_score(y_true=y_train, y_pred=train_predict))
print("测试集预测和真实值的准确度:",metrics.accuracy_score(y_true=y_test, y_pred=test_predict))

# confusion_matrix expects (y_true, y_pred): rows are the true labels and
# columns the predicted labels. Passing the predictions first would yield
# the transposed matrix, so the arguments are given by keyword.
confusion_matrix_result = metrics.confusion_matrix(y_true=y_test, y_pred=test_predict)
print('混淆矩阵结果为:\n',confusion_matrix_result)