代码如下:
# 1、导入数据集
from collections import OrderedDict
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics
# Step 1: build the sample data set.
# Two columns of 20 rows each: '学习时间' (study time) holds the feature values
# and '通过考试' (passed exam) holds the 0/1 labels.
exam_Dict = {
    '学习时间': [
        0.5, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ],
    '通过考试': [
        0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
    ],
}
# OrderedDict keeps the column order explicit (feature first, label second).
examOrderDict = OrderedDict(exam_Dict)
examDf = pd.DataFrame(examOrderDict)
# The former bare `examDf.head()` expression was dropped: outside a notebook
# its return value is discarded, so it had no effect. The full frame is
# printed instead.
print(examDf)
# Step 2: draw a scatter plot of the raw data.
# Feature column (study time).
exam_x = examDf['学习时间']
# Label column (0/1 exam outcome).
exam_y = examDf['通过考试']
plt.scatter(exam_x, exam_y, label='exam data', color='b')
# Axis captions: x is the feature, y is the label.
plt.ylabel('results')
plt.xlabel('Hours')
plt.show()
# Step 3: split the data into a training set (80%) and a test set (20%).
x_train, x_test, y_train, y_test = train_test_split(
    exam_x,
    exam_y,
    train_size=0.8,
)
# Report the shapes of the features and the labels before and after the split.
print(
    '原式数据特征:', exam_x.shape,
    '训练集数据特征:', x_train.shape,
    "测试集数据特征:", x_test.shape,
)
print(
    '原式数据标签:', exam_y.shape,
    "训练数据标签:", y_train.shape,
    '测试集数据标签:', y_test.shape,
)
# Show the container type of the training features before conversion.
print(type(x_train))
# Convert the features to NumPy arrays and reshape them into a single column
# (n rows, 1 column); the labels are left as they are.
x_train = np.array(x_train).reshape(-1, 1)
x_test = np.array(x_test).reshape(-1, 1)
# Step 4: create and train the models.
# Model 1: random forest with 100 trees.
model = RandomForestClassifier(n_estimators=100)
model.fit(x_train, y_train)
# Step 5: evaluate the model on the held-out test set.
# For scikit-learn classifiers, score() is the mean accuracy.
a = model.score(x_test, y_test)
print('随机森林模型拟合程度:', a)
y_pred = model.predict(x_test)
# Fix: the error must be measured against the true labels (y_test). The
# original passed the feature values (x_test), which compared study hours
# with the predicted 0/1 labels.
print('均方差:', metrics.mean_squared_error(y_test, y_pred))
# Model 2: logistic regression.
model = LogisticRegression()
model.fit(x_train, y_train)
# Step 5: evaluate the model on the held-out test set.
# For scikit-learn classifiers, score() is the mean accuracy.
b = model.score(x_test, y_test)
print('逻辑回归模型拟合程度:', b)
y_pred = model.predict(x_test)
# Fix: the error must be measured against the true labels (y_test). The
# original passed the feature values (x_test), which compared study hours
# with the predicted 0/1 labels.
print('均方差:', metrics.mean_squared_error(y_test, y_pred))
# Model 3: support vector machine (SVC with its default settings).
model = SVC()
model.fit(x_train, y_train)
# Evaluate on the held-out test set; for scikit-learn classifiers, score()
# is the mean accuracy.
c = model.score(x_test, y_test)
print('支持向量机模型拟合程度:', c)
y_pred = model.predict(x_test)
# Fix: the error must be measured against the true labels (y_test). The
# original passed the feature values (x_test), which compared study hours
# with the predicted 0/1 labels.
print('均方差:', metrics.mean_squared_error(y_test, y_pred))
# Model 4: gradient boosting classifier.
model = GradientBoostingClassifier()
model.fit(x_train, y_train)
# Accuracy on the training data, printed alongside the test accuracy below so
# the two can be compared.
d1 = model.score(x_train, y_train)
print('梯度增强机训练集拟合程度:', d1)
# Accuracy on the held-out test set.
d = model.score(x_test, y_test)
print('梯度增强机模型拟合程度:', d)
y_pred = model.predict(x_test)
# Fix: the error must be measured against the true labels (y_test). The
# original passed the feature values (x_test), which compared study hours
# with the predicted 0/1 labels.
print('均方差:', metrics.mean_squared_error(y_test, y_pred))
这里我根据之前看过的内容,自己又用了几种机器学习方法,并用均方差来进行评价。虽然代码不同,但是大致思路是一样的,也可以参考一下~