# Dataset
# Link: https://pan.baidu.com/s/12QG95jbvnqbSC6Gt6eRXlg
# Extraction code: fblc
# https://scikit-learn.org/stable/user_guide.html
# https://sklearn.apachecn.org/
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
# Load the training set. Each non-blank line holds three whitespace-separated
# floats: the two feature values followed by the class label.
with open("../Dataset/train_data.text", "r") as f1:
    # readlines() returns the file content as a list of lines; the `with`
    # block guarantees the handle is closed even if reading fails.
    lines = f1.readlines()

data = []   # feature pairs, e.g. [[-10.734951053477072, 1.2478857582135388]]
label = []  # class label of the sample at the same index in `data`
for i in lines:
    if not i.strip():
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file) instead of failing on the 3-value unpack below.
        continue
    x, y, z = map(float, i.split())
    data.append([x, y])
    label.append(z)

# AdaBoost classifier.
# sample_weight weights each sample's contribution to the loss during
# training; passing None leaves the weighting to the estimator's default.
model = AdaBoostClassifier(n_estimators=100, random_state=0)
# M is the trained model (the value returned by fit()).
M = model.fit(X=data, y=label, sample_weight=None)
def plot_decision_regions(X, y1, classifier, test_idx=None, resolution=0.02):
    """Draw the decision regions of a fitted classifier over a 2-D plane.

    A dense grid covering the range of ``X`` (padded by 1 on every side) is
    classified with ``classifier.predict`` and rendered as a filled contour
    plot on the current matplotlib axes. The sample points themselves are
    not drawn.

    Args:
        X: Array-like of shape (n_samples, 2); column 0 is plotted on the
            x-axis and column 1 on the y-axis.
        y1: Labels of the samples. Only the number of distinct values is
            used, to decide how many colours the colour map contains.
        classifier: Object exposing ``predict(points)`` that returns one
            label per grid point.
        test_idx: Accepted for backward compatibility; currently unused.
        resolution: Step between neighbouring grid points on both axes.

    Returns:
        None. The plot is drawn through ``matplotlib.pyplot``; call
        ``plt.show()`` afterwards to display it.
    """
    # Accept plain nested lists as well as ndarrays for column slicing.
    X = np.asarray(X)

    # One colour per distinct label (np.unique returns the sorted unique
    # values); at most five colours are available.
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y1))])

    # Axis ranges: the data extent padded by one unit on each side.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    # Mesh-grid coordinates over which the classifier is evaluated.
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # ravel() flattens each coordinate matrix; stacking and transposing
    # gives one (x1, x2) row per grid point. Built once and reused.
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T

    # predict() yields a label per grid point; reshape back to the grid so
    # contourf can colour each region by its predicted class.
    Z = classifier.predict(grid_points)
    Z = Z.reshape(xx1.shape)

    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
# Load the test set. Each non-blank line holds three whitespace-separated
# floats: the two feature values followed by the class label.
with open("../Dataset/test_data.text", "r") as f1:
    # The `with` block closes the file once the lines have been read.
    lines = f1.readlines()

data1 = []   # feature pairs of the test samples
label1 = []  # class label of the sample at the same index in `data1`
for i in lines:
    if not i.strip():
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file) instead of failing on the 3-value unpack below.
        continue
    x, y, z = map(float, i.split())
    data1.append([x, y])
    label1.append(z)

# Convert the Python lists to arrays so the plotting helper can slice
# columns with X[:, 0] / X[:, 1].
data1 = np.array(data1)
label1 = np.array(label1)

# Report the score on the test set; previously the value was computed and
# silently discarded.
print("Test accuracy:", model.score(data1, label1))

# Visualise the decision regions of the trained AdaBoost (adaptive
# boosting) model over the range of the test data.
plot_decision_regions(data1, label1, M)
plt.show()