from sklearn.neighbors import KNeighborsClassifier
from sklearn import ensemble
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.learning_curve import learning_curve
from sklearn.svm import SVC
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Step 1: load the dataset and separate the features from the 'Sex' label.
data = pd.read_excel('/home/kesci/input/data_9096/data.xlsx')
features = data.drop('Sex', axis=1)
y = data['Sex']

# Step 2: split 70/30 first, then standardize.
# The scaler is fitted on the training rows only, so the mean and variance of
# the held-out rows never influence the model inputs. Scaling the whole matrix
# before splitting would leak test-set statistics into training and make the
# reported score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.3, random_state=0)
scaler = preprocessing.StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Keep `X` bound to a standardized version of the full feature matrix for any
# later code that reads it; it is scaled with the training-set statistics.
X = scaler.transform(features)

# Step 3: train the models.
# KNN model (library-default hyper-parameters).
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
print(knn.predict(X_test))  # predictions for the held-out rows
print(y_test)  # true labels, printed for comparison
print(knn.score(X_test, y_test))  # model score on the held-out rows

# GBDT model (library-default hyper-parameters).
clf = ensemble.GradientBoostingClassifier()
clf.fit(X_train, y_train)
# GBDT and KNN data-modeling analysis steps
# (page footer) Latest recommended article, published 2022-09-17 20:03:45