import pandas as pd
import numpy as np
import sklearn
from sklearn import datasets # 导入数据集
from sklearn.model_selection import train_test_split# 导入模块 切分数据
from sklearn.preprocessing import StandardScaler # 标准化
from sklearn.preprocessing import MinMaxScaler # 归一化
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis # 模型
from sklearn.model_selection import GridSearchCV # 网格搜索
from sklearn.pipeline import Pipeline # 流水线管道操作
from sklearn.metrics import accuracy_score # 得分验证
# ===========================加载数据3种方法==================================================
#
# #方式1
# from sklearn.datasets import load_iris
# data = load_iris()
#
# #导入数据和标签
# data_X = data.data
# data_y = data.target
#
# #方式2
# from sklearn import datasets
# loaded_data = datasets.load_iris() # 导入数据集的属性
#
# #导入样本数据
# data_X = loaded_data.data
# # 导入标签
# data_y = loaded_data.target
#
# #方式3
# # 直接返回
# data_X, data_y = load_iris(return_X_y=True)
# =============================================================================
# Iris classification dataset.
iris = datasets.load_iris()
# Regression dataset.
# NOTE: datasets.load_boston() was deprecated in scikit-learn 1.0 and
# REMOVED in 1.2 (ethical concerns with the Boston housing data), so it
# raises on any modern install. load_diabetes() ships with scikit-learn,
# needs no download, and exposes the same Bunch interface
# (data / target / feature_names / DESCR).
diabetes = datasets.load_diabetes()
# ==============================Two ways to read a Bunch's attributes==========================
#
# print(iris.data)
# # print(iris["data"])        # dict-style access is equivalent
# # Load the data into pandas:
# df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)
# df_iris["target"] = iris.target
# print(df_iris)
#
# =============================================================================
print(diabetes.keys())                             # available attributes: data, target, feature_names, DESCR, ...
print(diabetes.data.shape, diabetes.target.shape)  # shapes of the feature matrix and label vector
print(diabetes.feature_names)                      # feature names
print(diabetes["DESCR"])                           # dataset description; dict-style access also works
# (the old `.filename` attribute was removed from dataset Bunches in modern scikit-learn)
data_X = iris.data
data_y = iris.target
# =============================Split the data into train and test sets================================================
# Hold out 20% of the samples for testing.
#
# random_state fixes the pseudo-random seed used by, e.g.:
#   1. the train/test split
#   2. building decision trees
#   3. building random forests
# Changing this value changes the generated random numbers (and therefore the split).
#
# =============================================================================
X_train, X_test, y_train, y_test = train_test_split(
data_X,
data_y,
test_size=0.2,
random_state=111
)
print(len(X_train))  # number of training samples (120 of the 150 iris rows)
# =================================Preprocessing===================================
# Optionally standardize or min-max-scale the features before fitting.
# =============================================================================
# Standardization (zero mean, unit variance):
#ss = StandardScaler()
#X_norm = ss.fit_transform(X_train)
# Min-max scaling to [0, 1]:
#mm = MinMaxScaler()
#X_scaled = mm.fit_transform(X_train)
# Instantiate and fit a k-nearest-neighbors classifier with k=5.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
# Predict on the held-out test set and evaluate.
y_pred = knn.predict(X_test)
score = knn.score(X_test, y_test)
# accuracy_score's signature is (y_true, y_pred, ...): ground truth comes
# first. Accuracy itself is symmetric, but the reversed order breaks as soon
# as sample_weight or an asymmetric metric is involved.
acc = accuracy_score(y_test, y_pred)
print("score:", score)
print("acc:", acc)
# scikit-learn study notes — a first look
# Originally published 2024-07-25 22:24:14