import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
def get_data(fire_path) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        fire_path: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to load.

    Returns:
        The parsed file contents as a ``pandas.DataFrame``.
    """
    # Read the path the caller asked for instead of a hard-coded file name;
    # read_csv already returns a DataFrame, so no extra wrapping is needed.
    return pd.read_csv(fire_path)
def check_data(df) -> None:
    """Print a quick overview of a DataFrame to standard output.

    Prints, in order: the first rows, the last rows, the shape, the summary
    statistics from ``describe()`` and the column/dtype report from
    ``info()``.

    Args:
        df: The ``pandas.DataFrame`` to inspect.
    """
    print(df.head())
    print(df.tail())
    print(df.shape)
    # describe/info are methods: they must be called, otherwise only the
    # bound-method repr is printed.
    print(df.describe())
    # info() writes its report to stdout itself and returns None, so it is
    # not wrapped in print().
    df.info()
def prepro_data(df) -> dict:
    """Split a DataFrame into a feature matrix and a label vector.

    The ``Species`` column becomes the labels; the ``Species`` and ``Id``
    columns are excluded from the features. The input frame is left
    untouched, so the function can safely be called more than once on the
    same DataFrame.

    Args:
        df: DataFrame containing at least the ``Species`` and ``Id`` columns.

    Returns:
        A dict with two entries: ``'data'`` (2-D array of the remaining
        columns) and ``'target'`` (1-D array of the ``Species`` values).

    Raises:
        KeyError: If ``Species`` or ``Id`` is missing from ``df``.
    """
    labels = np.array(df['Species'])
    # Drop on a copy rather than in place so the caller's frame keeps its
    # label and id columns.
    feature_frame = df.drop(columns=['Species', 'Id'])
    features = np.array(feature_frame)
    return {'data': features, 'target': labels}
def eng_data(iris):
    """Split the data set and standardize the features.

    Args:
        iris: Dict with a ``'data'`` entry (feature array) and a ``'target'``
            entry (label array), as produced by ``prepro_data``.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``. The feature arrays
        are standardized; the label arrays are column vectors of shape
        ``(n, 1)``.
    """
    features = iris['data']
    # Labels are only reshaped into a column vector here; their values are
    # not encoded or otherwise changed.
    labels = iris['target'].reshape(-1, 1)
    # Hold out 30% of the samples for testing; the fixed seed keeps the
    # split reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=22
    )
    scaler = StandardScaler()
    # Learn mean/std from the training split only ...
    x_train = scaler.fit_transform(x_train)
    # ... and reuse those statistics for the test split. Refitting on the
    # test data would scale the two splits differently and leak test
    # statistics into the evaluation.
    x_test = scaler.transform(x_test)
    return x_train, x_test, y_train, y_test
def training(x_train, y_train, n_neighbors):
    """Fit a k-nearest-neighbours classifier.

    Args:
        x_train: Training feature matrix.
        y_train: Training labels. A single-column 2-D array is flattened to
            1-D before fitting; any other shape is passed through as is.
        n_neighbors: Number of neighbours the classifier consults.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    labels = np.asarray(y_train)
    # A (n, 1) column vector makes scikit-learn emit a DataConversionWarning;
    # flatten it, but leave genuine multi-output targets alone.
    if labels.ndim == 2 and labels.shape[1] == 1:
        labels = labels.ravel()
    estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
    estimator.fit(x_train, labels)
    return estimator
def asse(estimator, x_test, y_test) -> None:
    """Predict on the test set and print the evaluation.

    Prints an element-wise comparison of predictions against the true
    labels, followed by the value returned by ``estimator.score``.

    Args:
        estimator: Fitted object exposing ``predict(X)`` and ``score(X, y)``.
        x_test: Test feature matrix.
        y_test: True labels for ``x_test``. A single-column 2-D array is
            flattened to 1-D before use.
    """
    y_true = np.asarray(y_test)
    # Comparing 1-D predictions with a (n, 1) column would broadcast to an
    # n x n matrix instead of an element-wise result, so flatten first.
    if y_true.ndim == 2 and y_true.shape[1] == 1:
        y_true = y_true.ravel()
    y_predict = estimator.predict(x_test)
    print("预测值真实值的对比:\n", y_predict == y_true)
    # score() receives the test features and the true test labels.
    score = estimator.score(x_test, y_true)
    print(score)
fire_path = 'Iris.csv'
n_neighbors = 3  # number of neighbours used by the KNN classifier

# 1. Load the CSV file into a DataFrame.
df = get_data(fire_path)

# 2. Pre-process: drop the unused columns and pack features/labels in a dict.
iris = prepro_data(df)

# 3. Feature engineering: split into train/test sets and standardize the
#    features.
x_train, x_test, y_train, y_test = eng_data(iris)

# 4. Train the model; changing n_neighbors changes the resulting accuracy.
estimator = training(x_train, y_train, n_neighbors)

# 5. Predict on the test set and print the evaluation.
asse(estimator, x_test, y_test)
# The last value printed above is the accuracy. (This used to be a stray
# bare identifier that raised NameError once the pipeline had finished.)