#导入pandas库
import pandas as pd
# Load the raw dataset; 'Data.csv' is resolved relative to the working directory.
data = pd.read_csv('Data.csv')
# Handle missing values: every NaN in 'Age' and 'Salary' is replaced with the
# mean of its own column. Columns not named in the mapping are left untouched.
data = data.fillna({
    'Age': data['Age'].mean(),
    'Salary': data['Salary'].mean(),
})
# Separate features from the target: the last column is the target and every
# column before it is a feature.
X = data.iloc[:, :-1]
# Select the target by position -1 instead of a hard-coded 3, so that y is
# always exactly the column excluded from X regardless of the column count.
y = data.iloc[:, -1]
# One-hot encode the feature table; pd.get_dummies expands categorical
# (object/string/category) columns into indicator columns.
X = pd.get_dummies(X)
#标签编码
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y) #就看成数据标准化的过程
# Train/test split: hold out 25% of the rows for testing; the fixed
# random_state makes the split reproducible.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
# Feature scaling
from sklearn.preprocessing import StandardScaler  # standardization
from sklearn.preprocessing import MinMaxScaler  # min-max scaling to [0, 1]; not used in this section
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test split does not influence the statistics.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
# 机器学习第一篇:数据预处理
# 最新推荐文章于 2024-04-29 10:06:27 发布