from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
# Split the data into a training set and a test set.
# Inputs are the columns at positions 0-6 of `newdata`; the target is the
# 'winPlacePerc' column. No test_size / random_state is passed, so
# scikit-learn's defaults apply and the split differs from run to run.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    newdata.iloc[:, 0:7],
    newdata['winPlacePerc'],
)
# Standardization.
# The scaler is fitted on the training features only; the same fitted
# scaler is then applied to both the training and the test features.
# (The variable name `trainsfer` is kept as-is for any code that uses it.)
from sklearn.preprocessing import StandardScaler
trainsfer = StandardScaler().fit(x_train)
x_train = trainsfer.transform(x_train)
x_test = trainsfer.transform(x_test)
#K近邻算法
from sklearn.neighbors import KNeighborsClassifier
# scikit-learn model persistence (save / load) API.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23, so prefer the standalone `joblib` package and fall back to the
# vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# Usage notes (kept as comments: `estimator` is not defined at this point):
#   Save: joblib.dump(estimator, 'test.pkl')
#   Load: estimator = joblib.load('test.pkl')
# NOTE: joblib files are pickle-based; only load files from a trusted source.
from sklearn.feature_selection import VarianceThreshold
#特征降维:1:皮尔逊相关系数,2:斯皮尔曼相关系数
from scipy.stats import pearsonr,spearmanr
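# Principal component analysis: PCA(n_components=None)
# n_components:
#   - float: the fraction of the information (variance) to keep
#   - int: the number of features to reduce to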
from sklearn.decomposition import PCA
#线性回归
from sklearn.linear_model import Ridge,RidgeCV,LinearRegression
#决策树算法
from sklearn.tree import DecisionTreeClassifier, export_graphviz
#均方误差
from sklearn.metrics import mean_squared_error,mean_absolute_error