import pandas as pd
import numpy as np
import os
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
# Load the two datasets. Both files are decoded as GBK;
# winequality.csv uses ';' as its field separator.
wine = pd.read_csv(
    './data/wine.csv',
    encoding='gbk',
)
winequality = pd.read_csv(
    './data/winequality.csv',
    sep=';',
    encoding='gbk',
)
# Split each dataset into a feature table and a label column.

# wine: the label is the first column; every remaining column is a feature.
wine_data = wine.iloc[:, 1:]
wine_label = wine.iloc[:, 0]
print("wine数据集的数据:\n", wine_data)
print("wine数据集的标签:\n", wine_label)

# winequality: the label is read from column index 11, so the features are
# columns 0-10. iloc slices exclude the stop index, hence `:11` (a stop of 10
# would silently drop column index 10 from the feature table).
winequality_data = winequality.iloc[:, :11]
winequality_label = winequality.iloc[:, 11]
print("winequality数据集的数据:\n", winequality_data)
print("winequality数据集的标签:\n", winequality_label)
# Standardise the feature tables of both datasets with StandardScaler.
# np.var / np.mean are called without an axis, so the printed values
# summarise the whole scaled array rather than individual columns.
# NOTE(review): each scaler is fitted on the full feature table before the
# train/test split below, so test rows contribute to the scaling statistics
# -- confirm this is intended.
stdScale1 = StandardScaler()
wine_Scaler = stdScale1.fit_transform(wine_data)
print('标准差标准化后wine数据集数据的方差为:', np.var(wine_Scaler))
print('标准差标准化后wine数据集数据的均值为:', np.mean(wine_Scaler))

# stdScale1 is rebound here; from this point on it refers to the scaler
# fitted on the winequality features.
stdScale1 = StandardScaler()
winequality_Scaler = stdScale1.fit_transform(winequality_data)
print('标准差标准化后winequality数据集数据的方差为:', np.var(winequality_Scaler))
print('标准差标准化后winequality数据集数据的均值为:', np.mean(winequality_Scaler))
# Split the standardised features and their labels into training and test
# sets: 30% of the rows go to the test set, with a fixed random_state so the
# split is reproducible.
(
    wine_data_train,
    wine_data_test,
    wine_label_train,
    wine_label_test,
) = train_test_split(
    wine_Scaler,
    wine_label,
    test_size=0.3,
    random_state=42,
)

(
    winequality_data_train,
    winequality_data_test,
    winequality_label_train,
    winequality_label_test,
) = train_test_split(
    winequality_Scaler,
    winequality_label,
    test_size=0.3,
    random_state=42,
)