【Python】机器学习模型在红酒质量等级评价中的案例应用

【Python】机器学习模型在红酒质量等级评价中的案例应用

1.红酒质量数据

import pandas as pd
import numpy as np
import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings(action="ignore")

# The data file is addressed relative to the current working directory.
csv_path = 'white wine data.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)

# Bare expression: shows the table when evaluated interactively.
df
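| | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 1 | 7.8 | 0.880 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
| 2 | 7.8 | 0.760 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
| 3 | 11.2 | 0.280 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
| 4 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2192 | 6.4 | 0.450 | 0.07 | 1.1 | 0.030 | 10.0 | 131.0 | 0.9905 | 2.97 | 0.28 | 10.8 | 5 |
| 2193 | 6.4 | 0.475 | 0.06 | 1.0 | 0.030 | 9.0 | 131.0 | 0.9904 | 2.97 | 0.29 | 10.8 | 5 |
| 2194 | 6.3 | 0.270 | 0.38 | 0.9 | 0.051 | 7.0 | 140.0 | 0.9926 | 3.45 | 0.50 | 10.5 | 7 |
| 2195 | 6.9 | 0.410 | 0.33 | 10.1 | 0.043 | 28.0 | 152.0 | 0.9968 | 3.20 | 0.52 | 9.4 | 5 |
| 2196 | 7.0 | 0.290 | 0.37 | 4.9 | 0.034 | 26.0 | 127.0 | 0.9928 | 3.17 | 0.44 | 10.8 | 6 |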

2197 rows × 12 columns

2. 划分数据并训练机器学习模型

# Feature matrix: every column except the last one.
X = df.iloc[:, :-1].values
# Labels: the last column, reshaped into a single-column 2-D array.
y = df.iloc[:, -1].values.reshape(-1, 1)


from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Standardize the features. The scaler's mean/variance are estimated on
# X_train only and then reused unchanged for X_test, so both sets are scaled
# with the same statistics and nothing from the test set influences the
# preprocessing.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
X_train_std = ss.fit_transform(X_train)
# transform (not fit_transform): re-fitting here would rescale the test set
# with its own statistics instead of the training ones.
X_test_std = ss.transform(X_test)
# The labels (y_train / y_test) are intentionally left unscaled.

#---------------  Modeling

# SVM Classifier  
def svm_classifier(train_x, train_y):
    """Fit a support vector classifier on the training data.

    The estimator is built with an RBF kernel and with probability
    estimates switched on.

    Args:
        train_x: Training features, handed to ``fit`` unchanged.
        train_y: Training labels, handed to ``fit`` unchanged.

    Returns:
        The fitted ``SVC`` instance.
    """
    from sklearn import svm

    # ``fit`` returns the estimator itself, so it can be returned directly.
    return svm.SVC(probability=True, kernel='rbf').fit(train_x, train_y)



# KNN Classifier  
def knn_classifier(train_x, train_y):
    """Fit a k-nearest-neighbours classifier on the training data.

    The estimator is constructed without arguments, i.e. with the
    library's default settings.

    Args:
        train_x: Training features, handed to ``fit`` unchanged.
        train_y: Training labels, handed to ``fit`` unchanged.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    from sklearn import neighbors

    classifier = neighbors.KNeighborsClassifier()
    classifier.fit(train_x, train_y)
    return classifier
    
# Logistic Regression Classifier  
def logistic_regression_classifier(train_x, train_y):
    """Fit an L2-penalised logistic regression on the training data.

    Args:
        train_x: Training features, handed to ``fit`` unchanged.
        train_y: Training labels, handed to ``fit`` unchanged.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    from sklearn import linear_model

    # ``fit`` returns the estimator itself, so it can be returned directly.
    return linear_model.LogisticRegression(penalty='l2').fit(train_x, train_y)

# Random Forest Classifier  
def random_forest_classifier(train_x, train_y, *, n_estimators=8, random_state=None):
    """Fit a random-forest classifier on the training data.

    Args:
        train_x: Training features, handed to ``fit`` unchanged.
        train_y: Training labels, handed to ``fit`` unchanged.
        n_estimators: Number of trees in the forest. Defaults to 8, the
            value that was previously hard-coded.
        random_state: Seed (or random-state object) forwarded to the
            estimator so that a run can be repeated. The default ``None``
            keeps the previous, unseeded behaviour.

    Returns:
        The fitted ``RandomForestClassifier`` instance.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    model.fit(train_x, train_y)
    return model

# Train on the standardized features with the original labels.
train_x, train_y = X_train_std, y_train

# Fit the four classifiers one after another, in the order listed.
model_svc, model_knn, model_logistic, model_rf = (
    build(train_x, train_y)
    for build in (
        svm_classifier,
        knn_classifier,
        logistic_regression_classifier,
        random_forest_classifier,
    )
)
# ----------

# Predict the standardized test set with each fitted model, same order as above.
y_svc, y_knn, y_logistic, y_rf = (
    fitted.predict(X_test_std)
    for fitted in (model_svc, model_knn, model_logistic, model_rf)
)

# 结果分析

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import  precision_score
from sklearn.metrics import recall_score

def _report(label, metric, **metric_kwargs):
    """Print ``label`` followed by ``metric`` for each model's test predictions.

    The scores appear in the fixed order SVC, KNN, logistic regression,
    random forest. Extra keyword arguments are forwarded to ``metric``.
    """
    scores = [
        metric(y_test, predicted, **metric_kwargs)
        for predicted in (y_svc, y_knn, y_logistic, y_rf)
    ]
    print(label, *scores)


# Accuracy, then macro/micro averaged precision, recall and F1.
_report('分类准确率为:', accuracy_score)
_report('宏平均准确率:', precision_score, average='macro')
_report('微平均准确率:', precision_score, average='micro')
_report('宏平均召回率为:', recall_score, average='macro')
_report('微平均召回率为:', recall_score, average='micro')
_report('宏平均f1值为:', f1_score, average='macro')
_report('微平均f1值为:', f1_score, average='micro')
# 误差评估
分类准确率为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均准确率: 0.280583340709923 0.2986745934975547 0.3499023740988492 0.3984623113419726
微平均准确率: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均召回率为: 0.27642774299410267 0.27744523345842165 0.2858453966079045 0.3401924573344921
微平均召回率为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均f1值为: 0.27488434754737406 0.27950149117164064 0.29807537284434943 0.3552209623496858
微平均f1值为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

春风惹人醉

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值