葡萄酒数据可视化分析

葡萄酒数据可视化分析

图片来源:必应壁纸

数据集:https://download.csdn.net/download/weixin_53742691/87982219

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the edited wine-quality dataset from a path relative to the working
# directory, then preview the first rows.
wine = pd.read_csv("wine_quality/wine_edited.csv")
wine.head()
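|   | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | color | alcohol_level | acidity_level |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | red | low | low |
| 1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 | red | low | mod_high |
| 2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 | red | low | medium |
| 3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 | red | low | mod_high |
| 4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | red | low | low |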
wine.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6497 entries, 0 to 6496
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         6497 non-null   float64
 1   volatile acidity      6497 non-null   float64
 2   citric acid           6497 non-null   float64
 3   residual sugar        6497 non-null   float64
 4   chlorides             6497 non-null   float64
 5   free sulfur dioxide   6497 non-null   float64
 6   total sulfur dioxide  6497 non-null   float64
 7   density               6497 non-null   float64
 8   pH                    6497 non-null   float64
 9   sulphates             6497 non-null   float64
 10  alcohol               6497 non-null   float64
 11  quality               6497 non-null   int64  
 12  color                 6497 non-null   object 
 13  alcohol_level         6497 non-null   object 
 14  acidity_level         6496 non-null   object 
dtypes: float64(11), int64(1), object(3)
memory usage: 761.5+ KB

单一变量的可视化

# Bar chart of the number of rows in each `color` category.
sns.countplot(x='color',data=wine);


png

# Bar chart of the number of rows in each `acidity_level` category.
sns.countplot(x='acidity_level',data=wine);

png

# Bar chart of the number of rows for each `quality` score.
sns.countplot(x='quality',data=wine);

png

sns.color_palette()

# Take the first color of the current seaborn palette and draw every bar with
# it, so that color no longer varies between categories.
default_palette = sns.color_palette()
base_color = default_palette[0]
sns.countplot(data=wine, x='color', color=base_color);

png

# Same count chart for `quality`, drawn in the single `base_color`.
sns.countplot(x='quality',data=wine,color=base_color);

png

# Passing the column as `y` instead of `x` draws the count bars horizontally.
sns.countplot(y='acidity_level',data=wine,color=base_color);

png

 wine.acidity_level.value_counts()
high        1717
mod_high    1643
low         1574
medium      1562
Name: acidity_level, dtype: int64
# value_counts() returns the categories sorted from most to least frequent;
# its index is reused as the bar order of the horizontal count chart.
level_frequencies = wine['acidity_level'].value_counts()
ph_order = level_frequencies.index
sns.countplot(data=wine, y='acidity_level', order=ph_order, color=base_color);

png

# Vertical count chart with the bars arranged in the order given by `ph_order`.
sns.countplot(x='acidity_level',data=wine,color=base_color,order=ph_order);

png

# Vertical count chart of `acidity_level`, with each bar annotated by its count.
sns.countplot(x='acidity_level',data=wine,color=base_color,order=ph_order);

acidity_class = wine['acidity_level'].value_counts()
locs, labels = plt.xticks()

# The bars are drawn in `ph_order`, so pair each tick position with its
# category label and look the count up by that label.  Indexing the
# string-labelled Series with an integer (`acidity_class[i]`) relies on a
# positional fallback that pandas has deprecated.
for loc, level in zip(locs, ph_order):
    put_string = acidity_class[level]
    # Place the white label 100 counts below the bar top so it sits inside the bar.
    plt.text(loc, put_string - 100, put_string, color='white', ha='center')

png

# Horizontal count chart of `acidity_level`, with each bar annotated by its count.
sns.countplot(y='acidity_level',data=wine,color=base_color,order=ph_order);

acidity_class = wine['acidity_level'].value_counts()
locs, labels = plt.yticks()

# The bars are drawn in `ph_order`, so pair each tick position with its
# category label and look the count up by that label.  Indexing the
# string-labelled Series with an integer (`acidity_class[i]`) relies on a
# positional fallback that pandas has deprecated.
for loc, level in zip(locs, ph_order):
    put_string = acidity_class[level]
    # Place the label 100 counts to the right of the bar end, outside the bar.
    plt.text(put_string + 100, loc, put_string, ha='center')

png

# Build a two-column table: one row per `alcohol_level` with its row count.
# Naming the index and the value column explicitly gives the same
# `alcohol_level` / `alcohol_counts` columns on every pandas version; a bare
# reset_index() followed by renaming 'index' breaks on pandas >= 2.0, where
# value_counts() already names the index after the column and the values 'count'.
alcohol_data = (
    wine['alcohol_level']
    .value_counts()
    .rename_axis('alcohol_level')
    .reset_index(name='alcohol_counts')
)
alcohol_data
|   | alcohol_level | alcohol_counts |
|---|---|---|
| 0 | high | 3320 |
| 1 | low | 3177 |
# Bar chart drawn from the pre-aggregated table: one bar per `alcohol_level`,
# with its height taken from `alcohol_counts`.
sns.barplot(x='alcohol_level',y='alcohol_counts',data=alcohol_data,color=base_color);

png

# Pie chart of the alcohol-level counts, labelled by level; startangle=90
# rotates the start of the first wedge to 90 degrees.
plt.pie(alcohol_data.alcohol_counts,labels=alcohol_data.alcohol_level,startangle=90)
plt.show()

png

# Build a two-column table: one row per `acidity_level` with its row count.
# Naming the index and the value column explicitly gives the same
# `acidity_level` / `level_counts` columns on every pandas version; a bare
# reset_index() followed by renaming 'index' breaks on pandas >= 2.0, where
# value_counts() already names the index after the column and the values 'count'.
acidity_class = (
    wine['acidity_level']
    .value_counts()
    .rename_axis('acidity_level')
    .reset_index(name='level_counts')
)
acidity_class
|   | acidity_level | level_counts |
|---|---|---|
| 0 | high | 1717 |
| 1 | mod_high | 1643 |
| 2 | low | 1574 |
| 3 | medium | 1562 |
# Pie chart of the acidity-level counts, labelled by level.
plt.pie(acidity_class.level_counts,labels=acidity_class.acidity_level,startangle=90)
plt.show()

png

# Same pie, but the wedge width is limited to 0.4 via wedgeprops, which leaves
# the centre empty (a ring / donut chart).
plt.pie(acidity_class.level_counts,labels=acidity_class.acidity_level,startangle=90,wedgeprops={'width':0.4})
plt.show()

png

wine.describe()
|   | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 | 6497.000000 |
| mean | 7.215307 | 0.339666 | 0.318633 | 5.443235 | 0.056034 | 30.525319 | 115.744574 | 0.994697 | 3.218501 | 0.531268 | 10.491801 | 5.818378 |
| std | 1.296434 | 0.164636 | 0.145318 | 4.757804 | 0.035034 | 17.749400 | 56.521855 | 0.002999 | 0.160787 | 0.148806 | 1.192712 | 0.873255 |
| min | 3.800000 | 0.080000 | 0.000000 | 0.600000 | 0.009000 | 1.000000 | 6.000000 | 0.987110 | 2.720000 | 0.220000 | 8.000000 | 3.000000 |
| 25% | 6.400000 | 0.230000 | 0.250000 | 1.800000 | 0.038000 | 17.000000 | 77.000000 | 0.992340 | 3.110000 | 0.430000 | 9.500000 | 5.000000 |
| 50% | 7.000000 | 0.290000 | 0.310000 | 3.000000 | 0.047000 | 29.000000 | 118.000000 | 0.994890 | 3.210000 | 0.510000 | 10.300000 | 6.000000 |
| 75% | 7.700000 | 0.400000 | 0.390000 | 8.100000 | 0.065000 | 41.000000 | 156.000000 | 0.996990 | 3.320000 | 0.600000 | 11.300000 | 6.000000 |
| max | 15.900000 | 1.580000 | 1.660000 | 65.800000 | 0.611000 | 289.000000 | 440.000000 | 1.038980 | 4.010000 | 2.000000 | 14.900000 | 9.000000 |
# Histogram of the `pH` column using matplotlib's default binning.
plt.hist(data=wine,x='pH')
plt.show()

png

import numpy as np
# Bin edges spaced 0.05 pH units apart, starting at the smallest observed pH.
# One bin width is added to the stop value so the last bin reaches the maximum
# (np.arange excludes the stop itself).
bin_width = 0.05
ph_low = wine['pH'].min()
ph_high = wine['pH'].max()
bin_edges = np.arange(ph_low, ph_high + bin_width, bin_width)
len(bin_edges)
27
# Histogram of `pH` again, this time with the explicit 0.05-wide bin edges.
plt.hist(data=wine,x='pH',bins=bin_edges)
plt.show()

png

# Histogram of `chlorides` restricted to the 0–0.125 range with 0.005-wide
# bins; rows above the last bin edge are not counted and lie outside the x-axis.
x_upper = 0.125
step = 0.005
bin_edges = np.arange(0.0, x_upper + step, step)
plt.hist(data=wine, x='chlorides', bins=bin_edges)
plt.xlim((0.0, x_upper))
plt.show()

png

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

IT小辉同学

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值