# 小提琴图
## 小提琴图(violin plot)
用于显示数据分布及其概率密度;
图表结合了箱形图和密度图特征;
主要用来显示数据的分布形状;
中间的黑色粗条表示四分位数范围;
从其中延伸的幼细黑线代表95%置信区间;
黑色横线为中位数;
## 绘制小提琴图
plotnine包中的geom_violin()函数实现;
绘制时需要设定核密度估计带宽(bandwidth);
在小提琴图添加箱形图,能更加全面地展示数据;
plotnine包中的geom_jitter()函数添加抖动散点图;
### 小提琴图(+箱形图)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from plotnine import *
# Load the data set. A raw string keeps the Windows backslashes literal
# instead of relying on unrecognised escape sequences (\p, \o, \D).
df = pd.read_csv(r'd:\python\out\DistributionD.csv')

# Fix the order in which the groups appear on the x axis. The category list
# has to go through CategoricalDtype: passed positionally to astype() it binds
# to the `copy` parameter and the requested order is never applied.
# NOTE(review): values of 'class' that are not in this list become NaN --
# confirm the CSV only contains these four labels.
df['class'] = df['class'].astype(
    pd.CategoricalDtype(categories=["n", "s", "k", "mm"])
)

# Violin plot with a narrow white box plot drawn on top of each violin.
violin_plot = (
    ggplot(df, aes(x='class', y="value", fill="class"))
    + geom_violin(show_legend=False)
    + geom_boxplot(fill="white", width=0.1, show_legend=False)
    + scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl')
    + theme_matplotlib()
    + theme(aspect_ratio=1.05, dpi=100, figure_size=(4, 4))
)
print(violin_plot)

# The raw string is required here: in a normal literal "\v" is the
# vertical-tab escape, which would corrupt the output file name.
violin_plot.save(r"d:\python\out\violin_plot.pdf")
### 小提琴图(+抖动散点图)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from plotnine import *
# Load the data set. A raw string keeps the Windows backslashes literal
# instead of relying on unrecognised escape sequences (\p, \o, \D).
df = pd.read_csv(r'd:\python\out\DistributionD.csv')

# Fix the order in which the groups appear on the x axis. The category list
# has to go through CategoricalDtype: passed positionally to astype() it binds
# to the `copy` parameter and the requested order is never applied.
# NOTE(review): values of 'class' that are not in this list become NaN --
# confirm the CSV only contains these four labels.
df['class'] = df['class'].astype(
    pd.CategoricalDtype(categories=["n", "s", "k", "mm"])
)

# Violin plot with the individual observations overlaid as small, black,
# horizontally jittered points.
violin_plot1 = (
    ggplot(df, aes(x='class', y="value", fill="class"))
    + geom_violin(show_legend=False)
    + geom_jitter(fill="black", width=0.3, size=1, stroke=0.1,
                  show_legend=False)
    + scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space='husl')
    + theme_matplotlib()
    + theme(aspect_ratio=1.05, dpi=100, figure_size=(4, 4))
)
print(violin_plot1)

# Written relative to the current working directory.
violin_plot1.save("violin_plot2.pdf")