先导入需要的库。
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from subprocess import check_output
%matplotlib inline
再导入数据集,这里使用的是 iris 数据集。
# List the dataset directory to confirm the workbook is where we expect it.
print(check_output(["ls", "./Desktop/iris"]).decode("utf8"))
iris = pd.read_excel('./Desktop/iris/iris.xlsx')
iris.head()
# Normalize the column labels (lower-case, surrounding whitespace removed) so
# the later cells can refer to them uniformly. The labels are read from `iris`
# itself: no other frame exists yet at this point in the script.
new_col_name = [item.lower().strip() for item in iris.columns.values.tolist()]
iris.columns = new_col_name
一、Bar Plot
# Bar Plot
# Average of the first column for each species (the original note names that
# column SepalLengthCm -- the code itself selects it by position only).
species_unique = list(iris.species.unique())
each_species_SepalLengthCm_avg_list = []
for i in species_unique:
    x = iris[iris['species'] == i]
    first_column = x.iloc[:, 0]
    each_species_SepalLengthCm_avg = sum(first_column) / len(first_column)
    each_species_SepalLengthCm_avg_list.append(each_species_SepalLengthCm_avg)
each_species_SepalLengthCm_avg_df = pd.DataFrame({
    'species': species_unique,
    'each_species_SepalLengthCm_avg': each_species_SepalLengthCm_avg_list,
})
# Order the rows from the largest average to the smallest before plotting.
species_index = each_species_SepalLengthCm_avg_df['each_species_SepalLengthCm_avg'].sort_values(ascending=False).index.values
sorted_data = each_species_SepalLengthCm_avg_df.reindex(species_index)
# Draw the chart
plt.figure(figsize=(10, 6))
sns.barplot(x=sorted_data['species'], y=sorted_data['each_species_SepalLengthCm_avg'])
plt.xticks(rotation=45)
#输出图表:
二、Point Plot
# Point Plot
# Work on the rows whose species is 'setosa'; later sections reuse this subset.
species_data = iris[iris['species'] == 'setosa']
f, ax1 = plt.subplots(figsize=(15, 8))
# Two series share the first column as x: column 1 in lime, column 2 in red.
x_values = species_data.iloc[:, 0]
for column_position, line_color in ((1, 'lime'), (2, 'red')):
    sns.pointplot(x=x_values, y=species_data.iloc[:, column_position], color=line_color, alpha=0.8)
# Colour-matched captions that act as the legend.
plt.text(4.7, 4, 'SepalLengthCm VS SepalWidthCm', color='lime', fontsize=17, style='italic')
plt.text(4.7, 2, 'SepalLengthCm VS PetalLengthCm', color='red', fontsize=18, style='italic')
plt.title('prepariton')
plt.grid()
plt.savefig('./Desktop/iris/Point Plot.png')
#输出图表:
三、Joint Plot
# Joint Plot
# `kind` selects the joint/marginal style, e.g. 'scatter', 'reg', 'resid',
# 'kde' or 'hex'.
# The figure size is passed as `height`, the name seaborn uses for this
# argument (`size` is the older spelling).
sns.jointplot(x=species_data.iloc[:, 0], y=species_data.iloc[:, 1], kind='reg', height=7, color='r')
plt.savefig('./Desktop/iris/Joint Plot.png')
#输出图表:
四、Pie Chart
# Pie Chart
# Count the rows per species once, then split the result into the wedge
# labels and the wedge sizes.
species_counts = iris['species'].value_counts()
species = species_counts.index.tolist()
counter = species_counts.values.tolist()
plt.figure(figsize=(7, 7))
plt.pie(counter, autopct='%1.1f%%', labels=species)
plt.savefig('./Desktop/iris/Pie Chart.png')
#输出图表:
五、Lm Plot
# Lm Plot
# Scatter of sepal width against sepal length for the selected species,
# with seaborn's fitted regression line.
sns.lmplot(
    data=species_data,
    x="sepallengthcm",
    y="sepalwidthcm",
)
plt.savefig('./Desktop/iris/Lm Plot.png')
#输出图表:
六、Kde Plot
# Kde Plot
# Bivariate density of the first two columns of the selected species.
# The two variables are passed by keyword (`x`, `y`) and the filled contours
# are requested with `fill`, which is the interface current seaborn releases
# document for kdeplot.
sns.kdeplot(x=species_data.iloc[:, 0], y=species_data.iloc[:, 1], fill=True, cut=2, color='g')
plt.savefig('./Desktop/iris/Kde Plot.png')
#输出图表:
七、Violin Plot
# Violin Plot
# Keep the species label plus the two sepal measurements; the Heatmap
# section reads this same `data` frame afterwards.
violin_columns = ['species', 'sepallengthcm', 'sepalwidthcm']
data = species_data[violin_columns]
sns.violinplot(inner='points', data=data)
plt.savefig('./Desktop/iris/Violin Plot.png')
#输出图表:
八、Heatmap
# Heatmap
plt.subplots(figsize=(5, 6))
# Correlate the numeric columns only: `data` also carries the string-valued
# `species` column, which cannot take part in a correlation matrix.
numeric_data = data.select_dtypes(include='number')
sns.heatmap(numeric_data.corr(), annot=True, linewidths=0.5, linecolor='g', fmt='.1f')
# Save under this chart's own name so the violin plot image is not overwritten.
plt.savefig('./Desktop/iris/Heatmap.png')
#输出图表:
九、Box Plot
# Box Plot
# From here on `data` covers every species, not only the earlier subset.
data = iris[['species', 'sepallengthcm', 'sepalwidthcm']]
# Both layers plot sepal width per species from the same frame.
shared_args = dict(x="species", y="sepalwidthcm", data=data)
sns.boxplot(**shared_args)
# Overlay the individual observations, jittered, on top of the boxes.
sns.stripplot(jitter=True, edgecolor='gray', **shared_args)
plt.savefig('./Desktop/iris/Box Plot.png')
#输出图表:
十、Swarm Plot and Stripplot
# Swarm Plot and Stripplot
# Difference between the two: a swarm plot resolves the overlapping points of
# a strip plot by algorithmically spreading coinciding points out along the
# categorical axis.
fig, ax = plt.subplots(2, 1, figsize=(8, 6))
upper_axes, lower_axes = ax[0], ax[1]
# Swarm plot on the upper axes, strip plot on the lower axes.
ax1 = sns.swarmplot(data=data, x="species", y="sepalwidthcm", ax=upper_axes)
ax2 = sns.stripplot(data=data, x="species", y="sepalwidthcm", ax=lower_axes)
plt.savefig('./Desktop/iris/Swarm Plot and Stripplot.png')
#输出图表:
十一、Pair Plot
# Pair Plot
# Pairwise relationships between the columns of `data`, coloured by species.
# The per-facet size is passed as `height`, the name seaborn uses for this
# argument (`size` is the older spelling).
sns.pairplot(data=data, hue='species', height=3)
plt.savefig('./Desktop/iris/Pair Plot.png')
#输出图表:
十二、Count Plot
# Count Plot
# One bar per species, whose height is the number of rows with that label.
species_column = data['species']
sns.countplot(x=species_column)
plt.savefig('./Desktop/iris/Count Plot.png')
#输出图表: