# 4.6 References (link titles recovered from a single fused line):
# - Python数据处理篇之Matplotlib系列(六)---plt.hist()与plt.hist2d()直方图 (www.jianshu.com)
# - Seaborn 0.9 中文文档 (www.cntofu.com)
# - 关于使用python seaborn库绘制violinplot小提琴图的一些小坑 (www.cnblogs.com)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the previously downloaded dataset.
df = pd.read_csv(r'C:\Users\73835\Downloads\pokemon.csv')

# Percentage (0-100) of missing values in each column.  isnull().mean() on its
# own is the missing *fraction* (0-1), so it has to be scaled by 100 to be
# equivalent to the long form df.isnull().sum() * 100 / len(df).
percent_missing = df.isnull().mean() * 100

# Summary table: one row per column of df.
missing_value_df = pd.DataFrame({
    'column_name': df.columns,
    'percent_missing': percent_missing,
})
# Five columns with the highest share of missing values.  This is a bare
# expression, so the result is only shown in an interactive session/notebook.
missing_value_df.sort_values(by='percent_missing', ascending=False).head()
# Count how many rows fall into each 'generation' value and draw the counts as
# horizontal bars.  The two plot calls are interchangeable spellings of the
# same pandas plot.
generation_counts = df['generation'].value_counts()
generation_counts.plot.barh()
generation_counts.plot(kind='barh')
# Correlation heatmap over the numeric columns of df.
# Keep the Axes returned by subplots() instead of calling plt.axes(), which
# would add a second Axes on top of the one subplots() already created.
fig, ax = plt.subplots(figsize=(20, 15))
ax.set_title('Correlation Heatmap')
# Restrict to numeric/bool columns explicitly: DataFrame.corr() in recent
# pandas versions raises on text columns instead of silently dropping them.
coor = df.select_dtypes(include=['number', 'bool']).corr()
# Draw on the Axes that carries the title rather than on "the current Axes".
sns.heatmap(coor, ax=ax)
# Columns of interest; the later correlation and total_stats steps reuse this
# list.
interested = [
    'hp', 'attack', 'defense',
    'sp_attack', 'sp_defense', 'speed',
]
# Grid of pairwise plots for those columns.
sns.pairplot(data=df[interested])
# Annotated correlation heatmap restricted to the columns in `interested`.
# Use the Axes created by subplots() directly; a following plt.axes() call
# would stack a second Axes on the same figure.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_title("Correlation Heatmap")
corr = df[interested].corr()
sns.heatmap(
    corr,
    xticklabels=corr.columns.values,
    yticklabels=corr.columns.values,
    annot=True,  # write each coefficient inside its cell
    fmt="f",
    cmap="YlGnBu",
    ax=ax,  # draw on the titled Axes, not on whatever is current
)
# Cast every column of interest to float.  The loop body must be indented;
# without the indent the file does not parse.
for c in interested:
    df[c] = df[c].astype(float)

# Add a new column: per-row sum of the columns in `interested`.
df = df.assign(total_stats=df[interested].sum(axis=1))
total_stats = df.total_stats

# Start a fresh figure so the histogram is not drawn onto whatever Axes was
# current before this point.
plt.figure()
# Two interchangeable ways of drawing the same 35-bin histogram.
plt.hist(total_stats, bins=35)
df.total_stats.plot(kind='hist', bins=35)
plt.xlabel('total_stats')
plt.ylabel('Frequency')
# Violin plot of total_stats for each type1 value, on a new 20x12-inch figure.
plt.subplots(figsize=(20, 12))
ax = sns.violinplot(data=df, x="type1", y="total_stats", palette="muted")

# Names of the first ten rows whose total_stats is at least 570 and whose
# is_legendary flag equals 0.  Bare expression: only displayed interactively.
high_total = df['total_stats'] >= 570
not_legendary = df['is_legendary'] == 0
df.loc[high_total & not_legendary, 'name'].head(10)