Google Play应用数据处理+plotnine画图
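数据源:https://www.kaggle.com/imakash3011/customer-personality-analysis (注:该链接指向 Kaggle 上的 Customer Personality Analysis 数据集,而下文代码使用的是包含 App、Category、Rating 等字段的 Google Play 应用数据,请核对数据源链接是否正确)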
实际流程是先进行数据处理,再画图;但由于本文主要介绍如何使用 plotnine(Python 版 ggplot2)绘制图形,因此将画图部分放在前面。
画图
from plotnine import *
import numpy as np
# Helpers that build matching axis tick positions and tick labels.
def _tick_values(from_, to_, step_):
    """Return evenly spaced tick values as a NumPy array.

    Covers the half-open range ``[from_, to_ + step_)``, i.e. the same
    contract as ``np.arange(from_, to_ + step_, step_)``, so ``to_`` is
    included whenever it lies on the grid.

    The number of ticks is derived from a rounded ratio instead of being
    left to ``np.arange``: with a non-integer step, floating-point error in
    ``(stop - start) / step`` can otherwise yield one extra element beyond
    ``to_``.
    """
    span = (to_ + step_ - from_) / step_
    # Rounding absorbs representation noise such as 3.0000000000000004
    # before ceil() turns it into an unwanted extra tick.
    count = max(int(np.ceil(round(span, 9))), 0)
    # Integer inputs stay integers; any float input promotes to float.
    return from_ + step_ * np.arange(count)


def labels(from_, to_, step_):
    """Return tick labels from ``from_`` to ``to_`` as formatted strings.

    Each value is rendered with a thousands separator (``'{:,}'``), e.g.
    ``1000 -> '1,000'`` and ``0.5 -> '0.5'``. Float values are rounded to
    10 decimals first so accumulated error does not leak into the text
    (``0.30000000000000004`` is shown as ``'0.3'``).

    Args:
        from_: First tick value.
        to_: Last tick value (included when it lies on the grid).
        step_: Distance between consecutive ticks.

    Returns:
        list[str]: One label per tick, in the same order as ``breaks``.
    """
    ticks = _tick_values(from_, to_, step_).tolist()
    return ['{:,}'.format(round(value, 10)) for value in ticks]


def breaks(from_, to_, step_):
    """Return tick positions from ``from_`` to ``to_`` as plain numbers.

    Args:
        from_: First tick value.
        to_: Last tick value (included when it lies on the grid).
        step_: Distance between consecutive ticks.

    Returns:
        list: Python ints or floats, one per tick; the same length as the
        list returned by ``labels`` for identical arguments.
    """
    return _tick_values(from_, to_, step_).tolist()
# Box plot of Rating for each install-count bucket (install_new); the box fill
# is mapped to the App_y column and shown with the "plasma" colour map.
p1 = (
    ggplot(df2, aes(x="install_new", y="Rating", fill="App_y"))
    + geom_boxplot()
    # Explicit x-axis limits: the install buckets, listed smallest to largest.
    + xlim(
        "0-1,000",
        "1,000-10,000",
        "10,000-100,000",
        "100,000-1,000,000",
        "1,000,000-10,000,000",
        "10,000,000-100,000,000",
        "100,000,000-1,000,000,000",
        "1,000,000,000+",
    )
    + labs(x="下载量", y="评分", title="不同下载量量级的APP评分分布")
    # Rating axis runs 0-5 with a tick (and matching label) every 0.5.
    + scale_y_continuous(
        limits=(0, 5),
        labels=labels(0, 5, 0.5),
        breaks=breaks(0, 5, 0.5),
    )
    # theme_light() was tried here and left disabled.
    + scale_fill_cmap(cmap_name="plasma", name="APP数量")
    # Author's note: theme() tweaks have to be added last.
    + theme(axis_text_x=element_text(angle=30, hjust=1))
    # SimSun is requested as the text font; dpi sets the render resolution.
    + theme(text=element_text(family="SimSun"), dpi=420)
)
# Bare expression: shows the figure when run as a notebook cell.
p1
```python
# Violin plot of Rating per Category with a narrow white box plot drawn on
# top; the x axis uses reorder(Category,Rating) to order the categories.
p2 = (
    ggplot(
        df2,
        aes(x="reorder(Category,Rating)", y="Rating", fill="Category"),
    )
    # show_legend=False hides the legend for this layer.
    + geom_violin(show_legend=False, width=1)
    # Alternatives that were tried and left disabled: ylim(3, 5) and a
    # discrete y scale.
    + geom_boxplot(fill="white", width=0.3, show_legend=False)
    + labs(x="类别", y="评分")
    # Point / jitter overlays (geom_point, geom_jitter) were also tried and
    # left disabled.
    + scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space="husl")
    # Author's notes on scale_fill_hue: h chooses the hue region colours are
    # taken from (a wider range separates colours more; shifting it moves
    # the sampled region); a larger s gives higher saturation.
    + ggtitle("各类APP评分分布情况")
    + theme_light()
    # angle rotates the tick labels; hjust is the horizontal alignment in
    # [0, 1] (0 = left-aligned, 1 = right-aligned); vjust would adjust the
    # distance between the labels and the plot.
    + theme(axis_text_x=element_text(angle=75, hjust=1, color="red"))
    + theme(text=element_text(family="SimSun"), dpi=600)
)
# Bare expression: shows the figure when run as a notebook cell.
p2
# Horizontal bar chart of the App value per Category (taken from cate_cnt),
# with each bar annotated by its value.
p3 = (
    ggplot(
        cate_cnt,
        aes(x="reorder(Category,App)", y="App", fill="Category"),
    )
    # stat="identity": bar length is the App value itself, not a row count.
    + geom_bar(stat="identity", show_legend=False, width=0.6)
    + labs(x="类别", y="数量")
    # Swap the axes so the categories run down the vertical axis.
    + coord_flip()
    + geom_text(aes(label="App"), size=8)
    + scale_fill_hue(s=0.90, l=0.65, h=0.0417, color_space="husl")
    + ggtitle("各类别APP数量")
    + theme_light()
    + theme(text=element_text(family="SimSun"))
    # A panel_background fill and an explicit figure_size=(6, 6) inches were
    # tried here and left disabled.
    + theme(
        dpi=600,
        aspect_ratio=1,  # height:width
    )
)
# Bare expression: shows the figure when run as a notebook cell.
p3
import pandas as pd
数据处理
读取源数据并查看
df=pd.read_csv(