ggplot2对豆瓣top250的简单分析
首先:数据导入及清洗
# Load the Douban Top-250 ranking export; `encoding` marks the strings read
# from the file as UTF-8.
data <- read.csv("豆瓣排行榜.csv", encoding = "UTF-8")

# Two-column tables pairing each movie title with one attribute. Because the
# columns are passed as `data$...` expressions, data.frame() names them
# `data.<column>`; later code relies on those names (e.g. `mat2$data.评分`).
mat1 <- data.frame(data$电影名字, data$评价人数) # title + number of ratings
mat2 <- data.frame(data$电影名字, data$评分) # title + score
mat3 <- data.frame(data$电影名字, data$上映年份) # title + release year
0.词云图 根据评分人数设置
先导入所需包
library(webshot)
library(wordcloud2) #加载ibrary(webshot)
library(htmlwidgets)
library(ggplot2)
进行绘制词云图
# Word cloud built from `mat1` (movie title + number of ratings).
# wordcloud2 documents minRotation / maxRotation in radians, so the intended
# 30-45 degree tilt is written as pi / 6 .. pi / 4 rather than 30 .. 45.
plot0 <- wordcloud2(
  mat1,
  size = 0.1,
  shape = "pentagon",
  backgroundColor = "grey",
  minRotation = pi / 6,
  maxRotation = pi / 4,
  rotateRatio = 0.3,
  color = "random-light"
) + WCtheme(1)

# The word cloud is an HTML widget: save it as a web page first ...
saveWidget(plot0, "tmp.html", selfcontained = FALSE)
# ... then screenshot that page to get a jpg; `delay` waits a few seconds
# before the capture so the page has time to render.
webshot("tmp.html", "词云图.jpg", delay = 3, vwidth = 1000, vheight = 1000)
运行最终结果:
1.电影评分分析 | 柱状图
# Frequency table of scores: one row per distinct score (Var1) together with
# the number of movies that received it (Freq).
p1 <- data.frame(table(data$评分))

# Bar chart of the score distribution, one coloured bar per score.
plot1 <- ggplot(data = p1, mapping = aes(x = Var1, y = Freq, fill = Var1)) +
  # geom_col() draws bar heights straight from Freq.
  geom_col(width = 0.8) +
  theme_bw() +
  # Single labs() call: the script used to set a title here and then override
  # it with a second labs() at the end of the chain; only the final labels
  # are kept.
  labs(x = "评分", y = "数量", fill = "图例", title = "电影评分分析") +
  theme(
    # Base font size for all text.
    text = element_text(size = 12),
    # hjust = 0.5 centres the plot title.
    plot.title = element_text(hjust = 0.5, vjust = 0),
    # X-axis tick labels: `angle` tilts them, hjust / vjust shift them.
    axis.text.x = element_text(
      size = 12, color = "black", angle = 45, hjust = 0.5, vjust = 0.5
    ),
    # Legend title font.
    legend.title = element_text(size = 12)
  )
plot1
ggsave("电影评分分析.png", plot = plot1, scale = 1, dpi = 1000)
2.电影上映年份分析 | 时间折线图
# Count movies per release year: Var1 is the year (a factor), Freq the count.
table2 <- table(data$上映年份)
p2 <- data.frame(table2)
# Drop the 11th row of the frequency table.
# NOTE(review): presumably a malformed or unwanted year entry -- confirm
# against the data; a hard-coded position silently removes a different row
# if the CSV changes.
p2 <- p2[-11, ]
# Renumber the rows after the removal; seq_len() stays correct even when the
# table is empty, unlike 1:nrow(p2).
rownames(p2) <- seq_len(nrow(p2))
library(ggplot2)

# Point-and-line chart of the number of movies per release year.
plot2 <- ggplot(data = p2, mapping = aes(x = Var1, y = Freq)) +
  geom_point() +
  # Data and aesthetics are inherited from ggplot(); group = 1 joins all
  # points into one line even though the x variable is a factor.
  geom_line(group = 1, size = 0.5, linetype = "solid", colour = "red") +
  theme(
    # Base font size for all text.
    text = element_text(size = 10),
    # hjust = 0.5 centres the plot title.
    plot.title = element_text(hjust = 0.5, vjust = 0),
    # X-axis tick labels: small and rotated 90 degrees.
    axis.text.x = element_text(
      size = 5, color = "black", angle = 90, hjust = 0.5, vjust = 0.5
    )
  ) +
  labs(x = "上映年份", y = "数量", title = "电影上映年份分析")
plot2
ggsave("电影上映年份分析.png", plot = plot2, scale = 1, dpi = 1000)
3.饼图 |根据评分数据划分图形
# Five-number summary (plus mean) of the score column of `mat2`.
summary(mat2[["data.评分"]])
结果:
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  8.300   8.700   8.900   8.912   9.100   9.700
# Bin the scores into the intervals [8,9] and (9,10] and show the result.
group1 <- cut(mat2$data.评分, seq(8, 10, 1), include.lowest = TRUE)
group1 <- data.frame(group1)
group1

# Share of each score as a formatted percentage string (scales::percent).
percentage <- scales::percent(p1$Freq / sum(p1$Freq))

# Legend labels of the form "<score>(<share>%)", share rounded to 1 decimal.
# `p1` is the score frequency table (Var1 = score, Freq = count).
label_value <- paste0("(", round(p1$Freq / sum(p1$Freq) * 100, 1), "%)")
label_value
label <- paste0(p1$Var1, label_value)
label

library(ggplot2)

# Minimal theme with every axis element, border and grid line removed, so
# only the pie itself, the legend and a bold title remain.
blank_theme <- theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(size = 14, face = "bold")
  )

# Pie chart: a single stacked bar (constant x) wrapped into polar
# coordinates along y, so each score's count becomes a slice.
plot3 <- ggplot(data = p1, mapping = aes(x = "Var", y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", width = 1, position = "stack") +
  coord_polar(theta = "y") +
  labs(x = "", y = "", title = "豆瓣评分统计") +
  blank_theme +
  # Replace the legend entries with the "<score>(<share>%)" labels.
  scale_fill_discrete(labels = label)
plot3
ggsave("豆瓣评分统计.png", plot = plot3, scale = 1, dpi = 1000)
运行所得: