jiebaR
---
title: "景区点评分析"
output: word_document
---
# Overall review analysis: segment every review in `pundat`, count word
# frequencies, tag each word with a scenic-area dimension and total the
# frequencies per dimension.
# `pundat` is not created here; its first column is used as the review text.
library(jiebaR)
library(reshape)

mixseg <- worker("tag", stop_word = "C:\\Users\\wuchaojin\\Desktop\\stopword.txt")

# Word frequency table (columns `char` and `freq` are used below),
# most frequent word first.
split_word <- freq(mixseg[as.character(pundat[, 1])])
split_word <- as.data.frame(split_word, responseName = "freq")
split_word <- split_word[order(-split_word$freq), ]

# One keyword dictionary per dimension; each file must provide a `word` column.
canyin <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\餐饮.csv")
fengjing <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\风景景点.csv")
fuwu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\服务.csv")
ruyuan <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\购票入园.csv")
gouwu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\购物.csv")
guanli <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\管理.csv")
huanjing <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\环境.csv")
jiage <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\价格.csv")
jiaotong <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\交通.csv")
sheshi <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\设施设备.csv")
tianqi <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\天气.csv")
zhusu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\名镇古迹\\住宿.csv")

# Create the column explicitly with one NA per word. Relying on the first
# subset assignment to create it breaks when that dictionary matches no word.
split_word$category <- rep(NA_character_, nrow(split_word))

# Labels and dictionaries are kept in parallel and applied in this order, so a
# word listed in several dictionaries ends up with the label applied last.
category_labels <- c(
  "餐饮", "风景景点", "服务", "购票入园", "购物", "管理",
  "环境", "价格", "交通", "设施", "天气", "住宿"
)
category_dicts <- list(
  canyin, fengjing, fuwu, ruyuan, gouwu, guanli,
  huanjing, jiage, jiaotong, sheshi, tianqi, zhusu
)
for (i in seq_along(category_labels)) {
  in_dict <- split_word$char %in% category_dicts[[i]]$word
  split_word$category[in_dict] <- category_labels[i]
}

# Total frequency per dimension, largest first.
freq_data <- aggregate(
  split_word$freq,
  by = list(split_word$category),
  FUN = sum,
  na.rm = TRUE
)
freq_data_all <- rename(freq_data, c(Group.1 = "category", x = "freq_all"))
freq_data_all <- freq_data_all[order(-freq_data_all$freq_all), ]

# Both tables are written to the current working directory.
write.csv(split_word, "new_split_all.csv", row.names = FALSE)
write.csv(freq_data_all, "freq_data_all.csv", row.names = FALSE)
# Per-star analysis (1 to 5 stars): for each of `pundat1` .. `pundat5`
# (created elsewhere; the first column is used as review text) build a tagged
# word-frequency table and the per-dimension frequency totals, and write both
# to CSV files in the current working directory.
library(jiebaR)
library(reshape)

stop_word_file <- "C:\\Users\\wuchaojin\\Desktop\\stopword.txt"

# Keyword dictionaries shared by all five star levels. They are read once
# here instead of being re-read before every star level.
canyin <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\canyin.csv")
fuwu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\fuwu.csv")
huanjing <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\huanjing.csv")
jiage <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\jiage.csv")
jiaotong <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\jiaotong.csv")
guanli <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\guanli.csv")
sheshi <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\sheshi.csv")
ruyuan <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\ruyuan.csv")
gouwu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\gouwu.csv")
zhusu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\zhusu.csv")
tianqi <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\tianqi.csv")
xiangmu <- read.csv("C:\\Users\\wuchaojin\\Desktop\\category\\xiangmu.csv")

# Labels and dictionaries are kept in parallel and applied in this order, so a
# word listed in several dictionaries ends up with the label applied last.
star_category_labels <- c(
  "天气", "餐饮", "服务", "娱乐项目", "价格", "交通",
  "设施", "购票入园", "管理", "住宿", "环境氛围", "购物"
)
star_category_dicts <- list(
  tianqi, canyin, fuwu, xiangmu, jiage, jiaotong,
  sheshi, ruyuan, guanli, zhusu, huanjing, gouwu
)

# Segment `comments` with `seg_worker`, count word frequencies (most frequent
# first) and add a `category` column from the `labels` / `dicts` pairs.
# Words found in no dictionary keep NA as their category.
tag_review_words <- function(comments, seg_worker, labels, dicts) {
  words <- freq(seg_worker[as.character(comments)])
  words <- as.data.frame(words, responseName = "freq")
  words <- words[order(-words$freq), ]
  # Explicit column creation: one NA per word, whatever the dictionaries match.
  words$category <- rep(NA_character_, nrow(words))
  for (i in seq_along(labels)) {
    in_dict <- words$char %in% dicts[[i]]$word
    words$category[in_dict] <- labels[i]
  }
  words
}

# Sum the word frequencies of `words` per category (columns `Group.1`, `x`).
sum_freq_by_category <- function(words) {
  aggregate(words$freq, by = list(words$category), FUN = sum, na.rm = TRUE)
}

# 1 star
mixseg1 <- worker("tag", stop_word = stop_word_file)
split_word1 <- tag_review_words(
  pundat1[, 1], mixseg1, star_category_labels, star_category_dicts
)
freq_data1 <- sum_freq_by_category(split_word1)
freq_data_1 <- rename(freq_data1, c(Group.1 = "category", x = "freq_1"))
write.csv(split_word1, "new_split_1.csv", row.names = FALSE)
write.csv(freq_data_1, "freq_data_1.csv", row.names = FALSE)

# 2 stars
mixseg2 <- worker("tag", stop_word = stop_word_file)
split_word2 <- tag_review_words(
  pundat2[, 1], mixseg2, star_category_labels, star_category_dicts
)
freq_data2 <- sum_freq_by_category(split_word2)
freq_data_2 <- rename(freq_data2, c(Group.1 = "category", x = "freq_2"))
write.csv(split_word2, "new_split_2.csv", row.names = FALSE)
write.csv(freq_data_2, "freq_data_2.csv", row.names = FALSE)

# 3 stars
mixseg3 <- worker("tag", stop_word = stop_word_file)
split_word3 <- tag_review_words(
  pundat3[, 1], mixseg3, star_category_labels, star_category_dicts
)
freq_data3 <- sum_freq_by_category(split_word3)
freq_data_3 <- rename(freq_data3, c(Group.1 = "category", x = "freq_3"))
write.csv(split_word3, "new_split_3.csv", row.names = FALSE)
write.csv(freq_data_3, "freq_data_3.csv", row.names = FALSE)

# 4 stars
mixseg4 <- worker("tag", stop_word = stop_word_file)
split_word4 <- tag_review_words(
  pundat4[, 1], mixseg4, star_category_labels, star_category_dicts
)
freq_data4 <- sum_freq_by_category(split_word4)
freq_data_4 <- rename(freq_data4, c(Group.1 = "category", x = "freq_4"))
write.csv(split_word4, "new_split_4.csv", row.names = FALSE)
write.csv(freq_data_4, "freq_data_4.csv", row.names = FALSE)

# 5 stars
mixseg5 <- worker("tag", stop_word = stop_word_file)
split_word5 <- tag_review_words(
  pundat5[, 1], mixseg5, star_category_labels, star_category_dicts
)
freq_data5 <- sum_freq_by_category(split_word5)
freq_data_5 <- rename(freq_data5, c(Group.1 = "category", x = "freq_5"))
write.csv(split_word5, "new_split_5.csv", row.names = FALSE)
write.csv(freq_data_5, "freq_data_5.csv", row.names = FALSE)
# Attach the per-star totals to the overall table as freq_1 .. freq_5.
# A dimension that is absent from a per-star table gets 0.
# NOTE(review): the overall table is labelled with "环境" / "风景景点" while
# the per-star tables use "环境氛围" / "娱乐项目", so those rows can never
# match each other - confirm this is intended.
star_totals <- list(
  freq_data_1, freq_data_2, freq_data_3, freq_data_4, freq_data_5
)
for (star in seq_along(star_totals)) {
  per_star <- star_totals[[star]]
  matched <- per_star[match(freq_data_all$category, per_star$category), 2]
  matched[is.na(matched)] <- 0
  freq_data_all[[paste0("freq_", star)]] <- matched
}

total <- freq_data_all
total_amount <- total

# `level` is the star rating weighted by each star's share of `freq_all`.
total_level <- transform(
  total,
  level = 1 * freq_1 / freq_all +
    2 * freq_2 / freq_all +
    3 * freq_3 / freq_all +
    4 * freq_4 / freq_all +
    5 * freq_5 / freq_all
)

# Keep category + freq_all from one copy and category + level from the other,
# then sort by overall frequency, largest first.
total_l_a <- merge(
  total_amount[, c("category", "freq_all")],
  total_level[c("category", "level")],
  by = "category"
)
total_l_a <- total_l_a[order(-total_l_a$freq_all), ]
&
&
&
&
&
&
# Overall satisfaction: mean star rating over all reviews and per period.
# `overall_level1` is created elsewhere; its column types are not visible here.
# Convert through character first: as.numeric() applied directly to a factor
# returns the internal level codes instead of the star values. Numeric and
# character input give the same result as before.
overall_level1$星级 <- as.numeric(as.character(overall_level1$星级))

# Mean rating rescaled from the 1-5 star range to a 0-1 score.
overall_level2 <- round(mean(overall_level1$星级) / 5, 2)

# Mean rating per period (`时间`), with readable column names.
overall_level3 <- aggregate(
  overall_level1$星级,
  by = list(overall_level1$时间),
  FUN = mean,
  na.rm = TRUE
)
library(reshape)
overall_level3 <- rename(overall_level3, c(Group.1 = "时间", x = "星级"))

library(ggplot2)
library(gtable)
library(grid)
&
景区关注度趋势图数据:
# Print the data behind the chart, then draw review counts per period as bars.
# `overall_amount` and `comment_num` are created elsewhere.
overall_amount
ggplot(overall_amount, aes(x = factor(时间), y = 点评数)) +
  geom_bar(stat = "identity", fill = "#BED742") +
  # Count printed just above each bar.
  geom_text(aes(label = 点评数), vjust = -0.4, colour = "black", size = 3) +
  # 5% headroom so the label of the tallest bar is not clipped.
  ylim(0, max(overall_amount$点评数) * 1.05) +
  theme_bw() +
  labs(x = "日期", y = "点评数") +
  ggtitle(paste0("景区关注度趋势图 总点评数:", comment_num)) +
  theme(
    axis.text.x = element_text(angle = 65, hjust = 1, vjust = 1, size = 8),
    axis.title.x = element_text(colour = "darkgrey", size = 10),
    axis.title.y = element_text(colour = "darkgrey", size = 10),
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.line = element_line(color = "gray", size = 1)
  )
&
景区满意度趋势图数据:
# Print the data behind the chart, then draw the satisfaction trend
# (mean stars / 5) per period as a line chart.
overall_level3
library(ggplot2)
ggplot(overall_level3, aes(x = factor(时间), y = 星级 / 5, group = 1)) +
  geom_line(color = "#BED742") +
  geom_point(color = "#BED742") +
  # Score printed just above each point.
  geom_text(
    aes(label = round(星级 / 5, 2)),
    vjust = -0.4, colour = "black", size = 3
  ) +
  # Pad the y range below and above the observed scores.
  ylim(
    min(overall_level3$星级 / 5) * 0.9,
    max(overall_level3$星级 / 5) * 1.05
  ) +
  theme_bw() +
  labs(x = "日期", y = "满意度") +
  ggtitle(paste0(
    "景区满意度趋势图 总体满意度",
    round(mean(overall_level1$星级) / 5, 2)
  )) +
  theme(
    axis.text.x = element_text(angle = 65, hjust = 1, vjust = 1, size = 8),
    axis.title.x = element_text(colour = "darkgrey", size = 9),
    axis.title.y = element_text(colour = "darkgrey", size = 9),
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )
#### 3.2 维度细分
  通过下图景区各维度满意度分布,(可见游客对景区的XX最为关注,平均每条评论提及XX次。其次是XX、XX以及XX,人均提及近XX次。这充分反映了游客对该景区以及该类景区的热切关注点以及潜在需求点。合理丰富娱乐项目,加强园内设施及管理,合理设置调整门票及园内项目价格,重视园内餐饮,根据顾客需求有针对性发展及创新。)
景区各维度关注度及满意度数据:
# Print the per-dimension table, then draw how often each dimension is
# mentioned, scaled by `table(scenic)`, as bars ordered by overall frequency.
# NOTE(review): `scenic` is created elsewhere; `table(scenic)` presumably holds
# a single count (the number of reviews) - confirm.
#total_data<-read.csv("D:\\Documents\\total_l_a.csv")
total_l_a
library(gcookbook)
library(ggplot2)
ggplot(
  total_l_a,
  aes(x = reorder(category, -freq_all), y = freq_all / table(scenic))
) +
  geom_bar(stat = "identity", fill = "#BED742") +
  # Refer to the column directly: `total_l_a$freq_all` inside aes() bypasses
  # the layer data and triggers ggplot2's "use of `data$col` is discouraged"
  # warning.
  geom_text(
    aes(label = round(freq_all / table(scenic), 2)),
    vjust = -0.4, colour = "black", size = 4
  ) +
  # 10% headroom so the label of the tallest bar is not clipped.
  ylim(0, max(total_l_a$freq_all / table(scenic)) * 1.1) +
  theme_bw() +
  labs(x = "景区维度", y = "平均单条点评提及频次") +
  ggtitle("景区各维度关注度") +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 10),
    axis.title.x = element_text(colour = "darkgrey", size = 10),
    axis.title.y = element_text(colour = "darkgrey", size = 10),
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.line.y = element_line(color = "gray", size = 1),
    axis.line.x = element_line(color = "gray", size = 1)
  )
  (景区各维度满意度整体分布在XX-XX之间,其中,对娱乐项目关注度最高且满意度也较高达XX,管理及设施方面游客也极为关注但满意度较低,景区应提起对这两方面的重视,选择合理化的园内管理方式,改善园内设施,环境卫生以及服务质量用户体验较差,急需提升。)
# Satisfaction (level / 5) per dimension as a line chart, with dimensions
# ordered by overall frequency, largest first.
ggplot(
  total_l_a,
  aes(x = reorder(category, -freq_all), y = level / 5, group = 1)
) +
  geom_line(color = "#BED742") +
  # Score printed just above each point.
  geom_text(
    aes(label = round(level / 5, 2)),
    vjust = -0.4, colour = "black", size = 3.5
  ) +
  # Pad the y range below and above the observed scores.
  ylim(min(total_l_a$level / 5) * 0.95, max(total_l_a$level / 5) * 1.05) +
  geom_point(color = "#BED742") +
  theme_bw() +
  labs(x = "景区维度", y = "满意度") +
  ggtitle("景区各维度满意度") +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 10),
    axis.title.x = element_text(colour = "darkgrey", size = 10),
    axis.title.y = element_text(colour = "darkgrey", size = 10),
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )
#### 3.3 具体问题反馈
  通过词云展现了点评高频词汇,可直观展现游客所关注的问题有哪些,(娱乐项目维度有热带风暴、大喇叭、深海巨蟒、滑道等比较受关注,管理方面排队问题备受关注,拖鞋、储物柜、更衣室、柜子等词出现频繁,说明储物设施方面也应着重管理等。)
# Word cloud of the overall word-frequency table.
library(wordcloud)
data <- read.csv("D:\\Documents\\new_split_all.csv")
head(data)
wordcloud(
  words = data$char,
  freq = data$freq,
  # Only words reaching 3% of the top frequency are drawn.
  min.freq = max(data$freq) * 0.03,
  random.order = FALSE,
  random.color = TRUE,
  colors = c(
    'pink', 'red', 'blue', 'green', 'yellow',
    'purple', 'beige', 'brown', 'peru', 'khaki'
  )
)
  下面是针对一星差评点评做出的词云统计,(去除整体点评用户均会涉及的“项目”,可见“排队”,“插队”,“人多”,“服务”,“工作人员”,“垃圾”,“脏”,“恶心”,“设施”等词汇在差评中出现频次颇多),在一定程度上反映了一些急需解决的实际问题,同时也印证了3.2中各景区维度满意度的评估。
# Word cloud of the one-star word-frequency table (at most 50 words).
data1 <- read.csv("D:\\Documents\\new_split_1.csv")
# Frequencies and the min.freq threshold must come from `data1` itself:
# pairing `data1$char` with the overall table's `freq` column attaches
# frequencies that belong to different words (and to a different row count).
wordcloud(
  data1$char,
  data1$freq,
  min.freq = max(data1$freq) * 0.001,
  max.words = 50,
  random.order = FALSE,
  random.color = TRUE,
  colors = c(
    'pink', 'red', 'blue', 'green', 'yellow',
    'purple', 'beige', 'brown', 'peru', 'khaki'
  )
)
Rwordseg
reviewpath <- "D:/work/桌面/点评报告/情感分析"
# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor the end so that only file names ending in ".txt" are listed.
completepath <- list.files(reviewpath, pattern = "\\.txt$", full.names = TRUE)

#### Read the text files in bulk
# Read the file at path `x` and return all of its lines joined into one string.
read.txt <- function(x) {
  # warn = FALSE: a missing newline at the end of a file is harmless here and
  # used to raise a warning for every such file.
  des <- readLines(x, warn = FALSE)
  paste(des, collapse = "")
}
review <- lapply(completepath, read.txt)
# The main Chinese sentiment lexicons are HowNet (compiled by CNKI) and NTUSD
# (published by National Taiwan University); the open-source "Tongyici Cilin"
# synonym thesaurus from the HIT information retrieval lab can be used to
# extend a sentiment lexicon.
# Load the sentiment polarity lexicon, register its terms with the segmenter,
# then clean the review texts and segment them.
# NOTE(review): setwd() changes the working directory for everything that runs
# afterwards; it is kept because later relative paths may depend on it.
setwd("D:\\work\\桌面\\点评报告\\情感分析\\中文情感极性词典")
library(openxlsx)
posneg <- read.xlsx("posneg.xlsx", 1)
dict <- posneg[, "term"]

# Install Rwordseg only when it is missing instead of reinstalling it from the
# network on every run.
if (!requireNamespace("Rwordseg", quietly = TRUE)) {
  install.packages("Rwordseg", repos = "http://R-Forge.R-project.org")
}
library(Rwordseg)
#listDict() # list the installed dictionaries
#uninstallDict() # remove the installed dictionaries
insertWords(dict)

train.test <- read.xlsx("D:\\work\\桌面\\点评报告\\情感分析\\test1.xlsx", 1)
sentence <- as.vector(train.test$点评内容) # review text as a plain vector
sentence <- gsub("[[:digit:]]*", "", sentence) # remove digits
sentence <- gsub("[a-zA-Z]", "", sentence) # remove ASCII letters
sentence <- gsub("\\.", "", sentence) # remove ASCII full stops

# Drop missing texts. The rows of `train.test` and the entries of `sentence`
# are filtered with the same mask so that they stay aligned.
has_text <- !is.na(sentence)
train.test <- train.test[has_text, ]
sentence <- sentence[has_text]

# Drop texts that are shorter than 2 characters after cleaning.
long_enough <- nchar(sentence) >= 2
train.test <- train.test[long_enough, ]
sentence <- sentence[long_enough]

# Segment every text and report how long the segmentation took.
system.time(x <- segmentCN(strwords = sentence))
# Before running a potentially slow step on the full data, time it on a small
# sample first.
# Turn the segmentation result `x` (one element per text) into a long table of
# terms and keep only the terms that have a weight in the `posneg` lexicon.
temp <- lapply(x, length) # length of each element, i.e. the number of words each text was split into
# NOTE(review): `testterm` is first assigned further down; this call only works
# if a `testterm` object is left over from an earlier run - confirm.
head(testterm,100)
head(x)
temp<- unlist(temp) # lapply() returns a list, so flatten it to a vector
# NOTE(review): `test` is not defined in the visible code (the data read above
# is named `train.test`), and its "id" / "label" columns cannot be confirmed
# from here - verify.
id <- rep(test[, "id"], temp) # repeat each id once per word so that it lines up with the terms
label <- rep(test[, "label"], temp)# repeat the label that belongs to each id the same number of times
term <- unlist(x) # flatten the list of words into one vector
testterm <- as.data.frame(cbind(id, term, label), stringsAsFactors = F)
# the three aligned vectors are bound column-wise into one data frame
# NOTE(review): this `testterm` is overwritten by the join() result below.
x<- unlist(x)
x<-data.frame(x)
names(x)<-'term'
library(plyr)
# join() is called without `by`; presumably it matches on the shared `term`
# column - confirm against posneg.xlsx.
testterm <- join(x, posneg)
# keep only the rows that received a `weight` from the lexicon
testterm <- testterm[!is.na(testterm$weight), ]
head(testterm)
segmentCN(strwords = "地址是圣诺亚大厦",returnType="tm")