R 数据打标签+分词性绘制自定义形状词云图

最新推荐文章于 2024-05-29 09:51:42 发布

我心马拉维

最新推荐文章于 2024-05-29 09:51:42 发布

阅读量402

点赞数

文章标签： r语言 华为 开发语言

本文链接：https://blog.csdn.net/weixin_44140819/article/details/121094596

版权

library(readr)
library(readxl)
library(stringr)
# Seed the random number generator.
set.seed(1)

# Load the raw review table (`a`) and the sampled shop table (`b`).
a <- read_excel("C:\\Users\\lengs\\Desktop\\真的评论原始数据.xlsx")
b <- read_excel("C:\\Users\\lengs\\Desktop\\抽样出来的315家店.xlsx")

# Placeholder brand column on the shop table, then drop its 149th row.
b$brand <- 1
b <- b[-149, ]

# Placeholder brand column and a group column `y` (0 = no group assigned).
a$brand <- 1
a$y <- 0

# Drop every review that belongs to shop 149.
# A logical mask is used instead of `a[-which(cond), ]`: when no row matches,
# `which()` returns integer(0) and the negative index would select zero rows,
# silently emptying the table. Rows with a missing shop number are kept, as
# `which()` would also have kept them.
a <- a[is.na(a$店铺序号num) | a$店铺序号num != 149, ]

# Assign the group by shop number: <=105 -> 1, 106..210 -> 2, 211..315 -> 3.
# The column is addressed by name rather than by position 18, so the result
# does not depend on how many columns the spreadsheet happens to have, and
# `which()` keeps rows with a missing shop number out of the assignment.
a$y[which(a$店铺序号num <= 105)] <- 1
a$y[which(a$店铺序号num > 105 & a$店铺序号num <= 210)] <- 2
a$y[which(a$店铺序号num > 210 & a$店铺序号num <= 315)] <- 3
#' Map one data row to a brand label.
#'
#' Only the 13th element of `x` is inspected. Keywords are tried in a fixed
#' priority order and the label of the first keyword found in that element is
#' returned. Brands without a rule of their own (for example 苹果, 小米, 神舟)
#' end up in the fallback label.
#'
#' @param x A vector holding one row of the data (as passed by `apply(df, 1,
#'   myFun)`); element 13 is the text that is searched.
#' @return A single character string: one of "华为", "联想", "惠普", "华硕",
#'   "游戏类其他", "戴尔" or the fallback "其他".
myFun <- function(x) {
  title <- x[13]

  # A missing or absent 13th element cannot match any keyword. Previously the
  # NA propagated into `if (...)` and aborted the whole apply() call.
  if (length(title) != 1L || is.na(title)) {
    return("其他")
  }

  # Keywords and their labels, in priority order (first match wins).
  # "宏基"/"宏碁" deliberately map to "其他" *before* "戴尔" is tried, which
  # keeps the precedence of the original if/else chain.
  keywords <- c(
    "华为", "联想", "惠普", "华硕",
    "机械革命", "机械师",
    "ROG", "雷神", "雷蛇", "微星", "外星人",
    "宏基", "宏碁",
    "戴尔"
  )
  labels <- c(
    "华为", "联想", "惠普", "华硕",
    "游戏类其他", "游戏类其他",
    "游戏类其他", "游戏类其他", "游戏类其他", "游戏类其他", "游戏类其他",
    "其他", "其他",
    "戴尔"
  )

  # Literal (non-regex) substring search; every keyword is plain text.
  hit <- vapply(
    keywords,
    function(keyword) grepl(keyword, title, fixed = TRUE),
    logical(1),
    USE.NAMES = FALSE
  )

  if (any(hit)) labels[which(hit)[1]] else "其他"
}
# Label every shop (`b`) and every review (`a`) with a brand.
# apply() converts the table to a matrix and hands myFun one row at a time;
# myFun looks at element 13 of that row.
b$brand <- apply(b, 1, myFun)
a$brand <- apply(a, 1, myFun)

# Split the shops into three consecutive groups by row position.
# The ranges must not overlap: the previous 1:105 / 105:209 / 209:314 wrote
# rows 105 and 209 twice, so each ended up in the later group. With row 149
# already removed, rows 1..105, 106..209 and 210..314 line up with the
# <=105 / <=210 / <=315 shop-number groups used for the review table.
# NOTE(review): this assumes `b` is ordered by shop number with one row per
# shop -- confirm against the spreadsheet.
# The column is addressed by name instead of by position 18.
b$y <- 0
b$y[1:105] <- 1
b$y[106:209] <- 2
b$y[210:314] <- 3
#readr::write_excel_csv(b, file = "C:\\Users\\lengs\\Desktop\\带牌子.xlsx")
#readr::write_excel_csv(a, file = "C:\\Users\\lengs\\Desktop\\真的有牌子评论.xlsx")

library(jiebaR)
library(jiebaRD)#用于分词
library(ggplot2)#用于作图
library(tidyverse)#enframe函数需要用到
library(dplyr)#用于使用过滤函数filter()
library(wordcloud2)
library(Rwordseg)
library(tmcn)
library(tm)
# Copy the review text (column 5) into its own data frame so the original
# table is left untouched.
comment <- data.frame(a[, 5])

# Part-of-speech tagging worker.
seg <- worker("tag")

# Segment all reviews; missing reviews carry no text and are skipped.
# NOTE(review): the code below relies on the result being one character
# vector whose names are the POS tags -- confirm for the installed jiebaR.
seg_question <- segment(comment$评价内容[!is.na(comment$评价内容)], seg)

# Read the stop-word list, one word per line. readLines() is used instead of
# read.table(), which treats "#" as a comment marker and splits lines on
# whitespace.
stopwords <- readLines(
  "C:\\Users\\lengs\\Desktop\\hit_stopwords.txt",
  encoding = "UTF-8"
)
stopwords <- trimws(stopwords)
stopwords <- stopwords[nzchar(stopwords)]

# Drop stop-word tokens by exact match. removeWords() works by regex
# substitution, so on a token vector it blanks tokens to "" instead of
# removing them; that left a high-frequency empty "word" in the frequency
# table.
processeddata <- seg_question[!(seg_question %in% stopwords)]

# Word frequencies over all tokens; keep non-empty words seen more than
# 10 times.
freq <- data.frame(table(processeddata))
freq <- freq[freq$processeddata != "" & freq$Freq > 10, ]
wordcloud2(freq, shape = "star") # star-shaped word cloud

# --- Word cloud restricted to one part of speech ---
# enframe() turns the named vector into a tibble with columns `name` (the
# POS tag) and `value` (the token).
title_table <- enframe(seg_question)
# Keep tokens tagged exactly "v". NOTE(review): despite its name,
# `adj_question` holds the "v" (verb) tokens, not adjectives.
adj_question <- dplyr::filter(title_table, name == "v")
processeddata <- adj_question$value[!(adj_question$value %in% stopwords)]
freq <- data.frame(table(processeddata))
# Logical mask instead of `freq[-which(... == ''), ]`, which would drop every
# row whenever no empty token is present.
freq <- freq[freq$processeddata != "", ]
#freq=freq[freq$Freq>5,]
# NOTE(review): "dog" is not one of the shapes listed in the wordcloud2
# documentation -- confirm what is actually rendered.
wordcloud2(freq, shape = "dog")

# Colour words by weight: >300 red, >200 orange, >100 dusty pink, else grey.
js_color_fun = "function (word, weight) { if(weight > 300) {return '#f02222'} else if(weight > 200) {return '#ee7558'} else if(weight > 100){return'#c09292'} else{return '#b6b6b6'}}"

# Mask image for the custom-shaped cloud; it is looked up inside the
# installed wordcloud2 package. system.file() returns "" when the file does
# not exist, so fail with a clear message instead of passing "" on.
jingdong <- system.file("examples/京东东.png", package = "wordcloud2")
if (!nzchar(jingdong)) {
  stop(
    "examples/京东东.png was not found inside the installed wordcloud2 package",
    call. = FALSE
  )
}
my_graph <- wordcloud2(
  freq,
  fontFamily = "微软雅黑",
  figPath = jingdong,
  size = 1,
  color = htmlwidgets::JS(js_color_fun)
)
my_graph

#library(webshot)
#library(htmlwidgets)
#saveWidget(my_graph,"tmp.html",selfcontained = F) #先保存为网页格式
#webshot("tmp.html","wordcloud.jpg", delay = 3,vwidth = 1000, vheight=1000) #再依据网页格式生成jpg格式图片
#去除图例
# ggplot(ToothGrowth, aes(x = dose, y = len))+
# geom_boxplot(aes(fill = dose), show.legend = FALSE) +
# scale_fill_viridis_d()

我心马拉维

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
R 数据打标签+分词性绘制自定义形状词云图

library(readr)library(readxl)library(stringr)set.seed(1)a<-read_excel("C:\\Users\\lengs\\Desktop\\真的评论原始数据.xlsx")b<-read_excel("C:\\Users\\lengs\\Desktop\\抽样出来的315家店.xlsx")b$brand=1b=b[-149,]a$brand=1a$y=0a=a[-which(a$店铺序号num==149),]a[a$店铺
复制链接

扫一扫