# R:数据打标签 + 按词性分词绘制自定义形状词云图

library(readr)
library(readxl)
library(stringr)
# Fix the RNG seed (nothing visible in this script draws random numbers,
# but later steps may).
set.seed(1)

# Load the two workbooks. Judging by the file names, `a` holds the raw
# reviews and `b` the list of 315 sampled shops -- TODO confirm.
a <- read_excel("C:\\Users\\lengs\\Desktop\\真的评论原始数据.xlsx")
b <- read_excel("C:\\Users\\lengs\\Desktop\\抽样出来的315家店.xlsx")

# Placeholder brand column; it is overwritten later by apply(..., myFun).
b$brand <- 1
# Drop the 149th row of the shop table (positional removal).
b <- b[-149, ]

# Placeholder brand column and group label for the reviews.
a$brand <- 1
a$y <- 0

# Drop every review of shop 149.
# A logical mask is used instead of `a[-which(cond), ]`: when nothing matches,
# `-which()` is `-integer(0)` and would silently return ZERO rows.
# `%in%` never yields NA, so rows with a missing shop number are kept, exactly
# as the original `which()`-based form did.
a <- a[!(a$店铺序号num %in% 149), ]

# Label reviews by shop-number band: 1..105 -> 1, 106..210 -> 2, 211..315 -> 3.
# NOTE(review): column 18 is addressed by position; presumably it is the `y`
# column appended above -- confirm against the workbook's column count.
store_no <- a$店铺序号num
a[store_no <= 105, 18] <- 1
a[store_no > 105 & store_no <= 210, 18] <- 2
a[store_no > 210 & store_no <= 315, 18] <- 3
# Map one data row to a brand label based on keywords found in its 13th field.
#
# x: a character vector representing one row (as passed by `apply(df, 1, ...)`);
#    only element 13 is inspected.
#    NOTE(review): element 13 is presumably the product/shop title -- confirm.
#
# Returns a single string: one of "华为", "联想", "惠普", "华硕", "戴尔",
# "游戏类其他" (gaming brands grouped together) or "其他" (everything else).
#
# Rules are tried in order and the first match wins, so a title mentioning
# both "宏碁" and "戴尔" is labelled "其他", as in the original if/else chain.
# Brands whose branches were commented out in the original (苹果, 小米, 神舟)
# fall through to "其他".
myFun <- function(x) {
  title <- x[13]

  # A missing title cannot match any keyword. Without this guard the
  # condition of `if` would be NA and abort the whole apply() call.
  if (is.na(title)) {
    return("其他")
  }

  rules <- list(
    list(brand = "华为", keywords = "华为"),
    list(brand = "联想", keywords = "联想"),
    list(brand = "惠普", keywords = "惠普"),
    list(brand = "华硕", keywords = "华硕"),
    list(
      brand = "游戏类其他",
      keywords = c("机械革命", "机械师", "ROG", "雷神", "雷蛇", "微星", "外星人")
    ),
    # Acer (both spellings) is deliberately folded into "其他" BEFORE the
    # Dell rule, matching the original branch order.
    list(brand = "其他", keywords = c("宏基", "宏碁")),
    list(brand = "戴尔", keywords = "戴尔")
  )

  for (rule in rules) {
    # fixed = TRUE: keywords are literal substrings, not regular expressions.
    hits <- vapply(
      rule$keywords, grepl, logical(1),
      x = title, fixed = TRUE
    )
    if (any(hits)) {
      return(rule$brand)
    }
  }

  "其他"
}
# Derive the brand label for every shop and every review, row by row.
b$brand <- apply(b, 1, myFun)
a$brand <- apply(a, 1, myFun)

# Group label for the shop table, mirroring the labelling applied to `a`
# (shop numbers 1..105 -> 1, 106..210 -> 2, 211..315 -> 3).
# After the 149th row was removed, `b` has 314 rows, so the three bands sit at
# rows 1..105, 106..209 and 210..314. The ranges must NOT overlap: the previous
# 1:105 / 105:209 / 209:314 relabelled rows 105 and 209 into the next group.
# NOTE(review): this assumes `b` is ordered by shop number with one row per
# shop, and that column 18 is the `y` column added here -- confirm.
b$y <- 0
b[1:105, 18] <- 1
b[106:209, 18] <- 2
b[210:314, 18] <- 3
#readr::write_excel_csv(b, file = "C:\\Users\\lengs\\Desktop\\带牌子.xlsx")
#readr::write_excel_csv(a, file = "C:\\Users\\lengs\\Desktop\\真的有牌子评论.xlsx")

library(jiebaR)
library(jiebaRD)#用于分词
library(ggplot2)#用于作图
library(tidyverse)#enframe函数需要用到
library(dplyr)#用于使用过滤函数filter()
library(wordcloud2)
library(Rwordseg)
library(tmcn)
library(tm)
# Copy column 5 of the review data into its own data frame so the original
# table is not modified. The column is accessed below as `评价内容`.
comment <- data.frame(a[, 5])

# jiebaR worker in "tag" mode. The result is used further down as a named
# vector whose names are part-of-speech tags.
seg <- worker("tag")

# Segment all review texts into tokens (this can take several seconds).
seg_question <- segment(comment$评价内容, seg)

# Debug helpers:
# seg_question       # show every token with its tag
# str(seg_question)  # inspect the data type

# Load the stop-word list (first column of a UTF-8 text file).
stopwords <- read.table(
  "C:\\Users\\lengs\\Desktop\\hit_stopwords.txt",
  quote = "", fileEncoding = "UTF-8"
)
class(stopwords)
stopwords <- as.vector(stopwords[, 1])
# Optional: drop entries whose encoding is unknown.
# stopwords <- stopwords[Encoding(stopwords) != "unknown"]

# removeWords() blanks out matching tokens rather than dropping them, so the
# result still contains "" entries that must be filtered out before plotting.
processeddata <- removeWords(seg_question, stopwords)
# Encoding(seg_question) <- "UTF-8"

# Word frequencies as a data frame with columns `processeddata` and `Freq`.
freq <- table(processeddata)
freq <- data.frame(freq)

# Keep words seen more than 10 times, and drop the blank token left behind by
# stop-word removal (otherwise an empty "word" enters the cloud; the
# part-of-speech cloud later in this script already removes it).
freq <- freq[freq$processeddata != "" & freq$Freq > 10, ]

# Draw the star-shaped word cloud.
wordcloud2(freq, shape = "star")
# ---- Word cloud restricted to one part of speech ----

# Turn the named token vector into a two-column table:
# `name` = part-of-speech tag, `value` = token.
title_table <- enframe(seg_question)

# Keep tokens tagged "v". NOTE: despite the `adj_` prefix in the variable
# name, the tag selected here is "v", not an adjective tag.
# dplyr::filter is namespaced so a later-loaded package cannot mask it.
adj_question <- dplyr::filter(.data = title_table, name == "v")

# Blank out stop words, then count what is left.
processeddata <- removeWords(adj_question$value, stopwords)
freq <- table(processeddata)
freq <- data.frame(freq)

# Drop the blank token left by removeWords().
# A logical mask is used instead of `freq[-which(cond), ]`: when there is no
# blank token, `-which()` is `-integer(0)` and would return an EMPTY table.
freq <- freq[freq$processeddata != "", ]
# freq <- freq[freq$Freq > 5, ]

# NOTE(review): "dog" is not among the preset shapes listed in the wordcloud2
# documentation (circle, cardioid, diamond, triangle-forward, triangle,
# pentagon, star) -- confirm what this renders as.
wordcloud2(freq, shape = "dog")

# JavaScript callback colouring words by weight:
# > 300 red, > 200 orange, > 100 dusty pink, otherwise grey.
js_color_fun = "function (word, weight) { if(weight > 300) {return '#f02222'} else if(weight > 200) {return '#ee7558'} else if(weight > 100){return'#c09292'} else{return '#b6b6b6'}}" 

# Mask image for the custom-shaped cloud; it must have been copied into the
# installed wordcloud2 package's `examples` directory beforehand
# (system.file() returns "" when the file is not there).
jingdong <- system.file("examples/京东东.png", package = "wordcloud2")

my_graph <- wordcloud2(
  freq,
  fontFamily = "微软雅黑",
  figPath = jingdong,
  size = 1,
  color = htmlwidgets::JS(js_color_fun)
)
my_graph


#library(webshot)
#library(htmlwidgets)
#saveWidget(my_graph,"tmp.html",selfcontained = F) #先保存为网页格式
#webshot("tmp.html","wordcloud.jpg", delay = 3,vwidth = 1000, vheight=1000) #在依据网页格式生成jpg图片格式
#去除图例
# ggplot(ToothGrowth, aes(x = dose, y = len))+ 
#   geom_boxplot(aes(fill = dose), show.legend = FALSE) +
#   scale_fill_viridis_d()
 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值