#加载所需包
library("jiebaRD")
library("jiebaR")
library('stringi')
library('pbapply')
library('cidian')
library(wordcloud2)
# Read the raw novel text (UTF-8). Passing the path directly lets readLines()
# open and close the file itself; the original readLines(con <- file(...))
# left an open connection behind that was never closed.
data <- readLines("123.txt", encoding = "UTF-8")
# Build a jiebaR segmentation worker with default settings.
mixseg <- worker()

# Domain-specific terms (character and sect names from the novel) so the
# segmenter keeps them as single tokens instead of splitting them apart.
dic <- c(
  "令狐冲", "任盈盈", "岳灵珊", "林平之",
  "岳不群", "左冷禅", "任我行", "辟邪剑谱",
  "葵花宝典", "日月神教", "华山派", "大师兄"
)
new_user_word(mixseg, dic)

# Segment the full text into individual words and echo the result.
seg <- segment(data, mixseg)
seg
# Stop words: one word per line, UTF-8; remove them from the segmented text.
stopwords <- readLines("stop_word.txt", encoding = "UTF-8", warn = FALSE)
seg <- filter_segment(seg, stopwords)

# Count word frequencies; jiebaR:: is spelled out explicitly to avoid
# clashing with same-named freq() functions from other attached packages.
wordfreqs <- jiebaR::freq(seg)
# Sort from most to least frequent; desc() is clearer than negating the column.
wordfreqs <- dplyr::arrange(wordfreqs, dplyr::desc(freq))
# Preview the 20 most frequent words. (The original called head(wordfreqs, 200)
# while its comment said "top 20" — the stated intent of 20 is kept here.)
head(wordfreqs, 20)
# Word cloud 1: alternate red / skyblue across rows, bold text, star shape. ----
colorVec <- rep(c("red", "skyblue"), length.out = nrow(wordfreqs))
my_graph <- wordcloud2(
  data = wordfreqs,
  color = colorVec,
  fontWeight = "bold",
  shape = "star",
  size = 0.8
)
my_graph

# Word cloud 2: black background; words whose frequency (column 2) exceeds ----
# 1100 are highlighted in bright red, the rest in a muted tone.
highlight <- ifelse(wordfreqs[, 2] > 1100, "#f02222", "#c09292")
wordcloud2(wordfreqs, backgroundColor = "black", color = highlight)

