1,环境:RStudio
加载wordcloud2和jiebaR包(如尚未安装,请先运行 install.packages(c("wordcloud2", "jiebaR"))):
library(wordcloud2) # word cloud rendering
library(jiebaR) # Chinese word segmentation
2,准备分词:
# Build the jiebaR segmentation engine (a "mix" worker).
# - dict / hmm / user / idf use the path constants exported by jiebaR.
# - stop_word points at a custom stop-word file.
#   NOTE(original author): the stop-word file did not seem to have any effect.
# - encoding is "UTF-8" because the input is a Chinese text document.
# - write = TRUE: when the input is a file path, the segmented text is
#   written to an output file instead of being returned.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
mixseg <- worker(
  type = "mix", dict = DICTPATH, hmm = HMMPATH, user = USERPATH,
  idf = IDFPATH, stop_word = "C:/Users/xutao/Desktop/stopw.txt",
  write = TRUE, qmax = 20, topn = 5,
  encoding = "UTF-8", detect = TRUE, symbol = FALSE, lines = 1e+05,
  output = NULL, bylines = FALSE, user_weight = "max"
)
3,对txt进行分词:
# Segment the input file with the worker built above. Because the input is a
# file path, this step writes the segmented text out to a new txt file.
# segment(code, worker) is the function form of the `worker <= code` operator.
segment("C:/Users/xutao/Desktop/test.txt", mixseg)
3,读入txt:
# Read the segmented text back in as a character vector of tokens.
# test-4-2.txt is the segmented txt file produced by the step above.
# - what = ""  : read every field as a character string.
# - sep = ""   : fields are delimited by whitespace (this is scan()'s default
#                separator, not "blank lines").
# - encoding   : mark the strings that are read as UTF-8.
data <- scan(
  file = "C:/Users/xutao/Desktop/test-4-2.txt",
  what = "",
  sep = "",
  encoding = "UTF-8"
)
4,统计词频:
# NOTE(review): no plyr function appears to be used in the visible steps;
# table() below is base R. Confirm before relying on this import.
library(plyr)
# Word-frequency table: one entry per distinct token in `data`, with its count.
tableWork <- table(data)
5,排序取前100:
# Keep the most frequent words, at most 100 of them.
# Indexing with a fixed 1:100 would pad the result with NA entries whenever
# the text has fewer than 100 distinct words, so the index is clamped to the
# actual table length (seq_len() also copes with an empty table).
tableWork <- sort(tableWork, decreasing = TRUE)
tableWork <- tableWork[seq_len(min(100L, length(tableWork)))]
6,做词云1:
# Word cloud #1: built-in "star" shape on a black background.
# size is the font scaling factor; smaller values give a smaller picture, so
# a value of 1 or more is recommended. shape selects a built-in outline.
wordcloud2(
  data = tableWork,
  size = 1,
  fontFamily = "微软雅黑",
  color = "random-light",
  backgroundColor = "black",
  shape = "star"
)
7,自定义形状(需要使用黑白图片作为形状模板):
# Word cloud #2: custom shape taken from an image file.
# figPath is the path to your own mask image; tune size until the words fill
# the shape well.
wordcloud2(
  data = tableWork,
  size = 0.8,
  backgroundColor = "black",
  figPath = "C:/Users/xutao/Documents/R/win-library/3.4/wordcloud2/examples/R1.png"
)
8,字符形状:
# Word cloud #3: words arranged in the shape of a character.
# word is the character (or text) whose outline is filled.
# TODO(original author): how to change the background colour here has not
# been worked out yet.
letterCloud(
  data = tableWork,
  word = "Z",
  wordSize = 0.5,
  color = "random-light"
)