song学习词云图

最新推荐文章于 2024-07-17 17:53:42 发布

楚漪菡

最新推荐文章于 2024-07-17 17:53:42 发布

阅读量53

点赞数

文章标签：大数据 java

本文链接：https://blog.csdn.net/m0_63650993/article/details/125446158

版权

#' Split a string into overlapping two-character pieces (bigrams).
#'
#' For a string of n characters this returns the n - 1 substrings made of
#' characters (1,2), (2,3), ..., (n-1,n).
#' Example: splitwords("abcdef") gives "ab" "bc" "cd" "de" "ef".
#'
#' @param x A single character string.
#' @return A character vector of bigrams; `character(0)` when `x` is `NA`
#'   or has fewer than two characters.
splitwords <- function(x) {
  if (length(x) != 1L) {
    stop("splitwords() expects a single string", call. = FALSE)
  }
  n <- nchar(x)
  # With fewer than two characters there is no bigram. The former
  # 1:(n - 1) / 2:n sequences counted downwards in that case and produced
  # spurious pieces (e.g. "a" twice for the input "a").
  if (is.na(n) || n < 2L) {
    return(character(0))
  }
  starts <- seq_len(n - 1L)
  substring(x, starts, starts + 1L)
}

# Usage
# Load the poem table, reading every column as character.
txt <- read.csv("SongPoem.csv", colClasses = "character")

# Cut the Sentence column at Chinese punctuation marks, flatten the pieces
# into one character vector, and discard the empty ones.
sentences <- unlist(strsplit(txt$Sentence, "，|。|！|？|、"))
is_non_empty <- sentences != ""
sentences <- sentences[is_non_empty]
rm(is_non_empty)

# A very long fragment probably comes from malformed characters, so keep
# only the fragments of at most 15 characters.
sentences <- sentences[nchar(sentences) <= 15]
s.len <- nchar(sentences)

# Word-frequency statistics
# Run splitwords() on every fragment and pool the resulting pieces into a
# single character vector.
words <- unlist(lapply(sentences, splitwords))
words.freq <- table(words)
words.freq <- sort(words.freq, decreasing = TRUE)
# Show the 100 most frequent pieces. The index is clamped to the table
# length so that no NA entries appear when fewer than 100 distinct pieces
# exist.
words.freq[seq_len(min(100L, length(words.freq)))]

# Draw the word cloud
# library() stops immediately with a clear error when the package is not
# installed; require() would only return FALSE and fail later.
library(wordcloud2)
# Use ranks 2..500 of the frequency table, clamped to the entries that
# actually exist so that no NA rows reach the plot.
# NOTE(review): the top-ranked entry is skipped, as in the original script;
# presumably intentional -- confirm.
wf <- words.freq[-1]
wf <- wf[seq_len(min(499L, length(wf)))]
d <- data.frame(word = names(wf), freq = as.numeric(wf))
wordcloud2(d, size = 0.5)