R语言读取文件报错之一:incomplete final line found by readTableHeader on ‘xxxx.txt’
遇到的问题及解决:
载入了名字空间‘ellipsis’ 0.3.1,但需要的是>= 0.3.2
解决方法:重启 R 会话后执行 install.packages('ellipsis'),将 ellipsis 更新到 0.3.2 或更高版本,再重新加载相关的包。
频数提取代码:
# Word-frequency extraction: segment a text file with jiebaR, remove stop
# words, count the remaining tokens and draw a word cloud.

# Install only the packages that are missing, so re-running the script does
# not reinstall everything. readtext, stringr and wordcloud are used below
# via `::` / library() and are therefore checked as well.
required_pkgs <- c("dplyr", "jiebaR", "readtext", "stringr", "wordcloud")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(dplyr)
library(jiebaR)
library(wordcloud)

# NOTE(review): machine-specific working directory; kept so that the relative
# file names below resolve exactly as before. Adjust it for your own machine.
setwd("E:\\a乱七八糟\\r语言作业\\资料")

# Read the stop-word list, one word per line. Splitting on "\r?\n" also
# handles CRLF files; a plain "\n" split would leave a trailing "\r" on each
# word, and such words would never match a token.
stopwords <- readtext::readtext("stopwords.txt") %>%
  as.character() %>%
  stringr::str_split("\\r?\\n") %>%
  unlist()
# tolower(stopwords)

# Show the first 50 stop words.
head(stopwords, n = 50)

# Tell the jiebaR worker where the stop-word list is.
tokenizer <- worker(stop_word = "stopwords.txt")

# Read debate.txt as a single string.
text <- readtext::readtext("debate.txt") %>%
  as.character()

# Segment the text into words.
words <- segment(text, tokenizer)

# Show the first 20 segmented words.
head(words, n = 20)

# Drop every token found in the stop-word list. Vectorized subsetting
# replaces the original loop that grew the result with c() on each iteration.
new_words <- words[!words %in% stopwords]
head(new_words)

# Count the tokens (columns `char` and `freq`, as used below) and sort by
# descending frequency.
wordfreqs <- jiebaR::freq(new_words)
wordfreqs <- dplyr::arrange(wordfreqs, dplyr::desc(freq))
# wordfreqs

# Word cloud of all tokens; random.order = FALSE plots words in decreasing
# frequency instead of random order.
wordcloud(
  wordfreqs$char,
  wordfreqs$freq,
  colors = rainbow(300),
  random.order = FALSE
)
关键词代码:
# Keyword extraction: rank the words in `wordfreqs` (built by the
# word-frequency section) by term frequency and plot the top ones.

# Install pacman only when it is missing.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)
library(dplyr)

# p_load() attaches the listed packages (pacman installs missing ones).
p_load(tidyverse, tidytext, textrank, rio, jiebaR)
p_load(cidian)

# Treat the whole text as one document: every row gets the same document id.
f_table <- wordfreqs %>%
  mutate(id = 1L)

# Add tf, idf and tf_idf columns. With a single document the idf term is
# ln(1 / 1) = 0, so tf_idf is 0 for every word; that is why the ranking
# below uses `tf` rather than `tf_idf`.
tf_idf_table <- f_table %>%
  bind_tf_idf(term = char, document = id, n = freq)

# Keep the three highest-tf words per document (ties are kept).
top3 <- tf_idf_table %>%
  group_by(id) %>%
  top_n(3, tf) %>%
  ungroup()

# Word cloud of the top keywords. wordcloud() comes from the wordcloud
# package attached in the word-frequency section.
wordcloud(top3$char, top3$freq, colors = rainbow(30), random.order = FALSE)
参考资料:
R语言自然语言处理:关键词提取(TF-IDF)
情感分析代码:
# Sentiment analysis: match the counted words against the "bing" lexicon and
# plot net sentiment (positive minus negative) per bin of 40 word ids.
# Relies on `wordfreqs` from the word-frequency section; get_sentiments(),
# the dplyr verbs and ggplot() are presumably attached by the earlier
# p_load(tidyverse, tidytext, ...) call. Confirm when running this section
# on its own.

# Install tidyr only when it is missing.
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
library(tidyr)

# Display the lexicon (columns `word` and `sentiment`).
get_sentiments("bing")

# Number the rows; row_number() is safe on an empty table, whereas 1:n()
# yields c(1, 0) and fails.
wordfreqs <- wordfreqs %>%
  mutate(id = row_number())

# Keep only words that appear in the lexicon.
woman_D <- inner_join(
  wordfreqs,
  get_sentiments("bing"),
  by = c("char" = "word")
)

# Count matched words per (bin, sentiment); a bin covers 40 consecutive ids.
# NOTE(review): this counts distinct words, not occurrences (`freq` is not
# used as a weight). Confirm that this is intended.
woman_E <- count(woman_D, index = id %/% 40, sentiment)

# One column per sentiment. Bins that lack one polarity are filled with 0;
# otherwise they would be NA and the subtraction below would yield NA,
# dropping those bars from the plot.
woman_F <- pivot_wider(
  woman_E,
  names_from = sentiment,
  values_from = n,
  values_fill = list(n = 0L)
)

# Net sentiment per bin.
woman_G <- mutate(woman_F, sentiment = positive - negative)

ggplot(woman_G, aes(index, sentiment)) +
  geom_col()