# ---- Load the corpus ----
# Install RODBC only when it is missing, instead of re-downloading it on every
# run. No statement visible in this script references RODBC; it is still
# attached here in case code outside this view relies on it.
if (!requireNamespace("RODBC", quietly = TRUE)) {
  install.packages("RODBC")
}
library(RODBC)

fileName <- "宋詞三百首.txt"
# Data source: https://github.com/rime-aca/corpus

# The path is relative to the working directory, so fail early with a message
# that names both the file and the directory that was searched.
if (!file.exists(fileName)) {
  stop(
    "Corpus file not found: ", fileName,
    " (working directory: ", getwd(), ")",
    call. = FALSE
  )
}

# The size in bytes is an upper bound on the number of characters, so this
# reads the entire file into a single string.
SC <- readChar(fileName, file.info(fileName)$size)

# Preview a short slice of the corpus as a sanity check.
substr(SC, 1000, 1100)
# ---- Segment the corpus and count word frequencies ----
# Install jiebaR only when it is missing, instead of re-installing it on every
# run.
if (!requireNamespace("jiebaR", quietly = TRUE)) {
  install.packages("jiebaR")
}
library("jiebaR")

# Segmentation worker created with jiebaR's default settings.
cc <- worker()

# `cc[SC]` runs the worker on the corpus text; table() counts how often each
# resulting token occurs.
analysis <- as.data.frame(table(cc[SC]))
names(analysis) <- c("word", "freq")
# table() yields the tokens as a factor; keep them as plain strings so that
# nchar() can be applied to them later.
analysis$word <- as.character(analysis$word)
# Most frequent words first.
analysis <- analysis[order(-analysis$freq), ]
head(analysis)
# ---- Word clouds ----
# Install wordcloud2 only when it is missing, instead of re-installing it on
# every run.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(wordcloud2)

# Cloud built from the complete frequency table.
wordcloud2(analysis)

# Clouds restricted to words with 1 < freq < 300, drawn separately for words
# of one, two and three characters.
wordcloud2(
  analysis[analysis$freq > 1 & analysis$freq < 300 & nchar(analysis$word) == 1, ]
)
wordcloud2(
  analysis[analysis$freq > 1 & analysis$freq < 300 & nchar(analysis$word) == 2, ]
)
wordcloud2(
  analysis[analysis$freq > 1 & analysis$freq < 300 & nchar(analysis$word) == 3, ]
)
# ---- Tag the template verse and the candidate vocabulary ----
# Template verse whose word sequence is imitated by write_songci().
cipai <- "画堂晨起,来报雪花坠。高卷帘栊 看 佳瑞,皓色远 迷 庭砌。盛气光引 炉烟,素草寒生玉佩。应是天仙狂醉,乱把白云揉碎。"

# jiebaR worker of type "tag"; applying it with `<=` runs it on the text.
tagger <- worker(type = "tag")
cipai_2 <- (tagger <= cipai)
cipai_2

# Candidate vocabulary: words of at most two characters with 1 < freq < 300.
keep_row <- analysis$freq > 1 & analysis$freq < 300 & nchar(analysis$word) < 3
example <- analysis[keep_row, ]

# The names of the tagged template are kept separately as `cixing`
# ("part of speech"); write_songci() matches candidates against them.
cixing <- names(cipai_2)

# Run the same tagging worker over the candidate words.
example_2 <- (tagger <= example$word)
#' Compose a pseudo Song-ci verse by substituting words into the template.
#'
#' For each element of the tagged template `cipai_2`, one replacement is drawn
#' at random from `example_2`, restricted to entries whose name equals the
#' template tag `cixing[i]` and whose character count equals that of the
#' template word. When no such entry exists, the template word itself is kept
#' so the verse keeps its length. The picks are concatenated and cut into eight
#' lines of 4, 5, 7, 6, 6, 6, 6 and 6 characters, closed alternately by a comma
#' and a full stop.
#'
#' Reads the variables `cipai_2`, `cixing` and `example_2` from the enclosing
#' environment, and calls `set.seed(m)`, which resets the session's random
#' number generator as a side effect.
#'
#' @param m Seed handed to `set.seed()`; the same seed with the same template
#'   and vocabulary reproduces the same verse.
#' @return A single character string holding the punctuated verse.
write_songci <- function(m) {
  set.seed(m)

  vocab_tags <- names(example_2)

  # One pick per template slot. seq_along() keeps an empty template from
  # iterating over c(1, 0) as 1:length() would.
  picks <- vapply(seq_along(cipai_2), function(i) {
    slot_word <- cipai_2[[i]]
    candidates <- example_2[vocab_tags == cixing[i]]
    candidates <- candidates[nchar(candidates) == nchar(slot_word)]
    if (length(candidates) == 0L) {
      # sample() would abort here; keep the template word instead.
      return(slot_word)
    }
    sample(candidates, 1)
  }, character(1), USE.NAMES = FALSE)

  verse <- paste0(picks, collapse = "")

  # Character counts of the eight lines; start and end offsets follow from
  # the running total.
  line_lengths <- c(4L, 5L, 7L, 6L, 6L, 6L, 6L, 6L)
  line_ends <- cumsum(line_lengths)
  line_starts <- line_ends - line_lengths + 1L
  verse_lines <- substring(verse, line_starts, line_ends)

  # Odd lines end with a comma, even lines with a full stop.
  closers <- rep(c(",", "。"), length.out = length(verse_lines))
  paste0(verse_lines, closers, collapse = "")
}
# Generate one verse per seed (5 and 6); the result is a list of two strings.
lapply(X = 5:6, FUN = write_songci)
