library(tm)
library(ggplot2)
# Directory holding the raw spam messages. The trailing slash is kept
# because file names are appended to this string directly.
spam.path <- "data/spam/"
# Extract the body of a message stored in a text file.
#
# The file is read as latin1 text. Everything after the first empty line
# is treated as the body (the lines before it are presumably the e-mail
# header) and is returned as a single string joined with "\n".
#
# Args:
#   path: Path to the message file.
#
# Returns:
#   A length-one character vector with the message body. An empty string
#   is returned when the file contains no empty separator line, or when
#   nothing follows the first empty line.
get.msg <- function(path) {
  con <- file(path, open = "rt", encoding = "latin1")
  # Release the connection even if reading fails part-way through.
  on.exit(close(con), add = TRUE)
  text <- readLines(con)

  # Index of the first empty line; NA when the file has none.
  first.blank <- which(text == "")[1]

  # Without this guard seq() fails: an NA start gives
  # "'from' must be a finite number", and a separator on the very last
  # line asks for a descending sequence with a positive step.
  if (is.na(first.blank) || first.blank >= length(text)) {
    return("")
  }

  msg <- text[seq(first.blank + 1, length(text), 1)]
  paste(msg, collapse = "\n")
}
# List every entry in the spam directory, excluding the one named "cmds"
# (presumably an index file rather than a message -- confirm against the
# data set).
spam.docs <- dir(spam.path)
spam.docs <- spam.docs[spam.docs != "cmds"]

# Read the body of each message into a character vector named by file.
# vapply() pins the result type to character, so an empty directory gives
# an empty character vector where sapply() would return a list.
all.spam <- vapply(
  spam.docs,
  function(p) get.msg(paste0(spam.path, p)),
  character(1)
)
运行到data/spam/00035.7ce3307b56dd90453027a6630179282e时出错,提示:
Error in seq.default(which(text == "")[1] + 1, length(text), 1) :
'from' must be a finite number
In addition: Warning messages:
1: In readLines(con) :
'data/spam/00006.5ab5620d3d7c6c0db76234556a16f6c1'
2: In readLines(con) :
'data/spam/00009.027bf6e0b0c4ab34db3ce0ea4bf2e