本文介绍如何利用贝叶斯算法判断一条短信是否为垃圾短信。
代码如下:
# Load the SMS data set. Text is kept as character (not factor) so that it can
# be re-encoded below. The code relies on the columns `type` and `text`.
data <- read.csv("sms_spam.csv", stringsAsFactors = FALSE)
str(data)
# Prints the whole data frame for inspection.
data
data$type <- factor(data$type)
# Re-encode the message text from WINDOWS-1252 to UTF-8.
data$text <- iconv(x = data$text, from = "WINDOWS-1252", to = "UTF-8")

library(tm)
data_corpus_text <- Corpus(VectorSource(data$text))

# Split: the first 5000 documents are used for training, the rest for testing.
# The upper bound is taken from the corpus itself instead of a hard-coded row
# count, so the split still works if the file has a different number of rows.
n_train <- 5000
n_docs <- length(data_corpus_text)
stopifnot(n_docs > n_train)
data_corpus_text_train <- data_corpus_text[seq_len(n_train)]
data_corpus_text_test <- data_corpus_text[(n_train + 1):n_docs]

dtm_train <- DocumentTermMatrix(x = data_corpus_text_train)
# Build the test matrix against the training vocabulary. Without the
# dictionary, the test matrix gets its own term set, so its columns do not
# line up with the features learned from the training matrix.
dtm_test <- DocumentTermMatrix(
  x = data_corpus_text_test,
  control = list(dictionary = Terms(dtm_train))
)
inspect(dtm_test)
# Convert term counts into a two-level presence factor.
#
# x: vector of counts (compared against 0 element-wise).
# Returns a factor the same length as `x`: the first level for elements that
# are not greater than 0, the second level for elements greater than 0.
# Note that the level labels themselves contain literal double-quote
# characters.
conver_to_matrix <- function(x) {
  is_present <- x > 0
  factor(
    is_present,
    levels = c(FALSE, TRUE),
    labels = c("\"No\"", "\"Yes\"")
  )
}
# Apply conver_to_matrix to every column (MARGIN 2) of the training and test
# document-term matrices.
m_dtm_train <- apply(dtm_train, 2, conver_to_matrix)
m_dtm_test <- apply(dtm_test, 2, conver_to_matrix)

library(e1071)
classifi