R语言实现朴素贝叶斯

# Naive Bayes classifier, built incrementally onto an R6 class:
# methods are attached below with NBayes$set().
library(R6)
NBayes <- R6Class("NBayes")
# Step 1 of the text analysis: the toy corpus. Returns a list of
# documents, each a character vector of word tokens.
loadDataSet <- function() {
  postingList <- list(
    c("my", "dog", "has", "flea", "problems", "help", "please"),
    c("maybe", "not", "take", "him", "to", "dog", "park", "stupid"),
    c("my", "dalmation", "is", "so", "cute", "I", "love", "him"),
    c("stop", "posting", "stupid", "worthless", "garbage"),
    c("mr", "licks", "ate", "my", "steak", "how", "to", "stop", "him"),
    c("quit", "buying", "worthless", "dog", "food", "stupid")
  )
  postingList
}
NBayes$set(“private”,“loadDataSet”,loadDataSet,overwrite=T)

# Vocabulary = ordered union of every word across all documents.
# unique(unlist(...)) is the vectorized equivalent of the original
# grow-in-a-loop union() and preserves first-occurrence order.
createVocabList <- function(dataset) {
  unique(unlist(dataset))
}
NBayes$set(“private”,“createVocabList”,createVocabList,overwrite = T)

# Build the feature data frame from the vocabulary (set-of-words model;
# the tm package offers the same kind of text preprocessing).
createData <- function() {
  # Encode one document as a 0/1 vector over the vocabulary.
  setofWords2Vec <- function(vocabList, inputset) {
    as.numeric(vocabList %in% inputset)
  }
  tmp <- private$loadDataSet()
  vocabList <- private$createVocabList(tmp)
  tmp1 <- lapply(tmp, setofWords2Vec, vocabList = vocabList)
  # One row per document, one column per vocabulary word.
  res <- as.data.frame(t(as.data.frame(tmp1)))
  row.names(res) <- 1:nrow(res)
  names(res) <- vocabList
  # NOTE(review): the label assignment was garbled in the source; the classic
  # dataset marks documents 2, 4, 6 as abusive -- confirm against the original.
  res$cls <- rep(c("notabusive", "abusive"), 3)
  res
}
NBayes$set("public", "createData", createData, overwrite = TRUE)
nbayes <- NBayes n e w ( ) n b a y e s new() nbayes new()nbayescreateData()

# Conditional-independence demo: estimate P(my = 0 | cls = abusive) by counting.
nbayes <- NBayes$new()                 # build an instance
demoData <- nbayes$createData()        # 6 rows, one column per word plus cls
# NOTE(review): this line was garbled in the source; reconstructed as the
# joint count of (my absent AND class abusive) -- confirm the intended filter.
rn1 <- nrow(subset(demoData, my == 0 & cls == "abusive"))  # numerator: joint count
rn2 <- nrow(subset(demoData, cls == "abusive"))            # denominator: class count
rn1 / rn2
# Build one conditional probability table per feature:
# tables[[feat]][cls, val] = P(feature = val | class = cls).
# The last column of `dataset` is taken as the class label.
featPropTbl <- function(dataset) {
  featureCols <- names(dataset)[1:(ncol(dataset) - 1)]
  clsCol <- dataset[, ncol(dataset)]
  condProb <- function(featName) {
    counts <- table(clsCol, dataset[, featName])
    prop.table(counts, margin = 1)  # each class row sums to 1
  }
  tables <- lapply(featureCols, condProb)
  names(tables) <- featureCols
  tables
}
# Attach the table builder and fit it on the demo data.
NBayes$set("public", "featPropTbl", featPropTbl, overwrite = TRUE)
nbayes <- NBayes$new()
nbayesProbTbl <- nbayes$featPropTbl(demoData)

# Read P(flea = 1 | cls = abusive) out of the fitted conditional table.
tbl <- nbayesProbTbl[["flea"]]
tbl[row.names(tbl) == "abusive", colnames(tbl) == "1"]

# Train the model: the per-feature conditional tables plus the class
# priors attached as the "priorProb" attribute (as read by classifyNB).
# NOTE(review): this body was destroyed in the source and is reconstructed
# from how classifyNB consumes the model -- confirm against the original.
trainNB0 <- function(trainingData) {
  model <- self$featPropTbl(trainingData)
  clsCounts <- table(trainingData[, ncol(trainingData)])  # counts per class
  attr(model, "priorProb") <- prop.table(clsCounts)       # prior probabilities
  model
}
NBayes$set("public", "trainNB0", trainNB0, overwrite = TRUE)
nbbayes <- NBayes$new()
model <- nbbayes$trainNB0(demoData)
attr(model, "priorProb")

# Classify observation `w` (a named vector: feature name -> value) with a
# trained model: pick the class maximising prior * product of conditionals.
classifyNB <- function(model, w) {
  priorProb <- attr(model, "priorProb")
  # Look up P(feat = featVal | cls) in the model's conditional table.
  lookupProb <- function(cls, feat, featVal) {
    tbl <- model[[feat]]
    tbl[row.names(tbl) == as.character(cls), colnames(tbl) == as.character(featVal)]
  }
  # Unnormalised posterior score for a single class.
  scoreCls <- function(cls) {
    condProbs <- sapply(seq_along(w), function(i) lookupProb(cls, names(w)[i], w[i]))
    prod(condProbs) * priorProb[cls]
  }
  scores <- sapply(names(priorProb), scoreCls)
  print(scores)
  names(priorProb)[which.max(scores)[1]]
}
NBayes$set(“public”,“classifyNB”,classifyNB,overwrite=T)

# Toy e-mail data: four binary word-presence features plus the class label.
via <- c("yes", "yes", "no", "yes", "no", "no", "no")
mon <- c("yes", "no", "yes", "no", "yes", "no", "no")
groc <- c("no", "no", "yes", "no", "yes", "yes", "no")
unsub <- c("yes", "no", "no", "no", "yes", "no", "yes")
cls <- c("spam", "spam", "ham", "ham", "ham", "ham", "spam")
mydf <- data.frame(via = via, mon = mon, groc = groc, unsub = unsub, cls = cls,
                   stringsAsFactors = FALSE)
mydf

# Classify a new e-mail with this word-presence pattern.
newemail <- c("yes", "no", "no", "yes")
names(newemail) <- names(mydf)[1:(ncol(mydf) - 1)]
nbayes <- NBayes$new()
model <- nbayes$trainNB0(mydf)
nbayes$classifyNB(model, newemail)

# Sanity check: classify the first training document itself.
nbayes <- NBayes$new()
model <- nbayes$trainNB0(demoData)
nbayes$classifyNB(model, unlist(demoData[1, -ncol(demoData)]))

# Log-space variant of classifyNB: summing logs avoids floating-point
# underflow when many small conditional probabilities are multiplied;
# the argmax is unchanged because log is monotone.
# NOTE(review): an unseen feature value gives log(0) = -Inf for that class;
# Laplace smoothing upstream would be needed to avoid this.
classifyNB <- function(model, w) {
  priorProb <- attr(model, "priorProb")
  # Look up P(feat = featVal | cls) in the model's conditional table.
  lookupProb <- function(cls, feat, featVal) {
    tbl <- model[[feat]]
    tbl[row.names(tbl) == as.character(cls), colnames(tbl) == as.character(featVal)]
  }
  # Log posterior score (unnormalised) for a single class.
  scoreCls <- function(cls) {
    condProbs <- sapply(seq_along(w), function(i) lookupProb(cls, names(w)[i], w[i]))
    sum(log(condProbs)) + log(priorProb[cls])
  }
  scores <- sapply(names(priorProb), scoreCls)
  print(scores)
  names(priorProb)[which.max(scores)[1]]
}
# Re-attach the (log-space) classifier and re-run the self-classification check.
NBayes$set("public", "classifyNB", classifyNB, overwrite = TRUE)
nbayes <- NBayes$new()
model <- nbayes$trainNB0(demoData)
nbayes$classifyNB(model, unlist(demoData[1, -ncol(demoData)]))
# Classify the new e-mail again with the log-space classifier.
newemail <- c("yes", "no", "no", "yes")
names(newemail) <- names(mydf)[1:(ncol(mydf) - 1)]
nbayes <- NBayes$new()
model <- nbayes$trainNB0(mydf)
nbayes$classifyNB(model, newemail)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值