#A Naive Bayesian Classifier
# Number of feature columns preceding the class label in every CSV row.
NUM_FEATURES = 4
TRAIN_PATH = "D:\\iris.trn"
TEST_PATH = "D:\\iris.tst"


def _parse_row(line):
    """Split one CSV line into ``(feature_values, label)``.

    The label is the last comma-separated field. Returns ``None`` for a
    blank line so that callers can skip it (data files frequently end with
    one or more empty lines).
    """
    if not line.strip():
        return None
    fields = line.rstrip().split(',')
    label = fields.pop()
    return fields, label


def train(lines, n_features=NUM_FEATURES):
    """Build the frequency tables from an iterable of training lines.

    Returns ``(total, histo)`` where ``total[label]`` is the number of
    training rows carrying that label and ``histo[label][i][value]`` is how
    many of those rows had ``value`` in feature column ``i``.

    Raises ``IndexError`` if a non-blank row has fewer than ``n_features``
    feature fields.
    """
    total = {}
    histo = {}
    for line in lines:
        row = _parse_row(line)
        if row is None:
            continue
        features, label = row
        if label not in total:
            total[label] = 0
            histo[label] = [{} for _ in range(n_features)]
        total[label] += 1
        for i in range(n_features):
            counts = histo[label][i]
            # Counts are kept as floats so the later division is a true
            # division on Python 2 as well.
            counts[features[i]] = 1 + counts.get(features[i], 0.0)
    return total, histo


def class_scores(features, total, histo, n_features=NUM_FEATURES):
    """Return ``{label: score}`` for one test row.

    The score of a class is the product, over all feature columns, of the
    relative frequency of the row's value within that class. A value never
    seen for a class contributes 0.0 (no smoothing is applied), which zeroes
    the whole product.
    """
    scores = {}
    for label in total:
        score = 1
        for i in range(n_features):
            score *= histo[label][i].get(features[i], 0.0) / total[label]
        scores[label] = score
    return scores


def predict(scores):
    """Return the label with the highest score.

    Ties are resolved in favour of the label visited last (``>=``); with an
    empty ``scores`` mapping the sentinel ``-1`` is returned.
    """
    best, predicted = 0, -1
    for label in scores:
        if scores[label] >= best:
            best, predicted = scores[label], label
    return predicted


def format_row(flag, actual, predicted, scores):
    """Format one result line: hit/miss flag, true label, prediction, scores.

    The spacing reproduces what the equivalent Python 2 ``print`` statements
    with comma-separated items emit.
    """
    head = "%s \t%s \t%s \t" % (flag, actual, predicted)
    tail = "".join("%s : %s \t" % (label, scores[label]) for label in scores)
    return head + tail


def evaluate(lines, total, histo, n_features=NUM_FEATURES):
    """Classify every test line, print one result row each, return ``(hit, miss)``."""
    hit, miss = 0, 0
    for line in lines:
        row = _parse_row(line)
        if row is None:
            continue
        features, actual = row
        scores = class_scores(features, total, histo, n_features)
        predicted = predict(scores)
        if actual == predicted:
            flag = '+'
            hit += 1
        else:
            flag = '-'
            miss += 1
        print(format_row(flag, actual, predicted, scores))
    return hit, miss


def main():
    """Train on TRAIN_PATH, evaluate on TEST_PATH and print the accuracy."""
    with open(TRAIN_PATH, 'r') as train_file:
        total, histo = train(train_file)

    # Read the test set, score every class and pick the most likely one.
    with open(TEST_PATH) as test_file:
        hit, miss = evaluate(test_file, total, histo)

    print("")
    tested = hit + miss
    # Guard against an empty test set instead of dividing by zero.
    accuracy = hit / (tested + 0.0) if tested else 0.0
    print("%s out of  %s correct-Accuracy:  %s" % (hit, tested, accuracy))


if __name__ == "__main__":
    main()
# Results (sample output of a run on a five-row test set):
# + Iris-setosa Iris-setosa Iris-virginica : 0.0 Iris-setosa : 0.000764069733796 Iris-versicolor : 0.0
# + Iris-setosa Iris-setosa Iris-virginica : 0.0 Iris-setosa : 0.000377136983989 Iris-versicolor : 0.0
# + Iris-versicolor Iris-versicolor Iris-virginica : 0.0 Iris-setosa : 0.0 Iris-versicolor : 2.88e-05
# + Iris-versicolor Iris-versicolor Iris-virginica : 0.0 Iris-setosa : 0.0 Iris-versicolor : 0.0004368
# + Iris-virginica Iris-virginica Iris-virginica : 1.728e-05 Iris-setosa : 0.0 Iris-versicolor : 0.0
# 5 out of 5 correct-Accuracy: 1.0