贝叶斯:
#!/usr/bin/python
# -*-coding:utf-8-*-
'''
Created on Mar. 28, 2017
'''
import numpy as np
def getProb(data,label):
    '''
    Estimate the parameters of a two-class naive Bayes model using
    Laplace (add-one) smoothing.

    Parameters:
        data:  sequence of samples; each sample is an indexable sequence of
               hashable feature values. The number of features is taken
               from the first sample.
        label: sequence of class labels, one per sample. Only -1 and 1 are
               counted as classes; samples with any other label contribute
               to the total sample count (the prior denominator) only.

    Returns:
        False if data and label differ in length (kept for backward
        compatibility with existing callers), otherwise the tuple
        (pro0, pro1, pro0Dict, pro1Dict) where
          pro0, pro1          -- smoothed priors of class -1 and class 1:
                                 (classCount + 1) / (sampleCount + 2)
          pro0Dict, pro1Dict  -- {featureIndex: {value: P(value | class)}}
                                 with P = (count + 1) / (classCount + S),
                                 S being the number of distinct values seen
                                 for that feature over both classes.

    Every value observed for a feature gets an entry in BOTH class tables,
    so a value that never occurs together with one class still receives a
    non-zero smoothed probability instead of being absent from the table.
    '''
    if len(data)!=len(label):
        return False
    length=len(data)
    # An empty training set has no features; only the uniform priors remain.
    featureCount=len(data[0]) if length else 0

    label0Count=0
    label1Count=0
    # Start as raw counts; converted to probabilities in place further down.
    pro0Dict={}
    pro1Dict={}
    for j in range(featureCount):
        pro0Dict[j]={}
        pro1Dict[j]={}

    for i in range(length):
        if label[i]==-1:
            label0Count += 1
            own, other = pro0Dict, pro1Dict
        elif label[i]==1:
            label1Count += 1
            own, other = pro1Dict, pro0Dict
        else:
            # Unknown labels are not a class; skip their feature values.
            continue
        for j in range(featureCount):
            value=data[i][j]
            own[j][value]=own[j].get(value,0.0)+1.0
            # Register the value for the other class as well (count 0) so
            # both tables share one key set and one smoothing denominator.
            other[j].setdefault(value,0.0)

    for j in range(featureCount):
        # Both tables hold the same keys, so either one gives S.
        valueCount=len(pro0Dict[j])
        for k in pro0Dict[j]:
            # Counts are floats, so this is true division on Python 2 too.
            pro0Dict[j][k]=(pro0Dict[j][k]+1)/(label0Count+valueCount)
        for k in pro1Dict[j]:
            pro1Dict[j][k]=(pro1Dict[j][k]+1)/(label1Count+valueCount)

    pro1=float(label1Count+1)/(length+2)
    pro0=float(label0Count+1)/(length+2)
    return pro0, pro1,pro0Dict, pro1Dict
def _lookupProb(table,value):
    '''Return table[value], falling back to table[str(value)] if absent.'''
    # Tables may be keyed by raw values or by their string form, depending
    # on how the training data was stored; the raw value is tried first.
    key=value if value in table else str(value)
    return table[key]

def predict(trainData,trainLabel,testData):
    '''
    Classify one sample with the naive Bayes model estimated by getProb.

    Parameters:
        trainData:  training samples, passed straight to getProb.
        trainLabel: training labels (-1 or 1), passed straight to getProb.
        testData:   the sample to classify, a sequence with one value per
                    feature. Every feature contributes to the score.

    Returns:
        -1 if the unnormalised score of class -1 is greater than or equal
        to that of class 1 (ties go to -1), otherwise 1.

    Raises:
        KeyError if a feature value (raw or as str) is missing from a
        class table returned by getProb.

    Side effect: prints both class scores on one line, separated by a space.
    '''
    pro0, pro1, pro0Dict, pro1Dict=getProb(trainData,trainLabel)
    p0=pro0
    p1=pro1
    # Multiply feature by feature, left to right, so two-feature inputs
    # produce exactly the same floating-point result as before.
    for j, value in enumerate(testData):
        p0=p0*_lookupProb(pro0Dict[j],value)
        p1=p1*_lookupProb(pro1Dict[j],value)
    # Single-string form prints identically on Python 2 and Python 3.
    print("%s %s" % (p0,p1))
    if p0 >= p1:
        return -1
    else:
        return 1
if __name__ == "__main__":
    # Demo: 15 training samples with two features each (a number 1-3 and a
    # size 'S'/'M'/'L'). NumPy stores the mixed rows as strings.
    data = np.array([
        [1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'], [1, 'S'],
        [2, 'S'], [2, 'M'], [2, 'M'], [2, 'L'], [2, 'L'],
        [3, 'L'], [3, 'M'], [3, 'M'], [3, 'L'], [3, 'L'],
    ])
    label = np.array([
        -1, -1, 1, 1, -1,
        -1, -1, 1, 1, 1,
        1, 1, 1, 1, -1,
    ])
    # The sample to classify.
    test = [2, 'S']

    # Single-argument print(...) prints the same text as the statement form.
    print(test[0])
    print(getProb(data, label))
    print(predict(data, label, test))