输入和输出
极大似然估计法
算法流程
import numpy as np
def createData():
    """Build the training set used by the naive Bayes example.

    Every sample has two features and a class label.

    Returns:
        tuple: ``(X1, X2, C, A1, A2, Y)`` where ``X1`` and ``X2`` hold the
        per-sample values of the first and second feature, ``C`` holds the
        per-sample class labels, and ``A1``, ``A2``, ``Y`` list the distinct
        values of ``X1``, ``X2`` and ``C`` in first-seen order.
    """
    # Training set, one row per sample: ((feature 1, feature 2), label).
    # Kept as plain Python data: the rows are ragged ([[x1, x2], y]), and
    # np.array() raises ValueError for such input on NumPy >= 1.24.
    T = [
        ((1, 'S'), -1), ((1, 'M'), -1), ((1, 'M'), 1), ((1, 'S'), 1),
        ((1, 'S'), -1), ((2, 'S'), -1), ((2, 'M'), -1), ((2, 'M'), 1),
        ((2, 'L'), 1), ((2, 'L'), 1), ((3, 'L'), 1), ((3, 'M'), 1),
        ((3, 'M'), 1), ((3, 'L'), 1), ((3, 'L'), -1),
    ]

    # X1, X2 are the features; C is the class label of each sample.
    X1 = [features[0] for features, _ in T]
    X2 = [features[1] for features, _ in T]
    C = [label for _, label in T]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result does not depend on string hash randomisation the way
    # list(set(...)) does.
    A1 = list(dict.fromkeys(X1))
    A2 = list(dict.fromkeys(X2))
    Y = list(dict.fromkeys(C))
    return X1, X2, C, A1, A2, Y
def probability(X1, X2, C, A1, A2, Y):
    """Estimate class priors and conditionals by maximum likelihood.

    Args:
        X1: per-sample values of the first feature.
        X2: per-sample values of the second feature.
        C: per-sample class labels, aligned with ``X1`` and ``X2``.
        A1: distinct values of the first feature.
        A2: distinct values of the second feature.
        Y: distinct class labels.

    Returns:
        tuple: ``(P_y, P)``. ``P_y`` maps each label ``y`` to ``P(Y=y)``.
        ``P`` maps the string ``"x|y"`` to ``P(X=x | Y=y)`` for every
        feature value ``x`` in ``A1`` and ``A2``.

    Raises:
        ZeroDivisionError: if ``C`` is empty while ``Y`` is not, or if a
            label in ``Y`` never occurs in ``C``.

    Note:
        The keys of ``P`` do not encode which feature a value belongs to,
        so a value present in both ``A1`` and ``A2`` is counted against
        ``X1`` only.
    """
    from collections import Counter

    total = len(C)

    # Prior P(y).
    P_y = {y: C.count(y) / total for y in Y}

    # Count every (feature value, label) pair in one pass per feature
    # instead of rescanning the training set for each pair.
    pair_counts_1 = Counter(zip(X1, C))
    pair_counts_2 = Counter(zip(X2, C))

    # Conditional probability P(x|y) = P(x, y) / P(y).
    P = {}
    for x in list(A1) + list(A2):
        pair_counts = pair_counts_1 if x in A1 else pair_counts_2
        for y in P_y:
            joint = pair_counts[(x, y)] / total
            P[str(x) + '|' + str(y)] = joint / P_y[y]
    return P_y, P
def testProbability(x):
    """Classify one instance with the maximum-likelihood naive Bayes model.

    The model is rebuilt from ``createData()`` on every call.

    Args:
        x: the instance to classify, as ``[feature 1 value, feature 2 value]``.

    Returns:
        The label in ``Y`` with the highest score; the first such label
        wins a tie. The label is also printed.

    Raises:
        KeyError: if a value in ``x`` is not among the feature values the
            model was trained on.
    """
    X1, X2, C, A1, A2, Y = createData()
    P_y, P = probability(X1, X2, C, A1, A2, Y)

    # Score of each class: P(y) * P(x1|y) * P(x2|y).
    scores = [
        P_y[y] * P[str(x[0]) + '|' + str(y)] * P[str(x[1]) + '|' + str(y)]
        for y in Y
    ]
    predicted = Y[scores.index(max(scores))]
    print(predicted)
    return predicted
testProbability( [2,'S'] ) # classify the instance [2, S]
**贝叶斯估计法**
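条件概率的贝叶斯估计为

$$P_\lambda(X^{(j)}=a_{jl}\mid Y=c_k)=\frac{\sum_{i=1}^{N} I(x_i^{(j)}=a_{jl},\,y_i=c_k)+\lambda}{\sum_{i=1}^{N} I(y_i=c_k)+S_j\lambda}$$

先验概率的贝叶斯估计为

$$P_\lambda(Y=c_k)=\frac{\sum_{i=1}^{N} I(y_i=c_k)+\lambda}{N+K\lambda}$$

其中 $S_j$ 是第 $j$ 个特征 $X^{(j)}$ 的可能取值数,$K$ 是 $Y$ 的可能取值数;$\lambda \ge 0$,取 $\lambda=1$ 时称为拉普拉斯平滑。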
import numpy as np
def createData():
    """Build the training set and smoothing constant for Bayesian estimation.

    Every sample has two features and a class label.

    Returns:
        tuple: ``(n, X1, X2, C, A1, A2, Y)`` where ``n`` is the smoothing
        constant (1 gives Laplace smoothing), ``X1`` and ``X2`` hold the
        per-sample values of the first and second feature, ``C`` holds the
        per-sample class labels, and ``A1``, ``A2``, ``Y`` list the distinct
        values of ``X1``, ``X2`` and ``C`` in first-seen order.
    """
    n = 1  # Laplace smoothing

    # Training set, one row per sample: ((feature 1, feature 2), label).
    # Kept as plain Python data: the rows are ragged, and np.array()
    # raises ValueError for such input on NumPy >= 1.24.
    T = [
        ((1, 'S'), -1), ((1, 'M'), -1), ((1, 'M'), 1), ((1, 'S'), 1),
        ((1, 'S'), -1), ((2, 'S'), -1), ((2, 'M'), -1), ((2, 'M'), 1),
        ((2, 'L'), 1), ((2, 'L'), 1), ((3, 'L'), 1), ((3, 'M'), 1),
        ((3, 'M'), 1), ((3, 'L'), 1), ((3, 'L'), -1),
    ]

    X1 = [features[0] for features, _ in T]
    X2 = [features[1] for features, _ in T]
    C = [label for _, label in T]

    # De-duplicate while keeping first-seen order.
    A1 = list(dict.fromkeys(X1))
    A2 = list(dict.fromkeys(X2))
    Y = list(dict.fromkeys(C))
    return n, X1, X2, C, A1, A2, Y
def probability(n, X1, X2, C, A1, A2, Y):
    """Estimate smoothed class priors and conditionals (Bayesian estimation).

    With ``N = len(C)``, ``K = len(Y)`` and ``S_j`` the number of distinct
    values of feature ``j``::

        P(Y=y)     = (count(y) + n)    / (N + K * n)
        P(X=x|Y=y) = (count(x, y) + n) / (count(y) + S_j * n)

    Args:
        n: smoothing constant added to every count (1 gives Laplace
            smoothing, 0 gives the maximum-likelihood estimate).
        X1: per-sample values of the first feature.
        X2: per-sample values of the second feature.
        C: per-sample class labels, aligned with ``X1`` and ``X2``.
        A1: distinct values of the first feature.
        A2: distinct values of the second feature.
        Y: distinct class labels.

    Returns:
        tuple: ``(P_y, P)``. ``P_y`` maps each label ``y`` to ``P(Y=y)``.
        ``P`` maps the string ``"x|y"`` to ``P(X=x | Y=y)`` for every
        feature value ``x`` in ``A1`` and ``A2``.

    Note:
        The keys of ``P`` do not encode which feature a value belongs to,
        so a value present in both ``A1`` and ``A2`` is treated as a value
        of the first feature only.
    """
    from collections import Counter

    total = len(C)
    class_counts = Counter(C)

    # Smoothed prior P(y); the denominator adds n once per class (K * n).
    P_y = {y: (class_counts[y] + n) / (total + len(Y) * n) for y in Y}

    # Number of samples with each (feature value, label) pair.
    pair_counts_1 = Counter(zip(X1, C))
    pair_counts_2 = Counter(zip(X2, C))

    # Smoothed conditional P(x|y); the denominator adds n once per
    # possible value of the feature (S_j * n) so that the conditionals of
    # one feature sum to 1 for every class.
    P = {}
    for x in list(A1) + list(A2):
        if x in A1:
            pair_counts, n_values = pair_counts_1, len(A1)
        else:
            pair_counts, n_values = pair_counts_2, len(A2)
        for y in P_y:
            key = str(x) + '|' + str(y)
            P[key] = (pair_counts[(x, y)] + n) / (class_counts[y] + n_values * n)
    return P_y, P
def testProbability(x):
    """Classify one instance with the smoothed (Bayesian) naive Bayes model.

    The model is rebuilt from ``createData()`` on every call.

    Args:
        x: the instance to classify, as ``[feature 1 value, feature 2 value]``.

    Returns:
        The label in ``Y`` with the highest score; the first such label
        wins a tie. The label is also printed.

    Raises:
        KeyError: if a value in ``x`` is not among the feature values the
            model was trained on.
    """
    n, X1, X2, C, A1, A2, Y = createData()
    P_y, P = probability(n, X1, X2, C, A1, A2, Y)

    # Score of each class: P(y) * P(x1|y) * P(x2|y).
    scores = [
        P_y[y] * P[str(x[0]) + '|' + str(y)] * P[str(x[1]) + '|' + str(y)]
        for y in Y
    ]
    predicted = Y[scores.index(max(scores))]
    print(predicted)
    return predicted
testProbability( [2,'S'] ) # classify the instance [2, S]