# Demo: iterating a dict yields its keys, so max() with a value lookup as
# the key function returns the key that owns the largest value.
data = dict(s=4, f=7, e=8)
print(max(data, key=lambda name: data[name]))  # prints: e
# Load the data set
def loadData(filename):
    """Read a whitespace-separated data file into rows and class labels.

    Each non-blank line becomes one row: the line is split on runs of
    whitespace and its last field is taken as the row's class label.  The
    label also stays in the row as its final element.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(dataSetMat, labels)`` tuple.  ``dataSetMat`` is a list of rows,
        each a list of strings with the label last; ``labels`` is the list
        of those last fields in file order.
    """
    dataSetMat = []
    labels = []
    with open(filename) as fr:
        # Iterate the file lazily instead of materialising every line first.
        for line in fr:
            # split() with no separator collapses repeated spaces/tabs, so
            # stray double spaces do not create empty-string fields.
            fields = line.split()
            if not fields:
                # A blank line would otherwise become the bogus row ['']
                # carrying an empty-string label.
                continue
            labels.append(fields[-1])
            dataSetMat.append(fields)
    return dataSetMat, labels
# Load the training rows and their class labels from data.txt.
dataSet, labels = loadData('data.txt')
def train(dataSet, labels):
    """Estimate naive Bayes class priors and per-attribute value frequencies.

    See "Machine Learning" (Zhou Zhihua), p. 151.  No smoothing is applied:
    a value that never occurs with a class is simply absent from that
    class's table.

    Args:
        dataSet: List of rows; every column except the last is an attribute
            and the last column is the row's class label.
        labels: List of class labels, one per row.  Rows are assigned to a
            class by their own last column, while the counts used as
            denominators come from this list.

    Returns:
        A dict mapping each class label to a list whose first element is the
        class prior and whose element ``i + 1`` is a dict
        ``{value: P(attribute i == value | class)}``.  An empty ``labels``
        list yields an empty dict.
    """
    from collections import Counter

    res = {}
    if not labels:
        return res

    total = float(len(labels))
    # Every column but the trailing class label is an attribute.
    attr_count = len(dataSet[0]) - 1

    # Handle each class in turn; Counter gives the per-class totals in one
    # pass instead of calling labels.count() repeatedly.
    for label, label_count in Counter(labels).items():
        class_rows = [row for row in dataSet if row[-1] == label]
        entry = [label_count / total]  # prior probability of the class
        # Conditional probability of every value of every attribute.
        for i in range(attr_count):
            value_counts = Counter(row[i] for row in class_rows)
            entry.append({
                value: count / float(label_count)
                for value, count in value_counts.items()
            })
        res[label] = entry
    return res
#{'soft': [0.3333333333333333, {'3': 0.375, '1': 0.25, '2': 0.375}, {'1': 0.375, '2': 0.625}, {'1': 0.875, '2': 0.125}, {'1': 0.25, '2': 0.75}, {'3': 0.5, '1': 0.125, '2': 0.375}], 'no': [0.4583333333333333, {'3': 0.36363636363636365, '1': 0.36363636363636365, '2': 0.2727272727272727}, {'1': 0.5454545454545454, '2': 0.45454545454545453}, {'1': 0.45454545454545453, '2': 0.5454545454545454}, {'1': 0.9090909090909091, '2': 0.09090909090909091}, {'3': 0.36363636363636365, '1': 0.36363636363636365, '2': 0.2727272727272727}], 'hard': [0.20833333333333334, {'3': 0.2, '1': 0.4, '2': 0.4}, {'1': 0.6, '2': 0.4}, {'2': 1.0}, {'2': 1.0}, {'1': 0.6, '2': 0.4}]}
# The line above appears to be sample output of train(); test() below simply indexes into that dictionary.
def test(testVect, probMat):
    """Classify one sample using a probability table shaped like train()'s.

    For every class in ``probMat`` the score is the class prior multiplied
    by the stored probability of each attribute value in ``testVect``.  A
    value that is missing from a class's table sets that class's score to
    zero.  The per-class scores are printed on one line, in ``probMat``
    iteration order, before the result is returned.

    Args:
        testVect: Sequence of attribute values, in attribute order.
        probMat: Dict mapping each class label to
            ``[prior, {value: prob}, {value: prob}, ...]`` with one dict per
            attribute.  Any set of class labels is accepted.

    Returns:
        The class label with the highest score; on a tie, the first such
        label in ``probMat`` iteration order.

    Raises:
        ValueError: If ``probMat`` contains no classes.
    """
    if not probMat:
        raise ValueError("probMat contains no classes")

    res = {}
    for label, entry in probMat.items():
        score = entry[0]  # start from the class prior
        # entry[i + 1] is the value -> probability table of attribute i.
        for i, value in enumerate(testVect):
            table = entry[i + 1]
            if value in table:
                score *= table[value]
            else:
                # Value never seen with this class: probability is zero.
                score = 0
        res[label] = score

    print(*res.values())
    return max(res, key=res.get)


# The name starts with "test", so pytest would try to collect this function
# as a test case wherever it is imported into a test module; opt out.
test.__test__ = False
# Build the probability table from the loaded data, classify one sample
# vector and print the predicted class label.
probMat = train(dataSet, labels)
res = test(['3', '1', '2', '2', '1'], probMat)
print(res)
# Naive Bayes implemented in Python
# (Blog page footer: latest recommended article published 2023-01-26 18:04:12)