python实现贝叶斯分类器

python实现贝叶斯分类器的主要代码


import csv
import math
import random

def loadCsv(filename):
	"""Load a purely numeric CSV file into a list of rows of floats.

	Args:
		filename: Path of the CSV file to read.

	Returns:
		A list containing one list of floats per non-empty CSV row.

	Raises:
		ValueError: If a field cannot be converted to float.
	"""
	# Text mode with newline="" is what the csv module expects on Python 3;
	# the context manager closes the handle even if parsing fails.
	with open(filename, newline="") as csvFile:
		# csv.reader yields an empty list for a blank line; skip those so
		# later code can safely read row[-1] as the class label.
		return [[float(x) for x in row] for row in csv.reader(csvFile) if row]
	
def spiltDataset(dataset, splitRatio):
	"""Randomly split dataset into a training part and a test part.

	Args:
		dataset: Sequence of rows. It is copied, not modified.
		splitRatio: Fraction of the rows to place in the training set.

	Returns:
		A two-element list [trainSet, testSet]. trainSet holds
		int(len(dataset) * splitRatio) randomly drawn rows (capped at the
		number of rows available); testSet holds the rows that were not
		drawn, in their original relative order.
	"""
	trainSize = int(len(dataset) * splitRatio)
	trainSet = []
	remaining = list(dataset)
	# Stop when the pool is exhausted so a ratio above 1 cannot call
	# randrange(0), which would raise ValueError.
	while len(trainSet) < trainSize and remaining:
		index = random.randrange(len(remaining))
		trainSet.append(remaining.pop(index))
	return [trainSet, remaining]
	
def separateByClass(dataset):
	"""Group the rows of dataset by their class label.

	Args:
		dataset: Iterable of rows; the last element of each row is used as
			the class label.

	Returns:
		A dict mapping each class label to the list of rows carrying it,
		in the order the rows appear in dataset.
	"""
	separated = {}
	for vector in dataset:
		separated.setdefault(vector[-1], []).append(vector)
	return separated

def mean(numbers):
	"""Return the arithmetic mean of numbers as a float.

	Args:
		numbers: Sized collection of numeric values.

	Raises:
		ZeroDivisionError: If numbers is empty.
	"""
	return sum(numbers) / float(len(numbers))
	
def stdev(numbers):
	"""Return the sample standard deviation of numbers.

	The sum of squared deviations from the mean is divided by
	len(numbers) - 1, so at least two values are needed.
	"""
	center = mean(numbers)
	squaredDeviations = [(value - center) ** 2 for value in numbers]
	sampleVariance = sum(squaredDeviations) / float(len(numbers) - 1)
	return math.sqrt(sampleVariance)
	
def summarize(dataset):
	"""Compute (mean, stdev) for every attribute column of dataset.

	Args:
		dataset: Sequence of equally long rows; the last column is treated
			as the class label and is not summarized.

	Returns:
		A list of (mean, stdev) tuples, one per attribute column.
	"""
	# zip(*rows) transposes rows into columns.
	columns = list(zip(*dataset))
	# Drop the label column before computing statistics rather than after.
	return [(mean(attribute), stdev(attribute)) for attribute in columns[:-1]]

def summarizeByClass(dataset):
	"""Compute per-attribute (mean, stdev) summaries for each class.

	Args:
		dataset: Sequence of rows whose last element is the class label.

	Returns:
		A dict mapping each class label to the list returned by
		summarize() for the rows of that class.
	"""
	separated = separateByClass(dataset)
	# .items() is available on both Python 2 and 3, unlike .iteritems().
	return {
		classValue: summarize(instances)
		for classValue, instances in separated.items()
	}
	
def calculateProbability(x, mean, stdev):
	"""Return the Gaussian probability density of x.

	Evaluates 1 / (sqrt(2*pi) * stdev) * exp(-(x - mean)^2 / (2 * stdev^2)).

	Args:
		x: Value at which to evaluate the density.
		mean: Mean of the distribution.
		stdev: Standard deviation of the distribution; must be non-zero.

	Raises:
		ZeroDivisionError: If stdev is 0.
	"""
	# The denominator of the exponent is 2 * variance (a product, not a sum).
	exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
	return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
	
def calcelateClassProbabilities(summaries, inputVector):
	"""Compute, for each class, the product of per-attribute densities.

	Args:
		summaries: Dict mapping class label to a list of (mean, stdev)
			tuples, one per attribute.
		inputVector: Sequence of attribute values, indexed in the same
			order as the per-class summaries.

	Returns:
		A dict mapping each class label to the product of
		calculateProbability() over its attributes (1 when a class has no
		attribute summaries).
	"""
	probabilities = {}
	for classValue, classSummaries in summaries.items():
		probabilities[classValue] = 1
		for i, (attrMean, attrStdev) in enumerate(classSummaries):
			probabilities[classValue] *= calculateProbability(
				inputVector[i], attrMean, attrStdev)
	return probabilities
	
def predict(summaries, inputVector):
	"""Return the class label with the highest computed probability.

	Args:
		summaries: Per-class attribute summaries, passed through to
			calcelateClassProbabilities().
		inputVector: Attribute values of the instance to classify.

	Returns:
		The label whose probability is largest, or None when summaries
		contains no classes. On ties the first label encountered wins.
	"""
	probabilities = calcelateClassProbabilities(summaries, inputVector)
	bestLabel, bestProb = None, -1
	for classValue, probability in probabilities.items():
		if bestLabel is None or probability > bestProb:
			bestProb = probability
			bestLabel = classValue
	return bestLabel

def getPredictions(summaries, testSet):
	"""Predict a class label for every row of testSet.

	Args:
		summaries: Per-class attribute summaries, passed to predict().
		testSet: Iterable of rows to classify.

	Returns:
		A list of predicted labels, in the same order as testSet.
	"""
	return [predict(summaries, row) for row in testSet]
	
def getAccuracy(testSet, predictions):
	"""Return the percentage of rows whose last element equals the prediction.

	Rows of testSet are compared position by position with predictions;
	the result is a float percentage of matching positions.
	"""
	hits = sum(
		1 for position, row in enumerate(testSet)
		if row[-1] == predictions[position]
	)
	fraction = hits / float(len(testSet))
	return fraction * 100.0
	
def main():
	"""Train and evaluate the classifier on the Pima Indians diabetes CSV.

	Loads 'pima-indians-diabetes.data.csv' from the working directory,
	splits it 67/33 into training and test rows, builds the per-class
	summaries from the training rows and prints the split sizes and the
	accuracy obtained on the test rows.
	"""
	filename = 'pima-indians-diabetes.data.csv'
	splitRatio = 0.67
	dataset = loadCsv(filename)
	trainingSet, testSet = spiltDataset(dataset, splitRatio)
	# format() must be applied to the string, not to the result of print().
	print('Split {0} rows into train={1} and test={2} rows'.format(
		len(dataset), len(trainingSet), len(testSet)))
	# prepare model
	summaries = summarizeByClass(trainingSet)
	# test model
	predictions = getPredictions(summaries, testSet)
	accuracy = getAccuracy(testSet, predictions)
	print('Accuracy: {0}%'.format(accuracy))
	
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
	main()


  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值