# Naive Bayes classifier:
def getdata(data_path="E://data.txt", test_path="E://test.txt"):
    """Load the training and test sets from comma-separated text files.

    Every non-blank line is stripped and split on "," into a list of
    strings. Blank lines are skipped so they do not turn into one-element
    ``['']`` rows that later code would index past.

    Args:
        data_path: Path of the training file. Defaults to the location the
            script originally hard-coded.
        test_path: Path of the test file. Defaults to the location the
            script originally hard-coded.

    Returns:
        A tuple ``(datatest, data, label)``: the test rows, the training
        rows, and the fixed list of class labels.
    """
    def _read_rows(path):
        # The context manager closes the handle even if reading fails.
        with open(path) as handle:
            return [line.strip().split(",") for line in handle if line.strip()]

    # Training file first, then the test file, as in the original order.
    data = _read_rows(data_path)
    datatest = _read_rows(test_path)
    label = ['unacc', 'acc', 'good', 'vgood']
    return datatest, data, label
# Split the dataset by class label:
def classify_data(data):
    """Partition rows by their class label (the last field of each row).

    Args:
        data: Iterable of rows (lists of strings) whose last element is the
            class label.

    Returns:
        A tuple ``(unacc, acc, good, vgood)`` of row lists, each keeping the
        input order. Empty rows and rows whose label is not one of the four
        known classes are left out instead of being filed under ``vgood``.
    """
    buckets = {'unacc': [], 'acc': [], 'good': [], 'vgood': []}
    for row in data:
        # An empty row has no label; an unknown label belongs to no class.
        bucket = buckets.get(row[-1]) if row else None
        if bucket is not None:
            bucket.append(row)
    return buckets['unacc'], buckets['acc'], buckets['good'], buckets['vgood']
# Compute the prior probability of each class:
def first_probability(data):
    """Compute the prior probability of each class from the training rows.

    Each prior is the number of rows that ``classify_data`` assigns to the
    class divided by the total number of rows in ``data``.

    Args:
        data: Training rows; the last field of each row is the class label.

    Returns:
        A tuple ``(unacc_value, acc_value, good_value, vgood_value)`` of
        floats. All four are ``0.0`` when ``data`` is empty, instead of
        raising ``ZeroDivisionError``.
    """
    total = len(data)
    groups = classify_data(data)
    if total == 0:
        # No training rows: there is nothing to estimate a prior from.
        return 0.0, 0.0, 0.0, 0.0
    unacc_value, acc_value, good_value, vgood_value = (
        len(group) / total for group in groups
    )
    return unacc_value, acc_value, good_value, vgood_value
# Compute the conditional probabilities:
# tempdata holds the rows of one class (unacc, acc, good or vgood); nature0..nature5 are the attribute values
def conditions_probability(tempdata, nature0, nature1, nature2, nature3, nature4, nature5):
    """Estimate the conditional probability of six attribute values in a class.

    For each of the first six columns of ``tempdata`` (buying, maint, doors,
    persons, lug_boot, safety) this returns the fraction of rows whose value
    in that column equals the corresponding ``nature`` argument.

    Args:
        tempdata: Rows belonging to a single class.
        nature0: Value to match in column 0 (buying).
        nature1: Value to match in column 1 (maint).
        nature2: Value to match in column 2 (doors).
        nature3: Value to match in column 3 (persons).
        nature4: Value to match in column 4 (lug_boot).
        nature5: Value to match in column 5 (safety).

    Returns:
        A tuple ``(buying_value, maint_value, doors_value, persons_value,
        lug_boot_value, safety_value)`` of floats. All six are ``0.0`` when
        ``tempdata`` is empty, instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If a row has fewer than six fields.
    """
    total = len(tempdata)
    if total == 0:
        # A class with no training rows gives no evidence for any value.
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

    wanted = (nature0, nature1, nature2, nature3, nature4, nature5)
    hits = [0] * len(wanted)
    # One pass over the rows instead of building six per-column lists.
    for row in tempdata:
        for column, value in enumerate(wanted):
            if row[column] == value:
                hits[column] += 1

    (buying_value, maint_value, doors_value,
     persons_value, lug_boot_value, safety_value) = (hit / total for hit in hits)
    return buying_value, maint_value, doors_value, persons_value, lug_boot_value, safety_value
# Prediction:
def predicition(nature0, nature1, nature2, nature3, nature4, nature5):
    """Predict and print the most probable class for six attribute values.

    The training data is reloaded through ``getdata()`` on every call. Each
    class is scored as its prior multiplied by the six conditional
    probabilities from ``conditions_probability``; the label with the highest
    score is printed. When scores tie, the label that comes first in the
    label list wins.

    Args:
        nature0: Attribute value for column 0.
        nature1: Attribute value for column 1.
        nature2: Attribute value for column 2.
        nature3: Attribute value for column 3.
        nature4: Attribute value for column 4.
        nature5: Attribute value for column 5.

    Returns:
        The predicted label, which is also printed.
    """
    _, data, label = getdata()
    class_rows = classify_data(data)
    priors = first_probability(data)

    probability = []
    for prior, tempdata in zip(priors, class_rows):
        if not tempdata:
            # A class without training rows cannot be scored; treat it as
            # impossible rather than dividing by zero.
            probability.append(0.0)
            continue
        score = prior
        for conditional in conditions_probability(
                tempdata, nature0, nature1, nature2, nature3, nature4, nature5):
            score *= conditional
        probability.append(score)

    predicted = label[probability.index(max(probability))]
    print(predicted)
    return predicted
# Compute the accuracy:
def accuracy():
    """Score the classifier on the test set and print its accuracy.

    Training and test rows come from ``getdata()``. Every test row is
    classified the same way as in ``predicition`` and compared with its own
    label in column 6. The fraction of correct predictions is printed after
    a heading line.

    Test rows with fewer than seven fields (six attributes plus the label)
    are skipped and not counted. If no row can be scored the accuracy is
    reported as ``0.0`` instead of raising ``ZeroDivisionError``.

    Returns:
        The accuracy as a float, which is also printed.
    """
    datatest, data, label = getdata()
    class_rows = classify_data(data)
    priors = first_probability(data)

    correct = 0  # named so the builtin sum() is not shadowed
    evaluated = 0
    for perline in datatest:
        if len(perline) < 7:
            # Blank or truncated row: it has no usable attributes or label.
            continue
        evaluated += 1
        probability = []
        for prior, tempdata in zip(priors, class_rows):
            if not tempdata:
                # A class without training rows is treated as impossible.
                probability.append(0.0)
                continue
            score = prior
            for conditional in conditions_probability(tempdata, *perline[:6]):
                score *= conditional
            probability.append(score)
        if label[probability.index(max(probability))] == perline[6]:
            correct += 1

    result = correct / evaluated if evaluated else 0.0
    print("准确率是:")
    print(result)
    return result
if __name__ == "__main__":
    # Report accuracy on the test file, then classify two sample
    # attribute combinations.
    accuracy()
    predicition('low', 'low', '5more', 'more', 'big', 'med')
    predicition('vhigh', 'vhigh', '2', '2', 'big', 'high')
# NOTE(review): the text that followed here was web-page feedback-widget residue captured with the listing, not program code.