# -*- coding: utf-8 -*-
"""Naive Bayes classification of categorical records using pandas row counts.

Each probability is a plain relative frequency (no smoothing): a feature value
that never occurs together with a class makes that class's score exactly zero.
"""
import pandas as pd
import numpy as np


def XP(testline, c, trainpart):
    """Return the likelihood of ``testline``'s feature values within one class.

    The likelihood is the product, over every feature named in
    ``testline.index``, of the fraction of ``trainpart`` rows whose value in
    that column equals the value in ``testline``.

    Args:
        testline: Series mapping feature column name -> observed value.
        c: Class label that ``trainpart`` was filtered on. Unused here; kept
            so existing callers keep working.
        trainpart: Training rows that belong to class ``c``.

    Returns:
        The product of the per-feature relative frequencies, or ``0.0`` when
        ``trainpart`` has no rows (the original divided by zero in that case).
    """
    class_size = len(trainpart)
    if class_size == 0:
        # No training evidence for this class: treat the likelihood as zero
        # instead of raising ZeroDivisionError.
        return 0.0

    Xp = 1.0
    for index in testline.index:
        x = testline[index]
        # Filter once and reuse the count for both the ratio and the trace.
        match_count = len(trainpart[trainpart[index] == x])
        print(match_count)
        Xp = Xp * (match_count / class_size)
    print(Xp)
    return Xp


def YP(train, C, testline):
    """Return the class in ``C`` with the highest naive Bayes score.

    The score of a class is ``XP(...)`` (feature likelihood within the class)
    multiplied by the class's share of the rows in ``train``.

    Args:
        train: Training frame; its LAST column is taken as the class label.
        C: Sequence of candidate class labels.
        testline: Series mapping feature column name -> observed value.

    Returns:
        The element of ``C`` with the largest score; on ties the earliest
        element wins (``numpy.argmax`` returns the first maximum).
    """
    # Derive the label column from the argument rather than a module global so
    # the function also works when this file is imported.
    label_column = train.columns[-1]
    total_rows = len(train)

    p = []
    for c in C:
        trainpart = train[train[label_column] == c]
        prior = len(trainpart) / total_rows
        p.append(XP(testline, c, trainpart) * prior)
    p = np.array(p)
    print(p)
    return C[p.argmax()]


def NB(train, Xtest, C):
    """Predict a class for every row of ``Xtest``.

    Args:
        train: Training frame; its last column is the class label.
        Xtest: Frame holding only the feature columns of the rows to classify.
        C: Sequence of candidate class labels.

    Returns:
        A list with one predicted label per row of ``Xtest``, in row order.
    """
    # `.ix` was removed from pandas; `.loc` is the label-based equivalent for
    # the index labels produced by iterating `Xtest.index`.
    return [YP(train, C, Xtest.loc[i, :]) for i in Xtest.index]


if __name__ == "__main__":
    data = pd.read_csv(r'data.csv')
    # Label-based slice, inclusive of row label 4000 (what `.ix[0:4000, :]`
    # selected on the default integer index created by read_csv).
    train = data.loc[0:4000, :]
    Y = list(train.columns)[-1]
    C = list(train[Y].unique())
    # NOTE(review): rows 601-610 are also part of `train` (rows 0-4000), so
    # this evaluates on data the model has already seen - confirm intent.
    test = data.iloc[601:611, 0:6]
    Xtest = test.iloc[:, 0:5]

    result = NB(train, Xtest, C)