import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# fix: "%matplotlib inline" is IPython/Jupyter magic, not valid Python
# syntax — kept as a comment so the file runs as a plain script.
# %matplotlib inline
# fix: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

SEED = 222  # fixed seed so the shuffle/split below is reproducible
random_state = SEED

# Wine dataset: column 0 ("Q") is the class label (1/2/3), the remaining
# 13 columns ("W".."F") are the features.
data = pd.read_csv("wine.data", names=list("QWERTYUIOPASDF"))
# fix: sample(frac=1) returns a shuffled COPY; the original discarded it,
# so the intended shuffle was a no-op. Assign the result back (seeded for
# reproducibility, consistent with SEED above).
data = data.sample(frac=1, random_state=SEED)

feature = data.iloc[:, 1:]
label = data.iloc[:, 0]
train_feature, test_feature, train_label, test_label = train_test_split(
    feature, label, test_size=0.2, random_state=SEED)
test_features = np.array(test_feature)
test_labels = np.array(test_label)

# Re-attach the label column to the training features (merge on the shared
# row index) so the training rows can be split per class below.
train_label = pd.DataFrame(train_label, columns=["Q"])
train_data = pd.merge(train_feature, train_label, left_index=True, right_index=True)

# fix: DataFrame.ix was removed in pandas 1.0 -> use .loc; and the boolean
# mask must be built from train_data itself (the original masked with the
# FULL dataset's labels, relying on .ix index alignment).
data_1 = train_data.loc[train_data.Q == 1, :]
data_2 = train_data.loc[train_data.Q == 2, :]
data_3 = train_data.loc[train_data.Q == 3, :]
data_1 = data_1.values
data_2 = data_2.values
data_3 = data_3.values

# After the merge, column 13 is the label "Q"; columns 0-12 are the
# 13 features, split here by class.
train_labels_1 = data_1[:, 13]
train_features_1 = data_1[:, :13]
train_labels_2 = data_2[:, 13]
train_features_2 = data_2[:, :13]
train_labels_3 = data_3[:, 13]
train_features_3 = data_3[:, :13]
def Gauss_distribution_computation(train_features_1, train_features_2, train_features_3,
                                   train_labels_1, train_labels_2, train_labels_3):
    """Estimate per-class Gaussian parameters for a naive Bayes classifier.

    Parameters
    ----------
    train_features_1/2/3 : np.ndarray, shape (n_i, n_features)
        Feature rows belonging to class 1, 2 and 3 respectively.
    train_labels_1/2/3 : np.ndarray, shape (n_i,)
        Matching label vectors; labels are assumed to be the values 1..k
        (they are used as 0-based indices after subtracting 1).

    Returns
    -------
    prior_proba : np.ndarray, shape (k,)
        Class prior probabilities P(class). fix: the original returned raw
        class COUNTS despite the name; they are now normalized to sum to 1.
        This is a uniform positive rescaling, so any argmax over class
        posteriors computed from these priors is unchanged.
    mean_values : np.ndarray, shape (3, n_features)
        Per-class feature means (row i corresponds to class i+1).
    standards_deviation : np.ndarray, shape (3, n_features)
        Per-class population standard deviations (np.std, ddof=0, as in the
        original).
    """
    train_label = np.hstack([train_labels_1, train_labels_2, train_labels_3])
    elem_spe_train_label = np.unique(train_label)

    # Count occurrences of each class, then normalize into probabilities.
    prior_proba = np.zeros(len(elem_spe_train_label))
    for lab in train_label:
        prior_proba[int(lab) - 1] += 1
    prior_proba /= len(train_label)  # fix: counts -> probabilities

    # Vectorized per-class mean / std over axis 0. This replaces the three
    # duplicated, hard-coded 13-iteration loops of the original and
    # generalizes to any feature count (backward-compatible: same values
    # for 13 features).
    class_features = (train_features_1, train_features_2, train_features_3)
    mean_values = np.vstack([f.mean(axis=0) for f in class_features])
    standards_deviation = np.vstack([f.std(axis=0) for f in class_features])
    return prior_proba, mean_values, standards_deviation
def Prediction_calculation(prior_proba,mean_values,standards_deviation,test_feature):
    # NOTE(review): this function is TRUNCATED in this chunk — only the
    # signature and this accumulator initialization are visible before the
    # scraped article text cuts the code off. The rest of the body (the
    # Gaussian likelihood / posterior computation the name suggests) is not
    # shown here and must be recovered from the original source before use.
    Gauss_calculated_value = []
# NOTE: the four lines below are trailing blog-article metadata leaked in by
# the web scrape (CSDN post title, publish date, abstract). They are kept
# verbatim but commented out so the file parses as Python.
# 朴素贝叶斯分类器python代码实现(由于上一次对算法理解有误,此次更正如下)
# 最新推荐文章于 2022-04-29 15:00:40 发布
# 本文详细介绍了朴素贝叶斯分类器的原理,并提供了正确的Python代码实现,纠正了之前算法理解的错误,帮助读者深入理解并应用朴素贝叶斯分类方法。
# 摘要由CSDN通过智能技术生成