先简单介绍一下原理:朴素贝叶斯假设在给定类别的条件下各特征相互独立,预测时选择使后验概率 $P(c)\prod_i P(x_i \mid c)$ 最大的类别 $c$ 作为结果。
以下为用朴素贝叶斯分类器预测用户满意度的 Python 实现:
import pandas as pd
def data_to_df(data_path, test_num=100, random_state=None):
    """Load the labelled CSV and split it into training and test frames.

    The file is read without a header row; its seven columns are named
    buying, maint, doors, persons, lug-boot, safety and label. ``test_num``
    rows are drawn at random (without replacement) as the test set and the
    remaining rows form the training set.

    Args:
        data_path: Path or file-like object accepted by ``pandas.read_csv``.
        test_num: Number of rows to hold out for testing.
        random_state: Optional seed forwarded to ``DataFrame.sample``. Pass
            a fixed value to obtain the same split on every call; ``None``
            (the default) keeps the split random, as before.

    Returns:
        A ``(df_train, df_test)`` tuple of DataFrames whose indexes are
        disjoint.

    Raises:
        ValueError: Raised by pandas when ``test_num`` is larger than the
            number of rows in the file.
    """
    column_names = [
        'buying', 'maint', 'doors', 'persons', 'lug-boot', 'safety', 'label',
    ]
    df_data = pd.read_csv(data_path, names=column_names)

    # Sample the test rows first, then drop them by index label so the
    # training set is exactly the complement of the test set.
    df_test = df_data.sample(n=test_num, random_state=random_state)
    df_train = df_data.drop(df_test.index)
    return df_train, df_test
class Bayesian_Classifier():
"""贝叶斯分类器"""
def __init__(self, df):
# prior_probability_dict:先验概率字典,记录各类别的先验概率,格式:{'unacc':概率值, 'acc': 概率值, 'good': 概率值, 'vgood': 概率值}
self.prior_probability_series = df['label'].value_counts(normalize=True)