C++语言实现朴素贝叶斯算法

亚丁号

于 2024-08-28 06:28:25 发布

阅读量214

点赞数 10

文章标签：算法、人工智能、机器学习

本文链接：https://blog.csdn.net/zy0412326/article/details/141617394

版权

朴素贝叶斯（Naive Bayes）是一种简单但效果良好的分类算法，基于贝叶斯定理并假设特征之间相互独立。以下是一个使用C++语言实现朴素贝叶斯算法的基本示例。

假设我们有一个简单的分类问题，例如通过几个特征预测一封电子邮件是否是垃圾邮件。

代码实现：

#include <cmath>
#include <iostream>
#include <limits>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/// Multinomial Naive Bayes classifier over string-valued features.
///
/// Training only accumulates counts; probabilities are derived at prediction
/// time with add-one (Laplace) smoothing, so a feature that was never seen
/// together with a class lowers that class's score instead of driving it to
/// log(0) = -infinity.
class NaiveBayes {
public:
    NaiveBayes() = default;

    /// Accumulates counts from a labelled data set.
    ///
    /// May be called several times; counts from every call are combined.
    ///
    /// @param X  One row of feature strings per sample.
    /// @param y  One class label per sample; y[i] labels X[i].
    /// @throws std::invalid_argument if X and y differ in length. The model
    ///         is left untouched in that case.
    void train(const std::vector<std::vector<std::string>>& X, const std::vector<std::string>& y) {
        if (X.size() != y.size()) {
            throw std::invalid_argument(
                "NaiveBayes::train: X and y must contain the same number of samples");
        }

        for (std::size_t i = 0; i < X.size(); ++i) {
            const std::string& label = y[i];
            ++classCounts[label];
            for (const std::string& feature : X[i]) {
                ++featureCounts[std::make_pair(feature, label)];
                ++featureTotals[label];
                vocabulary.insert(feature);
            }
        }

        // Keep a running total so priors stay correct across repeated calls.
        totalSamples += X.size();
    }

    /// Returns the label with the highest posterior log-probability for X.
    ///
    /// Ties are resolved in favour of the label that sorts first. An empty
    /// string is returned when the model has not been trained on any sample.
    ///
    /// @param X  Feature strings of the sample to classify.
    std::string predict(const std::vector<std::string>& X) const {
        std::string bestLabel;
        // Log-probabilities are <= 0, so the search must start from -infinity
        // rather than from an arbitrary negative constant.
        double maxProb = -std::numeric_limits<double>::infinity();

        // One extra slot is reserved for "any feature not seen in training".
        // This keeps the smoothed denominator strictly positive even when the
        // vocabulary is empty.
        const double vocabSlots = static_cast<double>(vocabulary.size()) + 1.0;

        for (const auto& entry : classCounts) {
            const std::string& label = entry.first;

            // Prior: fraction of training samples carrying this label.
            double prob = std::log(static_cast<double>(entry.second) /
                                   static_cast<double>(totalSamples));

            const auto totalIt = featureTotals.find(label);
            const double labelTotal =
                (totalIt != featureTotals.end()) ? static_cast<double>(totalIt->second) : 0.0;
            const double denominator = labelTotal + vocabSlots;

            for (const std::string& feature : X) {
                // find() instead of operator[] so prediction never mutates the model.
                const auto countIt = featureCounts.find(std::make_pair(feature, label));
                const double count =
                    (countIt != featureCounts.end()) ? static_cast<double>(countIt->second) : 0.0;
                prob += std::log((count + 1.0) / denominator);
            }

            if (prob > maxProb) {
                maxProb = prob;
                bestLabel = label;
            }
        }

        return bestLabel;
    }

private:
    std::map<std::string, int> classCounts;  // number of training samples per label
    std::map<std::pair<std::string, std::string>, int> featureCounts;  // occurrences of (feature, label)
    std::map<std::string, int> featureTotals;  // total feature occurrences per label
    std::set<std::string> vocabulary;  // distinct features seen during training
    std::size_t totalSamples = 0;  // samples seen across all train() calls
};

int main() {
    // 训练数据集
    std::vector<std::vector<std::string>> X = {
        {"cheap", "viagra"},
        {"win", "lottery"},
        {"cheap", "lottery"},
        {"hello", "friend"},
        {"dear", "friend"}
    };

    std::vector<std::string> y = {
        "spam",
        "spam",
        "spam",
        "ham",
        "ham"
    };

    NaiveBayes nb;
    nb.train(X, y);

    // 测试样本
    std::vector<std::string> testSample = {"cheap", "win"};
    std::string prediction = nb.predict(testSample);

    std::cout << "Prediction: " << prediction << std::endl;

    return 0;
}