C++语言实现随机森林 (Random Forest)算法

灬沙海灬

于 2024-10-03 15:57:17 发布

阅读量70

点赞数 3

文章标签：算法 c++ 随机森林

本文链接：https://blog.csdn.net/adendean/article/details/142692816

版权

在 C++ 中实现随机森林（Random Forest）算法通常涉及以下几个步骤：

1. 数据准备：读取并处理数据集，并将其划分为训练集和测试集。
2. 决策树构建：实现一个决策树，支持分类和回归（下面的示例仅演示分类）。
3. 随机抽样：为每棵树通过有放回抽样（bootstrap）生成随机样本集。
4. 投票机制：通过各棵树的预测结果进行多数投票，决定最终的输出。

下面是一个简化的随机森林实现示例（其中决策树的训练和预测逻辑只是占位，需要自行实现；代码使用了结构化绑定，需要以 C++17 或更高标准编译）：

#include <cstdlib>
#include <ctime>
#include <iostream>
#include <map>
#include <stdexcept>
#include <utility>
#include <vector>

/// Placeholder base learner for the random forest.
///
/// Both member functions are stubs: train() does nothing with its arguments
/// and predict() always returns class 0. Replace the bodies with a real
/// split-selection and tree-traversal implementation before using the output.
class DecisionTree {
public:
    /// Fits the tree to a training set (currently a no-op stub).
    ///
    /// @param data   Feature rows, one inner vector per sample.
    /// @param labels Class labels passed alongside `data`.
    void train([[maybe_unused]] const std::vector<std::vector<float>>& data,
               [[maybe_unused]] const std::vector<int>& labels) {
        // TODO: implement the decision-tree training logic.
    }

    /// Predicts the class of a single feature vector (stub: always 0).
    ///
    /// Declared const because prediction does not modify the tree; this lets
    /// callers invoke it through const references and const containers.
    ///
    /// @param input Feature vector to classify.
    /// @return The predicted class label.
    int predict([[maybe_unused]] const std::vector<float>& input) const {
        // TODO: implement the decision-tree prediction logic.
        return 0;
    }
};

/// Bagging ensemble of DecisionTree classifiers combined by majority vote.
///
/// Every tree is trained on its own bootstrap sample (rows drawn with
/// replacement) of the training set. Sampling uses std::rand(), so seeding
/// with std::srand() controls reproducibility.
class RandomForest {
private:
    std::vector<DecisionTree> trees;
    int n; // number of trees in the ensemble

public:
    /// Creates a forest of `nTrees` untrained trees.
    ///
    /// @param nTrees Number of trees; must be non-negative.
    RandomForest(int nTrees) : n(nTrees) {
        trees.resize(n);
    }

    /// Trains every tree on an independent bootstrap sample.
    ///
    /// @param data   Feature rows, one inner vector per sample.
    /// @param labels Class label of each row; needs at least data.size() entries.
    /// @throws std::invalid_argument if `labels` has fewer entries than `data`.
    void train(const std::vector<std::vector<float>>& data, const std::vector<int>& labels) {
        // Reject the shape that would make the sampler read past the end of `labels`.
        if (labels.size() < data.size()) {
            throw std::invalid_argument("RandomForest::train: fewer labels than data rows");
        }
        for (auto& tree : trees) {
            auto [sampleData, sampleLabels] = bootstrapSample(data, labels);
            tree.train(sampleData, sampleLabels);
        }
    }

    /// Classifies `input` by majority vote over all trees.
    ///
    /// Ties go to the smallest label, because votes are scanned in ascending
    /// key order and only a strictly larger count replaces the current winner.
    ///
    /// @param input Feature vector to classify.
    /// @return The winning class label, or -1 when the forest has no trees.
    int predict(const std::vector<float>& input) {
        std::map<int, int> votes;
        // Non-const iteration keeps this valid whether or not
        // DecisionTree::predict is const-qualified.
        for (auto& tree : trees) {
            ++votes[tree.predict(input)];
        }

        int result = -1;
        int maxVotes = 0;
        for (const auto& [label, count] : votes) {
            if (count > maxVotes) {
                maxVotes = count;
                result = label;
            }
        }
        return result;
    }

private:
    /// Draws data.size() rows with replacement, together with their labels.
    ///
    /// Row indices come from std::rand(), so only the first RAND_MAX + 1 rows
    /// can ever be selected from a larger data set.
    ///
    /// @param data   Feature rows to sample from.
    /// @param labels Labels aligned with `data`; needs at least data.size() entries.
    /// @return The sampled rows and their labels, in draw order.
    /// @throws std::invalid_argument if `labels` has fewer entries than `data`.
    std::pair<std::vector<std::vector<float>>, std::vector<int>> bootstrapSample(const std::vector<std::vector<float>>& data, const std::vector<int>& labels) {
        if (labels.size() < data.size()) {
            throw std::invalid_argument("RandomForest::bootstrapSample: fewer labels than data rows");
        }

        const std::size_t sampleCount = data.size();
        std::vector<std::vector<float>> sampleData;
        std::vector<int> sampleLabels;
        sampleData.reserve(sampleCount);
        sampleLabels.reserve(sampleCount);

        for (std::size_t i = 0; i < sampleCount; ++i) {
            const std::size_t index = static_cast<std::size_t>(std::rand()) % sampleCount;
            sampleData.push_back(data[index]);
            sampleLabels.push_back(labels[index]);
        }
        // Move the locals into the pair; a braced list of lvalues would copy them.
        return {std::move(sampleData), std::move(sampleLabels)};
    }
};

int main() {
    srand(static_cast<unsigned>(time(0))); // 设置随机种子
    std::vector<std::vector<float>> data = {
        {1.0, 2.0},
        {2.0, 3.0},
        {3.0, 4.0}
        // 添加更多数据
    };
    std::vector<int> labels = {0, 1, 0}; // 示例标签

    RandomForest rf(10); // 10棵树
    rf.train(data, labels);

    std::vector<float> input = {2.5, 3.5};
    int prediction = rf.predict(input);
    std::cout << "预测结果: " << prediction << std::endl;

    return 0;
}