基于 Boost.MultiIndex 库,我们可以用 C++ 实现类似 pandas DataFrame 的操作:
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/tokenizer.hpp>
using namespace boost::multi_index;
// A minimal pandas-DataFrame-like table built on Boost.MultiIndex.
// Rows are stored once and can be looked up either by id or by name.
class DataFrame {
public:
    // One record of the table.
    struct Row {
        int id;
        std::string name;
        double value;

        Row(int id, const std::string& name, double value)
            : id(id), name(name), value(value) {}
    };

    // Container type with two ordered indices over the stored rows:
    //   index 0 - unique, keyed by Row::id
    //   index 1 - non-unique, keyed by Row::name
    using Container = multi_index_container<
        Row,
        indexed_by<
            ordered_unique<member<Row, int, &Row::id>>,
            ordered_non_unique<member<Row, std::string, &Row::name>>
        >
    >;

    // Inserts a new row.
    // Returns true if the row was stored, or false if the insertion was
    // rejected (index 0 is unique, so a second row with an already-used id
    // is not added). Callers that do not care may ignore the result.
    bool insert(int id, const std::string& name, double value) {
        return data_.insert(Row(id, name, value)).second;
    }

    // Looks up a row by id.
    // Returns a pointer to the row stored inside the container, or nullptr
    // if no row has that id.
    const Row* find_by_id(int id) const {
        const auto& by_id = data_.get<0>();
        const auto it = by_id.find(id);
        return it != by_id.end() ? &(*it) : nullptr;
    }

    // Looks up all rows with the given name.
    // Returns pointers to the matching rows stored inside the container;
    // the vector is empty when no row matches.
    std::vector<const Row*> find_by_name(const std::string& name) const {
        const auto& by_name = data_.get<1>();
        const auto range = by_name.equal_range(name);

        std::vector<const Row*> result;
        for (auto it = range.first; it != range.second; ++it) {
            result.push_back(&(*it));
        }
        return result;
    }

private:
    Container data_;
};
// Demo: fill a DataFrame, then look rows up by id and by name.
int main() {
    DataFrame df;

    // Insert sample data; "Alice" is used twice so the name lookup
    // below returns more than one row.
    df.insert(1, "Alice", 3.14);
    df.insert(2, "Bob", 2.71);
    df.insert(3, "Alice", 1.23);

    // Look up a single row by id.
    const DataFrame::Row* row1 = df.find_by_id(2);
    if (row1) {
        std::cout << "ID: " << row1->id << ", Name: " << row1->name
                  << ", Value: " << row1->value << std::endl;
    }

    // Look up every row that has the given name.
    std::vector<const DataFrame::Row*> rows = df.find_by_name("Alice");
    for (const auto* row : rows) {
        // Fixed: the original was missing `<<` before std::endl.
        std::cout << "ID: " << row->id << ", Name: " << row->name
                  << ", Value: " << row->value << std::endl;
    }
    return 0;
}
如果需要从 CSV 文件中加载数据,可以参考如下代码:
// 包含 DataFrame 类的定义
class DataFrame {
// 省略原有代码...
public:
// 添加从 CSV 文件加载数据的接口
void loadFromCsv(const std::string& file_path) {
std::ifstream file(file_path);
if (!file.is_open()) {
std::cout << "Failed to open file: " << file_path << std::endl;
return;
}
// 读取表头
std::string header_line;
std::getline(file, header_line);
boost::tokenizer<boost::escaped_list_separator<char>> header_tokenizer(header_line);
std::vector<std::string> headers(header_tokenizer.begin(), header_tokenizer.end());
// 创建 DataFrame 的字段
for (const auto& header : headers) {
addField(header);
}
// 逐行读取数据
std::string line;
while (std::getline(file, line)) {
boost::tokenizer<boost::escaped_list_separator<char>> tokenizer(line);
std::vector<std::string> tokens(tokenizer.begin(), tokenizer.end());
if (tokens.size() != headers.size()) {
std::cout << "Invalid row in CSV file: " << line << std::endl;
continue;
}
// 将数据添加到 DataFrame 中
for (size_t i = 0; i < headers.size(); ++i) {
setFieldValue(headers[i], tokens[i]);
}
}
file.close();
}
// 省略原有代码...
};
// Demo: load a DataFrame from a CSV file.
int main() {
    DataFrame df;

    // Load data from the CSV file.
    df.loadFromCsv("path/to/your/file.csv");

    // Other operations...

    // Fixed: the original `return 0}` was missing its semicolon.
    return 0;
}
参考文献:
Does C++ has a data structure similar to Python Pandas’ DataFrame?