目标:在C++层将Arrow Table的表数据以Arrow IPC文件格式写入文件;在Go层读取该文件并提取表数据。
实现流程如下:
C++层:
//构建表数据
/*表结构
a | b
0 | 9
1 | 8
2 | 7
3 | 6
4 | 5
*/
std::shared_ptr<arrow::Table> CreateTable1() {
auto schema =
arrow::schema({arrow::field("a", arrow::int64()),arrow::field("b", arrow::int64())});
std::shared_ptr<arrow::Array> array_a;
std::shared_ptr<arrow::Array> array_b;
arrow::NumericBuilder<arrow::Int64Type> builder;
ABORT_ON_FAILURE(builder.AppendValues({0, 1, 2, 3, 4}));
ABORT_ON_FAILURE(builder.Finish(&array_a));
builder.Reset();
ABORT_ON_FAILURE(builder.AppendValues({9, 8, 7, 6, 5}));
ABORT_ON_FAILURE(builder.Finish(&array_b));
builder.Reset();
return arrow::Table::Make(schema, {array_a, array_b});
}
// Writes the table returned by CreateTable1() to the single file
// "<root_path>/feather_dataset/data1.arrow" using the Arrow IPC file writer.
//
// filesystem: file system used to create the directory and open the output stream.
// root_path:  parent directory; the "feather_dataset" directory is created under it.
// Returns the path of the dataset directory ("<root_path>/feather_dataset").
//
// Every Arrow status is checked with ABORT_ON_FAILURE; Result values are unwrapped
// with ValueOrDie().
std::string CreateExampleFeatherDataset(const std::shared_ptr<fs::FileSystem>& filesystem,
                                        const std::string& root_path) {
  // Directory that will hold the data file.
  auto base_path = root_path + "/feather_dataset";
  ABORT_ON_FAILURE(filesystem->CreateDir(base_path));

  // Create an Arrow Table.
  auto table = CreateTable1();

  // Open the destination file and create an IPC file writer from the stream sink
  // and the table's schema.
  auto output = filesystem->OpenOutputStream(base_path + "/data1.arrow").ValueOrDie();
  auto writer = arrow::ipc::MakeFileWriter(output.get(), table->schema()).ValueOrDie();

  // The (possibly chunked) table is written as a sequence of record batches.
  ABORT_ON_FAILURE(writer->WriteTable(*table));
  // Finish the IPC file.
  ABORT_ON_FAILURE(writer->Close());
  // Close the stream explicitly so that a failure while flushing/closing the file is
  // reported here instead of being left to the stream's destructor.
  ABORT_ON_FAILURE(output->Close());

  return base_path;
}
Go层实现:(编译需要Go 1.17及以上版本)
// TestReadFile reads the file written by the C++ side and prints, for every
// record in it, the row count, the column count, the names of the first two
// columns and their data.
//
// The test fails if the file cannot be opened, if the reader cannot be created
// with the expected schema, if a record cannot be read, or if the checked
// allocator still reports outstanding memory when the test returns.
func TestReadFile(t *testing.T) {
	// Schema the file is expected to carry: two nullable int64 columns, "a" and "b".
	schema := arrow.NewSchema(
		[]arrow.Field{
			{Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
			{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		}, nil,
	)

	// Deferred calls run in reverse order, so the reader and the file are closed
	// before the allocator size is asserted.
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer mem.AssertSize(t, 0)

	// The file is only read, so open it read-only; this also works when the
	// file or its directory is not writable.
	f, err := os.Open("/home/sss/CLionProjects/ExampleDataSet/feather_dataset/data1.arrow")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	// Position the file at its beginning before handing it to the reader.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}

	r, err := ipc.NewFileReader(f, ipc.WithSchema(schema), ipc.WithAllocator(mem))
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	for i := 0; i < r.NumRecords(); i++ {
		rec, err := r.Record(i)
		if err != nil {
			t.Fatalf("could not read record %d: %v", i, err)
		}
		fmt.Printf("该表行数: %d\n", rec.NumRows())
		fmt.Printf("该表列数: %d\n", rec.NumCols())
		fmt.Printf("该表第1列名字: %s\n", rec.ColumnName(0))
		fmt.Printf("该表第2列名字: %s\n", rec.ColumnName(1))
		fmt.Printf("该表第1列数据: ")
		fmt.Println(rec.Column(0))
		fmt.Printf("该表第2列数据: ")
		fmt.Println(rec.Column(1))
	}
}
输出结果:
结论:与C++层写入的信息一致。