import os

import pyarrow as pa
import pyarrow.orc as orc
# Convert an ORC file into a tab-separated text file: one header line with
# the column names, then one line per row.

# Path of the .snappy.orc file to convert.
orc_file_path = 'a.snappy.orc'
# Read the full ORC file content into a PyArrow table.
orc_file = orc.ORCFile(orc_file_path)
table = orc_file.read()
# Column names in schema order; they also form the header line.
columns = table.schema.names
# Map each column name to its values converted to native Python objects.
data_dict = {column: table.column(column).to_pylist() for column in columns}
# Destination of the text dump.
txt_file_path = 'data/output_file.txt'
# Create the output directory first so open() does not fail when it is missing.
output_dir = os.path.dirname(txt_file_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
with open(txt_file_path, 'w', encoding='utf-8') as file:
    # Header line.
    file.write('\t'.join(columns) + '\n')
    # Walk the columns in lockstep to get one tuple per row. With no columns
    # at all this simply yields no rows instead of failing on columns[0].
    for row in zip(*(data_dict[column] for column in columns)):
        # str() renders nested values (lists, dicts) and scalars alike, so no
        # per-type special case is needed. None is written as "None".
        file.write('\t'.join(str(value) for value in row) + '\n')
print(f"数据已成功写入到 {txt_file_path}")
# 复杂orc文件转txt文件
# 最新推荐文章于 2024-09-14 19:55:48 发布