ORC数据的输入输出
仅支持将ORC格式的数据写入(导入)ClickHouse。
ORC和CH数据类型的匹配关系
备注:
- 不支持的ORC数据类型:DATE32, TIME32, FIXED_SIZE_BINARY, JSON, UUID, ENUM。
- ClickHouse表的列名必须与ORC表的列名一致。
使用Spark生成ORC文件
// Sample records as (source IP, destination IP, timestamp string) tuples.
val list = List(
  ("113.248.234.232", "123.212.22.01", "2018-07-12 14:35:31"),
  ("115.248.158.231", "154.245.56.23", "2020-07-12 13:26:26"),
  ("115.248.158.231", "154.245.56.23", "2020-07-12 13:22:13"),
  ("187.248.135.230", "221.228.112.45", "2019-08-09 13:17:39"),
  ("187.248.234.232", "221.228.112.24", "2019-08-09 20:51:16"),
  ("115.248.158.231", "154.245.56.23", "2020-07-12 17:22:56")
)
// NOTE(review): `sc` and `spark` are presumably the SparkContext / SparkSession
// predefined by spark-shell -- confirm for other environments.
val rdd = sc.makeRDD(list)
// Brings the implicit conversions into scope that enable rdd.toDF(...).
import spark.implicits._
// Column names must match the ClickHouse table columns (srcip, destip, time).
val df = rdd.toDF("srcip", "destip", "time")
// Repartition to a single partition before writing, then append the data
// in ORC format under /tmp/orc.
df.repartition(1).write.format("orc").mode("append").save("/tmp/orc")
创建测试表
-- Create the table in database `test`, which is the target of the import command (INSERT INTO test.orc_demo).
create table test.orc_demo (srcip String, destip String, time DateTime) ENGINE=TinyLog;
数据导入
# Feed the ORC file to clickhouse-client on stdin; the INSERT reads it in ORC format.
clickhouse-client --query="INSERT INTO test.orc_demo FORMAT ORC" < file.orc
查询结果
-- Query the table that the import wrote to (test.orc_demo), independent of the current database.
select * from test.orc_demo