数据总量:
select count(*) from hive.tmp.test_orc_file;
107,608,023
分别执行 count max where
select 'orc',count(*) from hive.tmp.test_orc_file;
select 'avro',count(*) from hive.tmp.test_avro_file;
select 'rcfile',count(*) from hive.tmp.test_rcfile_file;
select 'seqfile',count(*) from hive.tmp.test_seqfile_file;
select 'parquet',count(*) from hive.tmp.test_parquet_file;
sql> select 'orc',count(*) from hive.tmp.test_orc_file
[2021-03-15 14:08:12] 1 row retrieved starting from 1 in 565 ms (execution: 509 ms, fetching: 56 ms)
sql> select 'avro',count(*) from hive.tmp.test_avro_file
[2021-03-15 14:09:55] 1 row retrieved starting from 1 in 1 m 43 s 119 ms (execution: 1 s 129 ms, fetching: 1 m 41 s 990 ms)
sql> select 'rcfile',count(*) from hive.tmp.test_rcfile_file
[2021-03-15 14:10:08] 1 row retrieved starting from 1 in 12 s 305 ms (execution: 1 s 83 ms, fetching: 11 s 222 ms)
sql> select 'seqfile',count(*) from hive.tmp.test_seqfile_file
[2021-03-15 14:10:24] 1 row retrieved starting from 1 in 15 s 868 ms (execution: 1 s 68 ms, fetching: 14 s 800 ms)
sql> select 'parquet',count(*) from hive.tmp.test_parquet_file
[2021-03-15 14:42:20] 1 row retrieved starting from 1 in 1 s 434 ms (execution: 1 s 77 ms, fetching: 357 ms)
select 'orc',max(date(part_date)) from hive.tmp.test_orc_file;
select 'avro',max(date(part_date)) from hive.tmp.test_avro_file;
select 'rcfile',max(date(part_date)) from hive.tmp.test_rcfile_file;
select 'seqfile',max(date(part_date)) from hive.tmp.test_seqfile_file;
select 'parquet',max(date(part_date)) from hive.tmp.test_parquet_file;
sql> select 'orc',max(date(part_date)) from hive.tmp.test_orc_file
[2021-03-15 14:11:54] 1 row retrieved starting from 1 in 1 s 757 ms (execution: 1 s 100 ms, fetching: 657 ms)
sql> select 'avro',max(date(part_date)) from hive.tmp.test_avro_file
[2021-03-15 14:13:36] 1 row retrieved starting from 1 in 1 m 41 s 197 ms (execution: 1 s 70 ms, fetching: 1 m 40 s 127 ms)
sql> select 'rcfile',max(date(part_date)) from hive.tmp.test_rcfile_file
[2021-03-15 14:13:49] 1 row retrieved starting from 1 in 12 s 880 ms (execution: 1 s 88 ms, fetching: 11 s 792 ms)
sql> select 'seqfile',max(date(part_date)) from hive.tmp.test_seqfile_file
[2021-03-15 14:14:18] 1 row retrieved starting from 1 in 29 s 488 ms (execution: 1 s 71 ms, fetching: 28 s 417 ms)
sql> select 'parquet',max(date(part_date)) from hive.tmp.test_parquet_file
[2021-03-15 14:42:54] 1 row retrieved starting from 1 in 2 s 865 ms (execution: 1 s 83 ms, fetching: 1 s 782 ms)
select 'orc',uuid from hive.tmp.test_orc_file where uuid='784d78db-e00d-4efb-906b-98049944daa2';
select 'avro',uuid from hive.tmp.test_avro_file where uuid='784d78db-e00d-4efb-906b-98049944daa2';
select 'rcfile',uuid from hive.tmp.test_rcfile_file where uuid='784d78db-e00d-4efb-906b-98049944daa2';
select 'seqfile',uuid from hive.tmp.test_seqfile_file where uuid='784d78db-e00d-4efb-906b-98049944daa2';
select 'parquet',uuid from hive.tmp.test_parquet_file where uuid='784d78db-e00d-4efb-906b-98049944daa2';
sql> select 'orc',uuid from hive.tmp.test_orc_file where uuid='784d78db-e00d-4efb-906b-98049944daa2'
[2021-03-15 14:17:00] 1 row retrieved starting from 1 in 8 s 999 ms (execution: 1 s 134 ms, fetching: 7 s 865 ms)
sql> select 'avro',uuid from hive.tmp.test_avro_file where uuid='784d78db-e00d-4efb-906b-98049944daa2'
[2021-03-15 14:18:44] 1 row retrieved starting from 1 in 1 m 44 s 286 ms (execution: 1 s 122 ms, fetching: 1 m 43 s 164 ms)
sql> select 'rcfile',uuid from hive.tmp.test_rcfile_file where uuid='784d78db-e00d-4efb-906b-98049944daa2'
[2021-03-15 14:18:57] 1 row retrieved starting from 1 in 13 s 209 ms (execution: 1 s 119 ms, fetching: 12 s 90 ms)
sql> select 'seqfile',uuid from hive.tmp.test_seqfile_file where uuid='784d78db-e00d-4efb-906b-98049944daa2'
[2021-03-15 14:19:19] 1 row retrieved starting from 1 in 21 s 335 ms (execution: 1 s 81 ms, fetching: 20 s 254 ms)
单位 秒 | count | max | where |
orc | 0.565 | 1 | 8 |
avro | 43 | 101 | 104 |
rcfile | 12 | 12 | 13 |
seqfile | 15 | 29 | 21 |
parquet | 1 | 2 | 8 |