1.Hive使用的文件格式
: SEQUENCEFILE -- 序列化文件(二进制键值对)
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later) RC = Record Columnar:先按行分组,组内再按列存储
| ORC -- (Note: Available in Hive 0.11.0 and later) ORC = Optimized Row Columnar,优化的行列式存储
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
可以先用默认的 TEXTFILE 格式建表并加载数据,再通过 CTAS(create table ... as select)另存为 ORC、PARQUET 等其他格式
-- Text-format table with seven STRING columns; fields are tab-delimited.
CREATE TABLE IF NOT EXISTS file_text (
    t_time        STRING,
    t_url         STRING,
    t_uuid        STRING,
    t_refered_url STRING,
    t_ip          STRING,
    t_user        STRING,
    t_city        STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
-- Load the local data file into file_text.
LOAD DATA LOCAL INPATH '/home/user01/page_views.data' INTO TABLE file_text;
-- Size on HDFS of the table stored in the default TEXTFILE format.
-- (Hive only recognizes `--` as a comment marker; `//` is not a comment.)
dfs -du -s -h /user/hive/warehouse/db01.db/file_text;
18.1 M 18.1 M /user/hive/warehouse/db01.db/file_text
-- Store as ORC: CTAS rewrites the rows of file_text into ORC files.
-- No ROW FORMAT DELIMITED clause: field delimiters only apply to text
-- storage and have no effect on ORC data.
CREATE TABLE IF NOT EXISTS file_orc
STORED AS ORC
AS
SELECT
    t_time,
    t_url,
    t_uuid,
    t_refered_url,
    t_ip,
    t_user,
    t_city
FROM file_text;
-- Size on HDFS of the ORC copy.
dfs -du -s -h /user/hive/warehouse/db01.db/file_orc;
2.6 M 2.6 M /user/hive/warehouse/db01.db/file_orc
-- Store as Parquet: CTAS rewrites the rows of file_text into Parquet files.
-- No ROW FORMAT DELIMITED clause: field delimiters only apply to text
-- storage and have no effect on Parquet data.
CREATE TABLE IF NOT EXISTS file_parquet
STORED AS PARQUET
AS
SELECT
    t_time,
    t_url,
    t_uuid,
    t_refered_url,
    t_ip,
    t_user,
    t_city
FROM file_text;
-- Size on HDFS of the Parquet copy.
dfs -du -s -h /user/hive/warehouse/db01.db/file_parquet;
13.1 M 13.1 M /user/hive/warehouse/db01.db/file_parquet
【注意:】
1.要使用某种存储格式,建表时必须显式指定 stored as orc/parquet/textfile;不指定时使用 hive.default.fileformat 配置的默认格式(默认为 TEXTFILE)
2.向 ORC/PARQUET 表导入数据时不能直接用 load 加载文本文件(load 只移动文件,不做格式转换),应通过 insert into ... select 或 CTAS 从 TEXTFILE 表转换写入
: SEQUENCEFILE -- 序列化文件(二进制键值对)
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later) RC = Record Columnar:先按行分组,组内再按列存储
| ORC -- (Note: Available in Hive 0.11.0 and later) ORC = Optimized Row Columnar,优化的行列式存储
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
可以先用默认的 TEXTFILE 格式建表并加载数据,再通过 CTAS(create table ... as select)另存为 ORC、PARQUET 等其他格式
-- Text-format table with seven STRING columns; fields are tab-delimited.
CREATE TABLE IF NOT EXISTS file_text (
    t_time        STRING,
    t_url         STRING,
    t_uuid        STRING,
    t_refered_url STRING,
    t_ip          STRING,
    t_user        STRING,
    t_city        STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
-- Load the local data file into file_text.
LOAD DATA LOCAL INPATH '/home/user01/page_views.data' INTO TABLE file_text;
-- Size on HDFS of the table stored in the default TEXTFILE format.
-- (Hive only recognizes `--` as a comment marker; `//` is not a comment.)
dfs -du -s -h /user/hive/warehouse/db01.db/file_text;
18.1 M 18.1 M /user/hive/warehouse/db01.db/file_text
-- Store as ORC: CTAS rewrites the rows of file_text into ORC files.
-- No ROW FORMAT DELIMITED clause: field delimiters only apply to text
-- storage and have no effect on ORC data.
CREATE TABLE IF NOT EXISTS file_orc
STORED AS ORC
AS
SELECT
    t_time,
    t_url,
    t_uuid,
    t_refered_url,
    t_ip,
    t_user,
    t_city
FROM file_text;
-- Size on HDFS of the ORC copy.
dfs -du -s -h /user/hive/warehouse/db01.db/file_orc;
2.6 M 2.6 M /user/hive/warehouse/db01.db/file_orc
-- Store as Parquet: CTAS rewrites the rows of file_text into Parquet files.
-- No ROW FORMAT DELIMITED clause: field delimiters only apply to text
-- storage and have no effect on Parquet data.
CREATE TABLE IF NOT EXISTS file_parquet
STORED AS PARQUET
AS
SELECT
    t_time,
    t_url,
    t_uuid,
    t_refered_url,
    t_ip,
    t_user,
    t_city
FROM file_text;
-- Size on HDFS of the Parquet copy.
dfs -du -s -h /user/hive/warehouse/db01.db/file_parquet;
13.1 M 13.1 M /user/hive/warehouse/db01.db/file_parquet
【注意:】
1.要使用某种存储格式,建表时必须显式指定 stored as orc/parquet/textfile;不指定时使用 hive.default.fileformat 配置的默认格式(默认为 TEXTFILE)
2.向 ORC/PARQUET 表导入数据时不能直接用 load 加载文本文件(load 只移动文件,不做格式转换),应通过 insert into ... select 或 CTAS 从 TEXTFILE 表转换写入