将 Hive 查询结果转为 Parquet 格式并存入 HDFS。
步骤:
1. 首先根据 Hive 查询结果新建一张表;2. 然后查看并修改表结构,将表的存储格式指定为 Parquet;
3. 然后删除之前创建的那张表,再新建这张指定了存储格式的表;
4. 最后把查询结果插入这张表中。
1、
-- Step 1: materialise the query result as a scratch table so that Hive derives
-- the column definitions, which can then be inspected with SHOW CREATE TABLE.
create table table_parquet as
select j30.*
from jpush.tags_30_mac j30
inner join (
    -- Distinct MACs from the rows with y=2017, m=12, d=2 whose second
    -- character is 0, 2, 4 or 8. Filtering here replaces the original
    -- CASE-without-ELSE, which emitted NULL for every other MAC; NULL keys
    -- never satisfy the equality join below, so the joined rows are the same.
    select distinct s.mac
    from wifi_location_sniffer3 s
    where s.y = 2017
      and s.m = 12
      and s.d = 2
      -- substring() returns a string: compare against string literals instead
      -- of relying on an implicit string-to-number conversion.
      and substring(s.mac, 2, 1) in ('0', '2', '4', '8')
) huanp
    on huanp.mac = j30.key;
2、查看表结构
-- Print the DDL Hive generated for the scratch table; it is the template for
-- the Parquet-backed table definition.
show create table table_parquet;
对步骤 2 查看到的表结构进行修改:主要是加上 stored as parquet,并指定文件在 HDFS 中的存放位置。
-- Target table: same four columns as the generated DDL, but stored as Parquet
-- at an explicit HDFS location.
create table `20171202_parquet` (
    `key`   string,
    `fosun` map<string,string>,
    `jpush` map<string,string>,
    `tags`  map<string,string>
)
stored as parquet
-- The path must be a plain quoted string literal (typographic quotes are not
-- valid delimiters). With 'hdfs://user/...' the word "user" is parsed as the
-- URI authority (the NameNode host); the triple slash leaves the authority
-- empty so the cluster's default file system is used.
-- NOTE(review): confirm /user/zhangshk/test/hive_to_hdfs is the intended path.
location 'hdfs:///user/zhangshk/test/hive_to_hdfs';
3、删除之前创建的表
-- Remove the scratch table; "if exists" keeps the step re-runnable when the
-- table has already been dropped.
drop table if exists table_parquet;
4、将 Hive 查询结果存入 HDFS;到 HDFS 中查看,可以发现生成的是 Parquet 文件
-- Step 4: load the query result into the Parquet-backed table.
-- The "table" keyword is spelled out because older Hive releases require it.
insert into table `20171202_parquet`
select
    -- Explicit columns, in the order declared by the target table's DDL, so
    -- that a later schema change on the source cannot silently shift the
    -- positional insert.
    j30.key,
    j30.fosun,
    j30.jpush,
    j30.tags
from jpush.tags_30_mac j30
inner join (
    -- Distinct MACs from the rows with y=2017, m=12, d=2 whose second
    -- character is 0, 2, 4 or 8. Filtering here replaces the original
    -- CASE-without-ELSE, which emitted NULL for every other MAC; NULL keys
    -- never satisfy the equality join below, so the joined rows are the same.
    select distinct s.mac
    from wifi_location_sniffer3 s
    where s.y = 2017
      and s.m = 12
      and s.d = 2
      -- substring() returns a string: compare against string literals instead
      -- of relying on an implicit string-to-number conversion.
      and substring(s.mac, 2, 1) in ('0', '2', '4', '8')
) huanp
    on huanp.mac = j30.key;