hive进阶学习

hive数据压缩 snappy

mkdir 2.5.0-native-snappy
tar -zxvf 2.5.0-native-snappy.tar.gz -C 2.5.0-native-snappy
cd hadoop-2.5.0/lib
#改变原来的native
mv native/ 250native
mkdir native
cp /home/soft/2.5.0-native-snappy/* native/
bin/hadoop checknative
#结果如下
18/04/16 12:30:25 INFO bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native
18/04/16 12:30:25 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
Native library checking:
hadoop: true /opt/modules/hadoop-2.5.0/lib/native/libhadoop.so
zlib:   true /lib64/libz.so.1
snappy: true /opt/modules/hadoop-2.5.0/lib/native/libsnappy.so.1
lz4:    true revision:99
bzip2:  true /lib64/libbz2.so.1

运行一个mapreduce程序

 #在hdfs上创建文件
 bin/hdfs dfs -mkdir -p /user/jianxin/mapreduce/wordcount/input
 #上传文件
 bin/hdfs dfs -put /home/datas/mc.input  /user/jianxin/mapreduce/wordcount/input

 bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/jianxin/mapreduce/wordcount/input  /user/jianxin/mapreduce/wordcount/output
 
bin/hdfs dfs -cat  /user/jianxin/mapreduce/wordcount/output/part-r-00000
#压缩形式
bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount -D mapreduce.map.output.compress=true -D mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.SnappyCodec /user/jianxin/mapreduce/wordcount/input /user/jianxin/mapreduce/wordcount/output22
#查看历史记录服务器
http://hadoop.jianxin.com:19888/jobhistory

hive数据表的存储

  • TEXTFILE

-- Baseline table: plain tab-delimited text (TEXTFILE), no compression.
CREATE TABLE page_views (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

hive (default)> load data local  inpath  '/home/datas/page_views.data' into table  page_views;

#查看文件的大小
hive (default)> dfs -du -h /user/hive/warehouse/page_views

18.1 M  /user/hive/warehouse/page_views/page_views.data
  • orc
-- Same schema stored as ORC (columnar); ORC applies its default codec (ZLIB)
-- when rows are written through a query.
CREATE TABLE page_views_orc (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS orc;


hive (default)> load data local  inpath  '/home/datas/page_views.data' into table  page_views_orc;
#或者
insert into table page_views_orc select * from page_views ;

hive (default)> dfs -du -h /user/hive/warehouse/page_views_orc

2.6 M  /user/hive/warehouse/page_views_orc/000000_0
  • parquet
-- Same schema stored as Parquet (columnar).
CREATE TABLE page_views_parquet (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS PARQUET;

load data local  inpath  '/home/datas/page_views.data' into table  page_views_parquet;

dfs -du -h /user/hive/warehouse/page_views_parquet
18.1 M  /user/hive/warehouse/page_views_parquet/page_views.data
#注意：load data 只是把原始文本文件复制到表目录，并不会转换成 Parquet 格式，所以文件大小没有变化；应改用 insert into table page_views_parquet select * from page_views; 才会真正生成 Parquet 文件
  • snappy
-- ORC table with Snappy as the ORC file-level codec ("orc.compress");
-- the property only takes effect on files written by Hive queries.
CREATE TABLE page_views_orc_snappy (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS orc TBLPROPERTIES ("orc.compress"="SNAPPY");

load data local  inpath  '/home/datas/page_views.data' into table  page_views_orc_snappy;

dfs -du -h /user/hive/warehouse/page_views_orc_snappy

18.1 M  /user/hive/warehouse/page_views_orc_snappy/page_views.data
#注意：load data 只是复制原始文本文件，不会转换成 ORC 格式、也不会做 Snappy 压缩，所以大小仍是 18.1 M（对比上面 insert 写入的 orc 表只有 2.6 M）；应改用 insert into table page_views_orc_snappy select * from page_views;


-- ORC table with compression explicitly disabled, for size comparison
-- against the default-codec and Snappy variants.
CREATE TABLE page_views_orc_none (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS orc TBLPROPERTIES ("orc.compress"="NONE");

load data local  inpath  '/home/datas/page_views.data' into table  page_views_orc_none;

dfs -du -h /user/hive/warehouse/page_views_orc_none

hive (default)> dfs -du -h /user/hive/warehouse/page_views_orc_none;
18.1 M  /user/hive/warehouse/page_views_orc_none/page_views.data


-- Session-level setting: Parquet files written after this use Snappy.
SET parquet.compression=SNAPPY;

CREATE TABLE page_views_parquet_snappy (
    track_time  STRING,
    url         STRING,
    session_id  STRING,
    referer     STRING,
    ip          STRING,
    end_user_id STRING,
    city_id     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS parquet;

-- Populate through a query so the data is actually re-encoded as
-- Snappy-compressed Parquet (a plain file copy would not be).
INSERT INTO TABLE page_views_parquet_snappy SELECT * FROM page_views;
dfs -du -h /user/hive/warehouse/page_views_parquet_snappy/ ;

hive (default)> dfs -du -h /user/hive/warehouse/page_views_parquet_snappy/ ;
6.4 M  /user/hive/warehouse/page_views_parquet_snappy/000000_0

总结

在实际的项目开发当中，hive 表的数据存储格式选用 ORC 或者 Parquet，数据压缩采用 Snappy。

hive企业优化

转载于:https://my.oschina.net/jiansin/blog/1796433

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值