hive学习笔记1

最新推荐文章于 2024-07-03 13:19:29 发布

qq_38599944

最新推荐文章于 2024-07-03 13:19:29 发布

阅读量169

点赞数

分类专栏：学习毕节 文章标签：hive

本文链接：https://blog.csdn.net/qq_38599944/article/details/85638489

版权

学习毕节专栏收录该内容

1 篇文章 0 订阅

订阅专栏

hive学习笔记
1.简单的 wordcount

-- Word count: split each sentence in t2 on single spaces, emit one row per
-- token, then count the rows for every distinct word.
-- The count column is intentionally left unaliased.
with words as (
    select explode(split(sentence, ' ')) as word
    from t2
)
select
    word,
    count(1)
from words
group by word;

对t2表中的sentence列进行空格分隔，统计单词出现的数量

-- Word frequencies, most frequent first.
-- The lateral view produces one row per space-separated token of sentence.
-- The final order by is a global sort, which Hive runs on a single reducer.
select
    word,
    count(1) as n
from t2
lateral view explode(split(sentence, ' ')) tokens as word
group by word
order by n desc;

按单词出现次数倒序排列；order by 是全局排序，只会产生一个 reduce

2.建表，内部表，外部表

-- Managed (internal) table with a single sentence column, partitioned by dt.
-- HiveQL comments start with "--"; "//" is not comment syntax and breaks the
-- statement when the script is run.
create table t3 (sentence string)
partitioned by (dt string)
row format delimited fields terminated by '\n';

-- Load a file from the local filesystem into the managed table.
-- t3 is partitioned, so the target partition is named explicitly; '201911'
-- is an example value. Replace the placeholder path with a real local path.
load data local inpath '本地路径' into table t3 partition (dt = '201911');

-- External table over the HDFS directory /file.
-- No data is copied or imported: the table definition only points at the
-- location, and the files there are read in place as text.
create external table t2 (sentence string)
row format delimited fields terminated by '\n'
stored as textfile
location '/file';

查看表中的分区

-- List the partitions of a partitioned table. t3 is the partitioned table
-- defined in these notes; substitute any other partitioned table name.
show partitions t3;

插入分区数据

-- Replace the contents of partition dt='201911' of t3 with 100 rows from t2.
-- There is no order by, so which 100 rows are taken is not guaranteed.
insert overwrite table t3 partition (dt = '201911')
select sentence
from t2
limit 100;

分区筛选数据

-- Rows whose partition value lies between 201911 and 201912, both inclusive.
-- dt is a string column, so the comparison is lexicographic; this assumes
-- every dt value uses the same six-digit yyyyMM form.
select
    sentence,
    dt
from t3
where dt between '201911' and '201912';

表的分桶,建立4个桶的表

-- Make inserts honour the table's bucket definition. Needed on Hive 0.x/1.x;
-- the property was removed in Hive 2.0, where bucketing is always enforced.
set hive.enforce.bucketing = true;

-- Table hashed on user_id into 4 buckets.
-- The keyword is BUCKETS (plural); "into 4 bucket" is a syntax error.
create table t1 (
    user_id int,
    item_id string,
    rating  string
)
clustered by (user_id)
into 4 buckets;

分桶取样1/4

-- Sample bucket 1 of 4 on user_id, i.e. roughly a quarter of t1.
select
    user_id,
    item_id,
    rating
from t1 tablesample (bucket 1 out of 4 on user_id);

桶中取样建表t2

-- Materialise bucket 1 of 4 (on user_id) of t1 as a new table.
-- NOTE(review): the name t2 is also used for the external table created
-- earlier in these notes; this statement fails if that table still exists.
create table t2 as
select
    user_id,
    item_id,
    rating
from t1 tablesample (bucket 1 out of 4 on user_id);

qq_38599944

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hive学习笔记1

hive学习笔记1.简单的 wordcountselect word,count(1) from(select explode(split(sentence,' ')) as word from t2) tgroup by word;对t2表中的sentence列进行空格分隔，统计单词出现的数量select word,count(1) as n from(select exp...
复制链接

扫一扫