使⽤
Yum
⽅式安装
Impala
后,
impala-shell
可以全局使⽤;进⼊
impala-shell
命令⾏
impala-shell
进⼊到
impala
的交互窗⼝
impala-shell
mkdir -p /home/impala/data
vim user.csv
392456197008193000,张三,20,0
267456198006210000,李四,25,1
892456199007203000,王五,24,1
492456198712198000,赵六,26,2
392456197008193000,张三,20,0
392456197008193000,张三,20,0
hdfs dfs -mkdir -p /user/impala/t1
hdfs dfs -put user.csv /user/impala/t1/
创建数据库
create database impala;
#表如果存在则删除
drop table if exists t1;
#执⾏创建
create external table t1
(id string,
name string,
age int,
gender int
)row format delimited fields terminated by ','
location '/user/impala/t1';
create table t2(id string,name string,age int,gender int)
row format delimited fields terminated by ',';
vim t3.csv
A,2020-05-15 01:30:00
A,2020-05-15 01:35:00
A,2020-05-15 02:00:00
A,2020-05-15 03:00:10
A,2020-05-15 03:05:00
B,2020-05-15 02:03:00
B,2020-05-15 02:29:40
B,2020-05-15 04:00:00
hdfs dfs -put t3.csv /user/impala/t1/
--创建表
drop table if exists user_clicklog;
create table user_clicklog (
user_id string,
click_time string
) row format delimited fields terminated by",";
--加载数据
load data local inpath '/home/impala/data/t3.csv' into table user_clicklog;
load data local inpath '/home/impala/data/t3.csv' into table user_clicklog;
思路:分区函数、累计求和、排名函数
with t as (
--每行累加求和,让开始有‘1’标记的后面的行都变为相同数字
select id,click_time,sum(flag) over(partition by id order by click_time) flag from (
--把超过30分钟的进行标记
select id,click_time,case when dt>30 then 1 else 0 end flag from (
--按id分区,求上下时间差值
select id,click_time,nvl(dt-lag(dt) over(partition by id order by dt),0)/60 dt from (
--转时间戳
select user_id id,click_time,unix_timestamp(click_time) dt from user_clicklog
) tmp
) tmp2
) tmp3
)
--最后对(id和标记flag)进行分组排序
select id ,click_time,flag ,row_number() over (partition by id,flag order by flag) from t;
impala同步hive原数据改动(hive进行操作的改动,impala无法感知)
invalidate metadata
注:以上为本人小小总结,如果对您起到了一点点帮助,请给予我一点鼓励,在下方点个小小的赞,谢谢,如有错误之处,望不吝指出,非常感谢!