准备数据
select uid,name from table a where (select count(*) from table b where a.id=b.id and a.time > b.time )<=1;
CREATE TABLE user_pro (
uid INT PRIMARY KEY AUTO_INCREMENT,
uname VARCHAR(20),
project VARCHAR(20),
score INT
)
INSERT INTO user_pro (uname,project,score) VALUES ('张三','语文',55),('张三','数学',45),('李四','语文',79),('李四','数学',99),('王五','数学',79),('王五','语文',86),('赵六','数学',77),('赵六','语文',88),('田七','数学',99),('田七','语文',49);
需求1:求出统计分数段的结果,显示人数.
SELECT
ELT(INTERVAL(score,0,60,80,100),'0<score<60','60<score<80','80<score<100') AS score,COUNT(1) AS num
FROM user_pro
GROUP BY ELT(INTERVAL(score,0,60,80,100),'0<score<60','60<score<80','80<score<100') ;
-- 此语法要用到sql里面.
需求2:查询没门成绩都大于80的学生姓名
方案1;
select uname,score
from user_pro
group by uname
having MIN(score)>60;
方案2
SELECT DISTINCT uname
from user_pro
WHERE uname not IN (SELECT DISTINCT uname from user_pro WHERE score <60);
需求3:以下表
year month amount
1991 1 1.1
1991 2 1.2
1991 3 1.3
1991 4 1.4
1992 1 2.1
1992 2 2.2
1992 3 2.3
1992 4 2.4
查询出这样的结果
year m1 m2 m3 m4
1991 1.1 1.2 1.3 1.4
1992 2.1 2.2 2.3 2.4
select year,
(select amount from table a where month =1 and a.year= table.year)m1,
(select amount from table a where month =2 and a.year= table.year)m1,
(select amount from table a where month =3 and a.year= table.year)m1,
(select amount from table a where month =4 and a.year= table.year)m1
from table
group by year;
需求2:每日活跃用户明细
创建表:
(由于业务中产品经理会要求根据不同的渠道统计活跃用户)
drop table if exists dws_uv_detail_day;
create external table dws_uv_detail_day
('mid_id' string comment '设备唯一标识',
'user_id' string comment '程序版本',
`version_code` string comment '程序版本号',
`lang` string comment '系统语言',
`source` string comment '渠道号',
`os` string comment '安卓系统版本',
`area` string comment '区域',
`model` string comment '收集型号',
`model` string COMMENT '手机型号',
`brand` string COMMENT '手机品牌',
`sdk_version` string COMMENT 'sdkVersion',
`gmail` string COMMENT 'gmail',
`height_width` string COMMENT '屏幕宽高',
`app_time` string COMMENT '客户端日志产生时的时间',
`network` string COMMENT '网络模式',
`lng` string COMMENT '经度',
`lat` string COMMENT '纬度'
partitioned by(dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_day';
统计日活用的是启动日志
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_uv_detail_day
partition(dt='2019-02-10')
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang))lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat
from dwd_start_log #启动日志
where dt='2019-02-10'
group by mid_id;
统计周活
#创建表
drop table if exists dws_uv_detail_wk;
create external table dws_uv_detail_wk(
`mid_id` string comment '设备唯一标识',
`user_id` string COMMENT '用户标识',
`version_code` string COMMENT '程序版本号',
`version_name` string COMMENT '程序版本名',
`lang` string COMMENT '系统语言',
`source` string COMMENT '渠道号',
`os` string COMMENT '安卓系统版本',
`area` string COMMENT '区域',
`model` string COMMENT '手机型号',
`brand` string COMMENT '手机品牌',
`sdk_version` string COMMENT 'sdkVersion',
`gmail` string COMMENT 'gmail',
`height_width` string COMMENT '屏幕宽高',
`app_time` string COMMENT '客户端日志产生时的时间',
`network` string COMMENT '网络模式',
`lng` string COMMENT '经度',
`lat` string COMMENT '纬度',
`monday_date` string comment '周一日期',
`sunday_date` string comment '周日日期'
) comment '活跃用户按周统计'
partitioned by (`wk_dt` string)
stored as parquet
插入表数据
insert overwrite table dws_uv_detail_wk partition(wk_dt)
select
mid_id ,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang))lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat
data_add(next_day('2019-02-10','mo'),-7) ,
data_add(next_day('2019-02-10','mo'),-1) ,
concat(data_add(next_day('2019-02-10','mo'),-7) ,'_',data_add(next_day('2019-02-10','mo'),-1)
)
from dws_uv_detail_wk
where dt>=date_add(next_day('2019-02-10','MO'),-7) and dt<=date_add(next_day('2019-02-10','MO'),-1)
#下一个周的周一
date_add(next_day('2019-12-18','mo'),-7)
#本周的周日==下一个周的周一减去一
date_add(next_day('2019-12-18','mo'),-1)
运行表脚本
#!/bin/bash
# 定义变量方便修改
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table "$APP".dws_uv_detail_day partition(dt='$do_date')
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang)) lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat
from "$APP".dwd_start_log
where dt='$do_date'
group by mid_id;
insert overwrite table "$APP".dws_uv_detail_wk partition(wk_dt)
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang)) lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat,
date_add(next_day('$do_date','MO'),-7),
date_add(next_day('$do_date','MO'),-1),
concat(date_add( next_day('$do_date','MO'),-7), '_' , date_add(next_day('$do_date','MO'),-1)
)
from "$APP".dws_uv_detail_day
where dt>=date_add(next_day('$do_date','MO'),-7) and dt<=date_add(next_day('$do_date','MO'),-1)
group by mid_id;
insert overwrite table "$APP".dws_uv_detail_mn partition(mn)
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang))lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat,
date_format('$do_date','yyyy-MM')
from "$APP".dws_uv_detail_day
where date_format(dt,'yyyy-MM') = date_format('$do_date','yyyy-MM')
group by mid_id;
"
#执行SQL语句.
$hive -e "$sql"