hivesql

蚂蚁蚂蚁蚂蚁没问题

已于 2022-11-03 10:08:39 修改

阅读量2.6k

点赞数 1

文章标签： hive hadoop 数据仓库

于 2022-11-03 09:50:05 首次发布

本文链接：https://blog.csdn.net/wanquan2001/article/details/127664300

版权

参考资料：https://dblab.xmu.edu.cn/blog/1005/

1、Hive数据导入
（1）将small_user.csv文件导入到hive中

（2）根据导入数据格式特征，创建名为user的外部表

-- Raw user-behaviour log, one comma-separated record per line.
-- Columns, in file order:
--   user_id         user id
--   item_id         item id
--   behaviour_type  1 = browse, 2 = favourite, 3 = add to cart, 4 = purchase
--   user_geohash    hash of the user's location; empty on some records
--   item_category   item category
--   time            when the record was produced
CREATE EXTERNAL TABLE IF NOT EXISTS users (
    user_id        STRING,
    item_id        STRING,
    behaviour_type STRING,
    user_geohash   STRING,
    item_category  INT,
    time           STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the CSV into users, replacing any rows already in the table (OVERWRITE).
-- LOCAL makes Hive read the path from the client machine's filesystem and copy
-- the file in; without LOCAL Hive attempts a move inside the cluster filesystem,
-- which is not what a file:// source on the client needs.
LOAD DATA LOCAL INPATH 'file:///home/shiyanlou/Code/1.csv'
OVERWRITE INTO TABLE users;

数据字段解释：
user_id（用户id）
item_id(商品id)
behaviour_type（包括浏览、收藏、加购物车、购买，对应取值分别是1、2、3、4）
user_geohash(用户地理位置哈希值，有些记录中没有这个字段值，可以删除此字段)
item_category（商品分类）
time（该记录产生时间）

2、Hive数据分析
（1）查询前10条数据

-- Preview 10 rows. The table created by the DDL is `users`; `user` does not
-- exist (and USER is a reserved word in Hive), so the original name fails.
SELECT *
FROM users
LIMIT 10;

（2）查询2014年11月18日到2014年11月20日有多少人浏览了商品

-- Number of distinct users who browsed (behaviour_type = '1') from 2014-11-18
-- to 2014-11-20 inclusive. COUNT(DISTINCT user_id) counts people; COUNT(*)
-- would count browse events instead.
-- substring() is 1-based; assumes time begins with a yyyy-MM-dd date.
SELECT COUNT(DISTINCT user_id) AS user_count
FROM users
WHERE behaviour_type = '1'
  AND substring(time, 1, 10) >= '2014-11-18'
  AND substring(time, 1, 10) <= '2014-11-20';

（3）查询2014年12月12日“湖南”当天发出到该地点的货物的数量

-- Number of purchases (behaviour_type = '4') on 2014-12-12 for location '湖南'.
-- NOTE(review): user_geohash is described as a location hash; this filter only
-- matches if the column has been populated with province names. Confirm.
SELECT COUNT(*) AS purchase_count
FROM users
WHERE user_geohash = '湖南'
  AND substring(time, 1, 10) = '2014-12-12'
  AND behaviour_type = '4';

（4）查询2014年12月12日这一天在该网站浏览商品超过5次的用户id

-- Users who browsed (behaviour_type = '1') more than 5 times on 2014-12-12.
-- The WHERE clause already restricts rows to browse events, so COUNT(*) per
-- user is the browse count. COUNT(<boolean expression>) counts every non-NULL
-- value, true or false, and must not be used as a conditional count.
SELECT user_id
FROM users
WHERE behaviour_type = '1'
  AND substring(time, 1, 10) = '2014-12-12'
GROUP BY user_id
HAVING COUNT(*) > 5;

（5）统计2014年11月18日一天的用户行为，即分别有多少人浏览、收藏、加购物车、购买商品。

-- Per-behaviour statistics for 2014-11-18.
-- behaviour_type is selected so each count can be attributed to its behaviour
-- (1 = browse, 2 = favourite, 3 = add to cart, 4 = purchase).
-- user_count is the number of distinct people; event_count is the number of records.
SELECT
    behaviour_type,
    COUNT(DISTINCT user_id) AS user_count,
    COUNT(*)                AS event_count
FROM users
WHERE substring(time, 1, 10) = '2014-11-18'
GROUP BY behaviour_type;

（7）统计2014年12月12日购物力最强的前10位客户

-- Top 10 customers by number of purchases on 2014-12-12.
-- The purchase filter (behaviour_type = '4') must be in WHERE:
-- COUNT(behaviour_type = '4') counts every row with a non-NULL behaviour,
-- which would rank users by total activity rather than by purchases.
SELECT
    user_id,
    COUNT(*) AS num
FROM users
WHERE behaviour_type = '4'
  AND substring(time, 1, 10) = '2014-12-12'
GROUP BY user_id
ORDER BY num DESC
LIMIT 10;

（8）统计网站每个月卖出的商品的个数

-- Number of purchase records (behaviour_type = '4') per month.
-- substring(time, 1, 7) is the yyyy-MM prefix (assumes time begins with a
-- yyyy-MM-dd date). The month is selected so each count can be read off.
SELECT
    substring(time, 1, 7) AS sale_month,
    COUNT(*)              AS items_sold
FROM users
WHERE behaviour_type = '4'
GROUP BY substring(time, 1, 7);

（9）查询2014年12月12日所有地区用户行为分布情况

-- Create a new table to store the per-region behaviour counts.
-- IF NOT EXISTS keeps the script re-runnable.
CREATE TABLE IF NOT EXISTS scan (
    province       STRING,
    behaviour_type STRING,
    scan           INT
)
COMMENT 'This is the search of bigdataday'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

-- Fill scan with the number of records per (location, behaviour) on 2014-12-12.
-- users has no province column; the location column is user_geohash, which is
-- written into scan.province by position.
-- NOTE(review): assumes user_geohash holds the region value wanted here. Confirm.
-- COUNT returns BIGINT, so it is cast explicitly to match the INT target column.
INSERT OVERWRITE TABLE scan
SELECT
    user_geohash,
    behaviour_type,
    CAST(COUNT(behaviour_type) AS INT)
FROM users
WHERE substring(time, 1, 10) = '2014-12-12'
GROUP BY user_geohash, behaviour_type;

-- Display every row stored in scan.
SELECT *
FROM scan;

（10）请自拟一个问题，并给出解决的代码
查看user表的简单结构
-- Show the column names and types of the users table.
DESCRIBE users;