hive
create database db_hcd
use db_hcd
#创建外部表
CREATE EXTERNAL TABLE hadoop_course_design(id INT,user_id STRING,item_id STRING,behavior_type INT,item_category STRING,the_date DATE,province STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE LOCATION '/hadoop_course_design'; #这是hadoop上面的目录
#查看表结构
desc hadoop_course_design;
#查看前10行
select * from hadoop_course_design limit 10;
Hive数据分析
(1) 根据user_id查询不重复的数据有多少行。
#select count (*) from hadoop_course_design;查看有多少条数据
#select distinct user_id from hadoop_course_design; #将该字段去重
select count(distinct user_id) from hadoop_course_design;
(2)查询不重复的数据有多少行。
select count(*) from (select user_id,item_id,behavior_type,item_category,the_date,province from hadoop_course_design group by user_id,item_id,behavior_type,item_category,the_date,province)a;
#或者
select count(distinct user_id,item_id,behavior_type,item_category,the_date,province) from hadoop_course_design;
(3)统计时间在2014-12-11和2014-12-12这两天商品售出总和。
select count(*) from hadoop_course_design where behavior_type='4' and the_date >= '2014-12-11' and the_date <= '2014-12-12';
(4)以月的第n天为统计单位,依次显示第n天网站卖出去的商品的个数。
select day(the_date),count(*) from hadoop_course_design where behavior_type='4' group by day (the_date) ;
#下面是排序
#select day(the_date) as mydate,count(*) from hadoop_course_design where #behavior_type='4' group by day(the_date) order by mydate ASC;
select count(*) from hadoop_course_design where province = '江西' and the_date = '2014-12-12' and behavior_type='4';
(6)统计出2014-12-11的购买数、浏览数,分析购买率
SELECT count(if(behavior_type='4',1,NULL)),COUNT(if(behavior_type='1',1,null)), count(if(behavior_type='4',1,NULL))/COUNT(if(behavior_type='1',1,null))*100 from hadoop_course_design where the_date ='2014-12-11';
SELECT COUNT(if(user_id='10001082',1,NULL)),COUNT(user_id),COUNT(if(user_id='10001082',1,NULL))/COUNT(user_id) FROM hadoop_course_design WHERE behavior_type='1' and the_date ='2014-12-12';
select user_id,count(behavior_type) as number from hadoop_course_design where behavior_type='4' and the_date='2014-12-12' group by user_id having count(behavior_type)>5 order by number DESC;
(9)查询时间为2014-12-12每个地区当天的浏览数。
select province , count(behavior_type) from hadoop_course_design where the_date='2014-12-12' and behavior_type='1' group by province;
(10)取时间为2014-12-12,求当天各地区的购买货物的数量。
select province , count(behavior_type) from hadoop_course_design where the_date='2014-12-12' and behavior_type='4' group by province;
Hive数据导出到mysql
创建数据库,赋权限
#源数据加个_raw
create database hive_hcd;
grant all privileges on hive_hcd.* to 'hive'@'%';
flush privileges; /*刷新权限*/
show variables like 'character%'; #显示编码格式
set character_set_database=utf8;
set character_set_server=utf8;
#导出前更改一下编码,不然中文不显示
#源数据加个_raw
sqoop export --connect "jdbc:mysql://192.168.207.130:3306/hive_hcd?useUnicode=true&characterEncoding=utf-8" --username hive --password 123456 --table table_01 --export-dir '/hadoop_course_design/result.txt' --fields-terminated-by '\t'
#useUnicode=true&characterEncoding=utf-8作用是:指定字符的编码、解码格式
在hive上依次对各个问题筛选出最终数据,并建立相关的内部表
1.双十二当天的各个省的发货量。
create table question1 as select province , count(behavior_type) as total_shipment from hadoop_course_design where the_date='2014-12-12' and behavior_type='4' group by province;
2.每天的浏览量和购买量。
create table question2 as SELECT the_date,count(if(behavior_type='1',1,null))as liulan_count,count(if(behavior_type='4',1,null))as goumai_count from hadoop_course_design group by the_date ;
3. 每天的购买率。
create table question3 as select the_date,round(goumai_count/liulan_count,7)*100 as rate from question2;
4. 双十二当天top10的用户信息(包括:user_id,购买数量,省份)
create table question4 as select user_id,province,count(behavior_type='4') as goumai_count from hadoop_course_design where the_date='2014-12-12' group by user_id,province order by goumai_count desc limit 10;
#查看本地secure-file-priv变量配置(只能在其路径下保存文件)
mysql> show variables like '%secure%';
select * from q1 into outfile '/var/lib/mysql-files/q1.csv' fields terminated by "," escaped by '' optionally enclosed by '' lines terminated by '\n' ;
select * from q2 into outfile '/var/lib/mysql-files/q2.csv' fields terminated by "," escaped by '' optionally enclosed by '' lines terminated by '\n' ;
select * from q3 into outfile '/var/lib/mysql-files/q3.csv' fields terminated by "," escaped by '' optionally enclosed by '' lines terminated by '\n' ;
select * from q4 into outfile '/var/lib/mysql-files/q4.csv' fields terminated by "," escaped by '' optionally enclosed by '' lines terminated by '\n' ;