which hive
cd /usr/local/hive/
ls
cd conf
ls
vi hive-site.xml
查找mysql中的内容
ssh bigdata003
mysql -u hive -phive2017 -D hive
show tables;
select * from DBS \G;
exit
ssh bigdata001
//创建自己的数据库
create databases luolin;
use luolin;
//创建一个citys表
CREATE TABLE citys(
province_code INT,
province_name string,
city_code INT,
city_name string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//加载需要处理的数据文件
load data local inpath '/home/bigdata/hive/city.txt' into table citys;
//sql基础回顾
select province_name,count(city_name) as total from citys group by province_name order by total desc limit 10;
select distinct province_name from citys;
select count(*) from ( select province_name from citys group by province_name ) a;
select count(distinct province_name) from citys;
//创建一个city_ex表
CREATE TABLE city_ex(
province_code int,
province_name string,
city_code int,
city_name string
)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY ','
LINES TERMINATED BY '\n'
LOCATION '/user/luo/city/';
//检查四川的数据
select * from city_ex where province_name = '四川';
// 创建一个user表
CREATE TABLE user(
uid INT,
city_code INT,
model string,
access string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//加载远程数据
load data local inpath '/home/bigdata/tanqi/hive/user.txt' into table user;
//创建分区表
CREATE TABLE user_daily(
uid INT,
city_code INT,
model string,
access string
)
partitioned by (p_date string);
//插入分区
INSERT OVERWRITE TABLE user_daily PARTITION (p_date='2017-09-01') SELECT * FROM user;
//显示分区
show partitions user_daily;
//动态插入分区
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table user_daily partition(p_date)
select *,'2017-09-02' from user union all select *,'2017-09-03' from user;
//修改分区
alter table user_daily partition (p_date = '2017-09-01') rename to partition (p_date = '20170901');
//删除分区
alter table user_daily drop partition ( p_date = '2017-09-02');
//作业解答
select * from user where access='WIFI';
select city_code,count(access) as total from user where access = 'WIFI' group by city_code order by total desc limit 5;
select access,city_code,count(*) as total from user group by access,city_code having total>= 3 order by total desc;
select access ,count(*) as total from user where lower (model) like '%ne%' group by access order by total desc;
//各中if case when collect
//求各占用的百分比
select sum(if(access=='WIFI' ,1,0)) / count(*) from user;
//各段号的分组统计
select
case
when uid % 10 in (0,1,2,3) then '0-3'
when uid % 10 in (4,5,6,7) then '4-7'
else '8-9'
end as interval,
count(*) as cnt
from user
group by
case
when uid % 10 in (0,1,2,3) then '0-3'
when uid % 10 in (4,5,6,7) then '4-7'
else '8-9'
end;
//去重
select collect_set(access) from user;
//统计user表city_code最多的4个城市的access分布情况,存储为map
select city_code,count(*) as cnt,
map(
'WIFI',sum(if(access='WIFI',1,0)),
'2G',sum(if(access='2G',1,0)),
'3G',sum(if(access='3G',1,0)),
'4G',sum(if(access='4G',1,0))
) as map_reten_acc
from user
group by city_code
order by cnt desc limit 4;
//左连接 left join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
left join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//右连接 right join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
right join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//内连接inner join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
inner join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//全连接 full join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
full join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//窗口函数
//分组topN
select access,city_code, uid
from
(
select uid, access, city_code,
row_number() over (partition by access order by city_code desc) as row_num
from user
) a
where row_num = 1;
//累计和
select p_date,
sum(cnt) over(order by p_date asc rows between unbounded preceding and current row)
from
(
select p_date, count(*) as cnt
from user_daily
where p_date between '2017-09-01' and '2017-09-30'
group by p_date
)a
;
cd /usr/local/hive/
ls
cd conf
ls
vi hive-site.xml
查找mysql中的内容
ssh bigdata003
mysql -u hive -phive2017 -D hive
show tables;
select * from DBS \G;
exit
ssh bigdata001
//创建自己的数据库
create databases luolin;
use luolin;
//创建一个citys表
CREATE TABLE citys(
province_code INT,
province_name string,
city_code INT,
city_name string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//加载需要处理的数据文件
load data local inpath '/home/bigdata/hive/city.txt' into table citys;
//sql基础回顾
select province_name,count(city_name) as total from citys group by province_name order by total desc limit 10;
select distinct province_name from citys;
select count(*) from ( select province_name from citys group by province_name ) a;
select count(distinct province_name) from citys;
//创建一个city_ex表
CREATE TABLE city_ex(
province_code int,
province_name string,
city_code int,
city_name string
)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY ','
LINES TERMINATED BY '\n'
LOCATION '/user/luo/city/';
//检查四川的数据
select * from city_ex where province_name = '四川';
// 创建一个user表
CREATE TABLE user(
uid INT,
city_code INT,
model string,
access string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//加载远程数据
load data local inpath '/home/bigdata/tanqi/hive/user.txt' into table user;
//创建分区表
CREATE TABLE user_daily(
uid INT,
city_code INT,
model string,
access string
)
partitioned by (p_date string);
//插入分区
INSERT OVERWRITE TABLE user_daily PARTITION (p_date='2017-09-01') SELECT * FROM user;
//显示分区
show partitions user_daily;
//动态插入分区
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table user_daily partition(p_date)
select *,'2017-09-02' from user union all select *,'2017-09-03' from user;
//修改分区
alter table user_daily partition (p_date = '2017-09-01') rename to partition (p_date = '20170901');
//删除分区
alter table user_daily drop partition ( p_date = '2017-09-02');
//作业解答
select * from user where access='WIFI';
select city_code,count(access) as total from user where access = 'WIFI' group by city_code order by total desc limit 5;
select access,city_code,count(*) as total from user group by access,city_code having total>= 3 order by total desc;
select access ,count(*) as total from user where lower (model) like '%ne%' group by access order by total desc;
//各中if case when collect
//求各占用的百分比
select sum(if(access=='WIFI' ,1,0)) / count(*) from user;
//各段号的分组统计
select
case
when uid % 10 in (0,1,2,3) then '0-3'
when uid % 10 in (4,5,6,7) then '4-7'
else '8-9'
end as interval,
count(*) as cnt
from user
group by
case
when uid % 10 in (0,1,2,3) then '0-3'
when uid % 10 in (4,5,6,7) then '4-7'
else '8-9'
end;
//去重
select collect_set(access) from user;
//统计user表city_code最多的4个城市的access分布情况,存储为map
select city_code,count(*) as cnt,
map(
'WIFI',sum(if(access='WIFI',1,0)),
'2G',sum(if(access='2G',1,0)),
'3G',sum(if(access='3G',1,0)),
'4G',sum(if(access='4G',1,0))
) as map_reten_acc
from user
group by city_code
order by cnt desc limit 4;
//左连接 left join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
left join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//右连接 right join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
right join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//内连接inner join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
inner join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//全连接 full join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
full join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//窗口函数
//分组topN
select access,city_code, uid
from
(
select uid, access, city_code,
row_number() over (partition by access order by city_code desc) as row_num
from user
) a
where row_num = 1;
//累计和
select p_date,
sum(cnt) over(order by p_date asc rows between unbounded preceding and current row)
from
(
select p_date, count(*) as cnt
from user_daily
where p_date between '2017-09-01' and '2017-09-30'
group by p_date
)a
;