hive的简单语句

create table bike (duration int,startdate TIMESTAMP,enddate TIMESTAMP, startnum int,startstation string,endnum int,endstation string,bikenum string,type string) row format delimited fields terminated by ‘,’;
1
insert overwrite local directory ‘/root/bike01/000000_0/’ row format delimited fields terminated by ‘\t’ select count(distinct bikenum) from bike;

2
insert overwrite local directory ‘/root/bike02/000000_0/’ row format delimited fields terminated by ‘\t’ select round(avg(duration)/60000) from bike;

3
insert overwrite local directory ‘/root/bike03/000000_0/’ row format delimited fields terminated by ‘\t’ select startstation,count(*) as sum from bike group by startstation order by sum desc ,startstation asc limit 10;

5,题给出共享单车单日租赁排行榜

mkdir - p /root/bike04/000000_0/

create table day as select to_date(startdate) startdate from bike;

insert overwrite local directory ‘/root/bike04/000000_0/’ row format delimited fields terminated by ‘\t’ select startdate ,count(*) as sum from day group by startdate order by sum desc limit 5;

6,修车

mkdir -p /root/bike05/000000_0

insert overwrite local directory ‘/root/bike05/000000_0/’ row format delimited fields terminated by ‘\t’ select bikenum ,count(*) as sum from bike group by bikenum order by sum desc ,bikenum asc limit 10;

7 可以会员推广的地区

mkdir -p /root/bike06/000000_0

insert overwrite local directory ‘/root/bike06/000000_0/’ row format delimited fields terminated by ‘\t’ select startstation, count(*) as sum from bike group by startstation order by sum desc ,startstation asc limit 10;

insert overwrite local directory ‘/root/bike06/’ row format delimited fields terminated by ‘\t’ select startstation, count(*) as sum from bike group by startstation order by sum desc ,startstation asc limit 10;

8,
mkdir -p /root/bike07/000000_0/

insert overwrite local directory ‘/root/bike07/’ row format delimited fields terminated by ‘\t’ select endstation, count(*) as sum from bike group by endstation order by sum desc ,endstation asc limit 10;

insert overwrite local directory ‘/root/bike07/000000_0/’ row format delimited fields terminated by ‘\t’ select endstation, count(*) as sum from bike group by endstation order by sum desc ,endstation asc limit 10;

============================================
1,将数据上传

创建HDFS下路径/college

hadoop fs -mkdir -p /college #补充命令

将数据上传至HDFS

hadoop fs -put /root/college/person.csv /college/

查看HDFS文件

hadoop fs -ls /college/

2,建表 导数
进入hive客户端,创建数据库hive,并使用;
创建person表
create table person (
age double,
workclass string,
fnlwgt string,
edu string,
edu_num double,
marital_status string,
occupation string,
relationship string,
race string,
sex string,
gain string,
loss string,
hours double,
native string,
income string
) ??? ; # 补充命令,设置分隔符

load data local inpath ‘/root/college/person.csv’ into table ???;

insert overwrite local directory ‘/root/college000/01’
row format delimited fields ??? by ‘\t’ # 补充命令
select count(*) from person;

3,计算较高收入人群占整体数据的比例,结果写入本地/root/college011/。

set hive.strict.checks.cartesian.product=false; # 默认不允许笛卡尔操作,这里允许操作
insert overwrite local directory ‘/root/college011/’
SELECT round((t2.v / t4.s),2)
FROM (SELECT count() AS v FROM person t1 where t1.income = ‘>50K’) t2
JOIN (SELECT count(
) as s FROM person t3) t4;

4,计算学历为本科的人员在调查中的占比,结果写入本地/root/college012/。

insert overwrite local directory ‘/root/college012/’
SELECT round((t2.v / t4.s),2)
FROM (SELECT count() AS v FROM person t1 where t1.edu=‘Bachelors’) t2
JOIN (SELECT count(
) as s FROM person t3) t4;

5,计算青年群体中高收入年龄层排行

insert overwrite local directory ‘/root/college013/’
row format delimited fields terminated by ‘\t’
select age ,count(*) as sum from person
where age>=15 and age <=34 and income = ‘>50K’
group by age
order by sum desc,age asc limit 10;

6,计算男性群体中高收入职业排行,

insert overwrite local directory ‘/root/college014/’
row format delimited fields terminated by ‘\t’
select occupation,count(*) as sum from person
where sex = ‘Male’ and income = ‘>50K’
group by occupation
order by sum desc,occupation asc limit 5;

7,对未婚人群高收入职业排行,

insert overwrite local directory ‘/root/college015/’
row format delimited fields terminated by ‘\t’
select occupation,count(*) as sum from person
where marital_status = ‘Never-married’ and income = ‘>50K’
group by occupation
order by sum desc,occupation asc limit 5;

8,统计性别对于收入的影响,

insert overwrite local directory ‘/root/college016/’
row format delimited fields terminated by ‘\t’
SELECT t2.sex ,round((t2.v / t4.s),2)
FROM (SELECT t1.sex as sex , count() AS v FROM person t1 where t1.income = ‘>50K’ group by sex order by v desc ) t2
JOIN (SELECT count(
) as s FROM person t3 where t3.income = ‘>50K’) t4;

9,统计教育程度对于收入的影响,
先按照数量进行降序排列,再按照教育程度名称升序排列;

insert overwrite local directory ‘/root/college017/’
row format delimited fields terminated by ‘\t’
select edu ,count(*) as sum from person
where income = ‘>50K’
group by edu
order by sum desc ,edu asc ;

10,计算不同收入的平均工作时间,
格式:对于结果中的二维数据,要求使用“\t”作为声明文件分隔符;
复合排列:先按照数量进行降序排列,再按照教育程度名称升序排列;
其他说明:根据收入类型对平均工时进行计算,结果四舍五入,保留整数。

insert overwrite local directory ‘/root/college018/’
row format delimited fields terminated by ‘\t’
select income ,round(avg(hours)) as sum from person
group by income
order by sum desc ;

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

醉卧千山下,诗酒趁年华。

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值