大数据从入门到实战 - Hive基本查询操作(二)
叮嘟!这里是小啊呜的学习课程资料整理。好记性不如烂笔头,今天也是努力进步的一天。一起加油进阶吧!
一、关于此次实践
1、实战简介
本实训介绍的Hive排序、Hive数据类型和类型转换和Hive抽样查询等内容。
2、全部任务
二、实践详解
1、第1关:Hive排序
----------禁止修改----------
create database if not exists mydb;
use mydb;
create table if not exists total(
tradedate string,
tradetime string,
securityid string,
bidpx1 string,
bidsize1 int,
offerpx1 string,
bidsize2 int)
row format delimited fields terminated by ','
stored as textfile;
truncate table total;
load data local inpath '/root/files' into table total;
----------禁止修改----------
----------begin----------
select securityid,sum(bidsize1) s from total where tradedate ='20130722' group by securityid order by s desc limit 3;
----------end----------
评测
2、第2关:Hive数据类型和类型转换
----------禁止修改----------
create database if not exists mydb;
use mydb;
create table if not exists total(
tradedate string,
tradetime string,
securityid string,
bidpx1 string,
bidsize1 int,
offerpx1 string,
bidsize2 int)
row format delimited fields terminated by ','
stored as textfile;
truncate table total;
load data local inpath '/root/files' into table total;
----------禁止修改----------
----------begin----------
select securityid,sum(bidsize1*cast(bidpx1 as float)) from total where tradedate='20130725' group by securityid;
----------end----------
评测
3、第3关:Hive抽样查询
----------禁止修改----------
create database if not exists mydb;
use mydb;
create table if not exists total(
tradedate string,
tradetime string,
securityid string,
bidpx1 string,
bidsize1 int,
offerpx1 string,
bidsize2 int)
row format delimited fields terminated by ','
stored as textfile;
truncate table total;
load data local inpath '/root/files' into table total;
drop table if exists total_bucket;
----------禁止修改----------
----------begin----------
create table if not exists total_bucket(
tradedate string,
securityid string,
bidsize1 int,
bidsize2 int
)clustered by(securityid) into 6 buckets
row format delimited fields terminated by ','
stored as textfile;
set hive.enforce.bucketing = true;
insert overwrite table total_bucket
select tradedate,securityid,bidsize1,bidsize2
from total;
select tradedate,securityid,sum(bidsize1+bidsize2)
from total_bucket tablesample(bucket 2 out of 2 on securityid)
group by tradedate,securityid;
----------end----------
评测
Ending!
更多课程知识学习记录随后再来吧!
就酱,嘎啦!
注:
人生在勤,不索何获。