1
create table if not exists stocks
(
`exchange` string,
`symbol` string,
`ymd` string,
`price_open` float,
`price_high` float,
`price_low` float,
`price_close` float,
`volume` int,
`price_adj_close` float
)
row format delimited fields terminated by ',';
2
create external table if not exists dividends
(
`ymd` string,
`dividend` float
)
partitioned by(`exchange` string ,`symbol` string)
row format delimited fields terminated by ',';
3
load data local inpath '/home/hadoop/data/stocks/stocks.csv' overwrite into table stocks;
4
create external table if not exists dividends_unpartitioned
(
`exchange` string ,
`symbol` string,
`ymd` string,
`dividend` float
)
row format delimited fields terminated by ',';
load data local inpath '/home/hadoop/data/dividends/dividends.csv' overwrite into table dividends_unpartitioned;
5
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
insert overwrite table dividends partition(`exchange`,`symbol`) select `ymd`,`dividend`,`exchange`,`symbol` from dividends_unpartitioned;
6
select s.ymd,s.symbol,s.price_close
from stocks s
LEFT SEMI JOIN
dividends d
ON s.ymd=d.ymd and s.symbol=d.symbol
where s.symbol='IBM' and year(ymd)>=2000;
7
select ymd,
case
when price_close-price_open>0 then 'rise'
when price_close-price_open<0 then 'fall'
else 'unchanged'
end as situation
from stocks
where symbol='AAPL' and substring(ymd,0,7)='2008-10';
8
select `exchange`,symbol,ymd,price_close-price_open as `diff`
from
(
select *
from stocks
order by price_close-price_open desc
limit 1
)t;
9
select
year(ymd) as `year`,
avg(price_adj_close) as avg_price from stocks
where `exchange`='NASDAQ' and symbol='AAPL'
group by year(ymd)
having avg_price > 50;
10
select t2.`year`,symbol,t2.avg_price
from
(
select
*,row_number() over(partition by t1.`year` order by t1.avg_price desc) as `rank`
from
(
select
year(ymd) as `year`,
symbol,
avg(price_adj_close) as avg_price
from stocks
group by year(ymd),symbol
)t1
)t2
where t2.`rank`<=3;