Zeppelin建表查表练习

%sh
mkdir -p /opt/hivedemo/data/
show databases;

%sh
cd /opt/hivedemo/data
ls
echo '---------------------------'
wc -l transaction_details.csv
echo '--------------------------'
wc -l store_review.csv
head -2 store_review.csv
echo '--------------------------'
wc -l store_details.csv
head -6 store_details.csv
echo '---------------------------'
wc -l customer_details.csv
head -3 customer_details.csv

%sh
cd /opt/hivedemo/data
hdfs dfs -mkdir -p /opt/hivedemo/data/customer
hdfs dfs -mkdir -p /opt/hivedemo/data/transaction
hdfs dfs -mkdir -p /opt/hivedemo/data/store
hdfs dfs -mkdir -p /opt/hivedemo/data/review
hdfs dfs -ls /opt/hivedemo/data
hdfs dfs -put ./customer_details.csv /opt/hivedemo/data/customer
hdfs dfs -put ./transaction_details.csv /opt/hivedemo/data/transaction
hdfs dfs -put ./store_details.csv /opt/hivedemo/data/store
hdfs dfs -put ./store_review.csv /opt/hivedemo/data/review

%hive
--create database shopping;
show databases;

%hive
use shopping;
create external table if not exists ext_customer_details(
customer_id string,
first_name string,
last_name string,
email string,
gender string,
address string,
country string,
language string,
job string,
credit_type string,
credit_no string
)
row format delimited fields terminated by ','
location  '/opt/hivedemo/data/customer/'
tblproperties("skip.header.line.count"="1")

%hive
create external table if not exists ext_store_details(
store_id string,
store_name string,
employee_number int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location  '/opt/hivedemo/data/store/'
tblproperties("skip.header.line.count"="1")

%hive
create external table if not exists ext_store_review(
transaction_id string,
store_id string,
review_score int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location  '/opt/hivedemo/data/review/'
tblproperties("skip.header.line.count"="1")

%hive
--ext_transaction_details
create external table if not exists ext_transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(8,2),
product string,
date string,
time string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location  '/opt/hivedemo/data/transaction/'
tblproperties("skip.header.line.count"="1")

%hive
with
t1 as (select *,row_number() over(partition by transaction_id  order by 8 asc) as rn   from ext_transaction_details) select *  from t1 where rn>1

%hive
with
t1 as (select row_number() over(partition by transaction_id  order by transaction_id  asc) as rn,*   from ext_transaction_details) select *  from t1 where rn>1

%hive
create view if not exists vw_store_review as 
select * from ext_store_review where review_score='';

%hive
create view if not exists vw_customer_details as 
select customer_id,
first_name,
unbase64(last_name)  last_name,
unbase64(email)  email,
gender,
unbase64(address)  address,
country,
language,
job,
credit_type,
unbase64(concat(unbase64(credit_no),"kb15")) credit_no
from ext_customer_details

%hive
create table if not exists transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(8,2),
product string,
purchase_time string,
purchase_date date
)
partitioned by (purchase_month string)

%hive
select 
transaction_id,
customer_id,
store_id,
price,
product,
time,
date,
from_unixtime(unix_timestamp(date,'yyyy-MM-dd'),'yyyy-MM') as purchase_month,
row_number() over(partition by transaction_id order by store_id ) as rn
from ext_transaction_details

%hive
set hive.exec.dynamic.partition.mode=nonstrict

%hive
select from_unixtime(unix_timestamp("2021-11-16",'yyyy-MM-dd'),'yyyy-MM') as purchase_month

%hive
with
t1 as (
select 
transaction_id,
customer_id,
store_id,
price,
product,
time,
date,
from_unixtime(unix_timestamp(date,'yyyy-MM-dd'),'yyyy-MM') as purchase_month,
row_number() over(partition by transaction_id order by store_id ) as rn
from ext_transaction_details
)
insert overwrite table transaction_details partition(purchase_month)
select 
if(rn=1,transaction_id,concat(transaction_id,'_rep',rn)),
customer_id,
store_id,
price,
product,
time,
date,
purchase_month
from t1

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值