Comprehensive Case Study 2

Create tables and insert data

First, create the tables in MySQL and load the sample data.

create table user_click (
  uid varchar(255),
  sku varchar(255),
  os  varchar(255)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

insert into user_click(uid,sku,os) values ('u01','鼠标','ios');
insert into user_click(uid,sku,os) values ('u02','键盘','android');
insert into user_click(uid,sku,os) values ('u03','显示器','ios');
insert into user_click(uid,sku,os) values ('u04','托特包','ios');

create table user_info (
  uid  varchar(255),
  name varchar(255)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

insert into user_info(uid,name) values ('u01','子航');
insert into user_info(uid,name) values ('u02','祖安');
insert into user_info(uid,name) values ('u03','海哥');
insert into user_info(uid,name) values ('u04','轩轩');

Next, create the matching tables in Hive. Both use a comma as the field delimiter so that they line up with the Sqoop import below.

create table user_click (
  uid string,
  sku string,
  os  string
)
row format delimited fields terminated by ',';

create table user_info (
  uid  string,
  name string
)
row format delimited fields terminated by ',';
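To confirm the Hive tables actually registered the comma delimiter, a quick check from the Hive CLI (this assumes, as in the Sqoop commands below, that the tables live in the test01 database):

use test01;
show create table user_click;  -- the ',' delimiter should appear in the output
show create table user_info;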

Import the MySQL data into Hive with Sqoop.

sqoop import \
--connect jdbc:mysql://bigdata12:3306/test01 \
--username root \
--password 123456 \
--mapreduce-job-name 'user_click' \
--direct \
--fields-terminated-by ',' \
-m 1 \
--columns "uid,sku,os" \
--table "user_click" \
--hive-import \
--hive-overwrite \
--hive-database test01 \
--hive-table user_click

sqoop import \
--connect jdbc:mysql://bigdata12:3306/test01 \
--username root \
--password 123456 \
--mapreduce-job-name 'user_info' \
--direct \
--fields-terminated-by ',' \
-m 1 \
--columns "uid,name" \
--table "user_info" \
--hive-import \
--hive-overwrite \
--hive-database test01 \
--hive-table user_info
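Once both jobs finish, a quick row-count check in Hive should match the four rows inserted on the MySQL side:

select count(*) from test01.user_click;  -- expect 4
select count(*) from test01.user_info;   -- expect 4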

Data analysis

1. Report uid, name, sku, and os, plus the number of items each user clicked.

select
  a.uid as uid,
  name,
  sku,
  os,
  cnt
from (
  select
    a.uid as uid,
    sku,
    os,
    cnt
  from (
    select
      uid,
      count(sku) as cnt
    from user_click
    group by uid
  ) as a inner join (
    select *
    from user_click
  ) as b
  on a.uid = b.uid
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid
group by a.uid, name, sku, os, cnt;
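As a side note, the same result can be computed without the self-join by attaching a windowed count to each click row; this is an alternative sketch, not the version used in the rest of the example:

select
  c.uid as uid,
  i.name,
  c.sku,
  c.os,
  count(c.sku) over (partition by c.uid) as cnt  -- clicks per user, kept on every row
from user_click c
left join user_info i
on c.uid = i.uid;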

2. Report uid, name, sku, and os, adding a ranking column that numbers duplicate rows within each uid; rows with no duplicates simply get the value 1.

select
  a.uid as uid,
  name,
  sku,
  os,
  rn
from (
  select
    uid,
    sku,
    os,
    row_number() over (partition by uid order by sku) as rn
  from user_click
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid;
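With the sample data every uid occurs exactly once, so rn is 1 on every row. To see the numbering in action, you could insert a hypothetical extra click for u01 (Hive 0.14+ supports INSERT ... VALUES) and rerun the query:

insert into table user_click values ('u01', '键盘', 'android');
-- u01 now yields two rows, with rn = 1 and rn = 2 (ordered by sku)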

3. Same as requirement 2, but rows with no duplicates are ranked as well (with a global row number) instead of all being marked 1.

select
  a.uid as uid,
  name,
  sku,
  os,
  rn
from (
  select
    uid,
    sku,
    os,
    rn
  from (
    select
      uid,
      sku,
      os,
      row_number() over (partition by uid order by uid) as rn,
      count(sku) over (partition by uid) as cnt
    from user_click
  ) as a
  where cnt != 1
  union all
  select
    uid,
    sku,
    os,
    row_number() over (order by uid) as rn
  from (
    select
      uid,
      sku,
      os,
      count(sku) over (partition by uid) as cnt
    from user_click
  ) as a
  where cnt = 1
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid;
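The building block both branches share is the windowed count, which flags duplicated uids without collapsing any rows; isolated, it looks like this:

select
  uid,
  sku,
  os,
  count(sku) over (partition by uid) as cnt  -- cnt > 1 means this uid has duplicate rows
from user_click;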

Materialize the analysis results into tables

The result tables must be created with an explicit field delimiter, since Sqoop needs a known delimiter when exporting the files later.

create table m1
row format delimited fields terminated by ','
as
select
  a.uid as uid,
  name,
  sku,
  os,
  cnt
from (
  select
    a.uid as uid,
    sku,
    os,
    cnt
  from (
    select
      uid,
      count(sku) as cnt
    from user_click
    group by uid
  ) as a inner join (
    select *
    from user_click
  ) as b
  on a.uid = b.uid
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid
group by a.uid, name, sku, os, cnt;
create table m2
row format delimited fields terminated by ','
as
select
  a.uid as uid,
  name,
  sku,
  os,
  rn
from (
  select
    uid,
    sku,
    os,
    row_number() over (partition by uid order by sku) as rn
  from user_click
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid;
create table m3
row format delimited fields terminated by ','
as
select
  a.uid as uid,
  name,
  sku,
  os,
  rn
from (
  select
    uid,
    sku,
    os,
    rn
  from (
    select
      uid,
      sku,
      os,
      row_number() over (partition by uid order by uid) as rn,
      count(sku) over (partition by uid) as cnt
    from user_click
  ) as a
  where cnt != 1
  union all
  select
    uid,
    sku,
    os,
    row_number() over (order by uid) as rn
  from (
    select
      uid,
      sku,
      os,
      count(sku) over (partition by uid) as cnt
    from user_click
  ) as a
  where cnt = 1
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid;
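Before exporting, it is worth inspecting the files the CTAS statements produced, to confirm the comma delimiter made it to disk. From the Hive CLI (the path assumes the default warehouse location used by the export commands below):

dfs -cat /user/hive/warehouse/test01.db/m1/*;
-- each line should look like: u01,子航,鼠标,ios,1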

Export the result tables to MySQL

The target tables must already exist in MySQL before running the export.

create table m1 (
  uid varchar(255),
  name varchar(255),
  sku varchar(255),
  os varchar(255),
  cnt int(15)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m1 \
--mapreduce-job-name 'mask1' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m1/*

create table m2 (
  uid varchar(255),
  name varchar(255),
  sku varchar(255),
  os varchar(255),
  rn int(15)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m2 \
--mapreduce-job-name 'mask2' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m2/*

create table m3 (
  uid varchar(255),
  name varchar(255),
  sku varchar(255),
  os varchar(255),
  rn int(15)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m3 \
--mapreduce-job-name 'mask3' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m3/*
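After the three exports complete, the results can be verified on the MySQL side:

use test01;
select * from m1;  -- expect one row per user with the click count
select * from m2;
select * from m3;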

Write the XXL-JOB scripts

Each requirement is packaged as three files: a Hive script that rebuilds the result table, a MySQL script that truncates the target table, and a bash wrapper that runs both plus the Sqoop export, sending an alert via ding_warning.sh if any step fails.

/home/hadoop/data/mask1 (Hive script):

use test01;

drop table m1;

create table m1
row format delimited fields terminated by ','
as
select
  a.uid as uid,
  name,
  sku,
  os,
  cnt
from (
  select
    a.uid as uid,
    sku,
    os,
    cnt
  from (
    select
      uid,
      count(sku) as cnt
    from user_click
    group by uid
  ) as a inner join (
    select *
    from user_click
  ) as b
  on a.uid = b.uid
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid
group by a.uid, name, sku, os, cnt;
/home/hadoop/data/m1.sql (MySQL script that empties the target table):

use test01;
truncate table m1;
The job script itself:

#!/bin/bash
# alert parameters
xxl_number=xxl_3
token="小红"
name="mask1"
ip="bigdata12"
phones="15124172559"

hive -f "/home/hadoop/data/mask1"
if [ $? -eq 0 ]; then
    echo "data processing succeeded"
else
    echo "data processing failed"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "data processing failed" "$ip" "$phones"
    exit 1
fi

mysql -uroot -p123456 < "/home/hadoop/data/m1.sql"
if [ $? -eq 0 ]; then
    echo "target table truncated"
else
    echo "failed to truncate target table"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "failed to truncate target table" "$ip" "$phones"
    exit 1
fi

sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m1 \
--mapreduce-job-name 'mask1' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m1/*
if [ $? -eq 0 ]; then
    echo "export succeeded"
else
    echo "export failed"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "export failed" "$ip" "$phones"
    exit 1
fi
/home/hadoop/data/mask2 (Hive script):

use test01;

drop table m2;

create table m2
row format delimited fields terminated by ','
as
select
  a.uid as uid,
  name,
  sku,
  os,
  rn
from (
  select
    uid,
    sku,
    os,
    row_number() over (partition by uid order by sku) as rn
  from user_click
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid;
/home/hadoop/data/m2.sql (MySQL script):

use test01;
truncate table m2;
The job script:

#!/bin/bash
# alert parameters
xxl_number=xxl_4
token="小强"
name="mask2"
ip="bigdata12"
phones="15124172559"

hive -f "/home/hadoop/data/mask2"
if [ $? -eq 0 ]; then
    echo "data processing succeeded"
else
    echo "data processing failed"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "data processing failed" "$ip" "$phones"
    exit 1
fi

mysql -uroot -p123456 < "/home/hadoop/data/m2.sql"
if [ $? -eq 0 ]; then
    echo "target table truncated"
else
    echo "failed to truncate target table"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "failed to truncate target table" "$ip" "$phones"
    exit 1
fi

sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m2 \
--mapreduce-job-name 'mask2' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m2/*
if [ $? -eq 0 ]; then
    echo "export succeeded"
else
    echo "export failed"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "export failed" "$ip" "$phones"
    exit 1
fi
/home/hadoop/data/mask3 (Hive script):

use test01;

drop table m3;

create table m3
row format delimited fields terminated by ','
as
select
  a.uid as uid,
  name,
  sku,
  os,
  rn
from (
  select
    uid,
    sku,
    os,
    rn
  from (
    select
      uid,
      sku,
      os,
      row_number() over (partition by uid order by uid) as rn,
      count(sku) over (partition by uid) as cnt
    from user_click
  ) as a
  where cnt != 1
  union all
  select
    uid,
    sku,
    os,
    row_number() over (order by uid) as rn
  from (
    select
      uid,
      sku,
      os,
      count(sku) over (partition by uid) as cnt
    from user_click
  ) as a
  where cnt = 1
) as a left join (
  select *
  from user_info
) as b
on a.uid = b.uid;

/home/hadoop/data/m3.sql (MySQL script):

use test01;
truncate table m3;
The job script:

#!/bin/bash
# alert parameters
xxl_number=xxl_5
token="阿珍"
name="mask3"
ip="bigdata12"
phones="15124172559"

hive -f "/home/hadoop/data/mask3"
if [ $? -eq 0 ]; then
    echo "data processing succeeded"
else
    echo "data processing failed"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "data processing failed" "$ip" "$phones"
    exit 1
fi

mysql -uroot -p123456 < "/home/hadoop/data/m3.sql"
if [ $? -eq 0 ]; then
    echo "target table truncated"
else
    echo "failed to truncate target table"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "failed to truncate target table" "$ip" "$phones"
    exit 1
fi

sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m3 \
--mapreduce-job-name 'mask3' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m3/*
if [ $? -eq 0 ]; then
    echo "export succeeded"
else
    echo "export failed"
    ding_warning.sh "${token}" "${name}_${xxl_number}" "export failed" "$ip" "$phones"
    exit 1
fi