创建表并插入数据
在 MySQL 中创建表并插入数据
-- MySQL: click-log table; one row per (user, product, OS) click event.
-- utf8mb4 instead of legacy utf8 (a 3-byte alias) so all Unicode text is
-- storable; IF NOT EXISTS makes re-running the setup script harmless.
create table if not exists user_click (
uid varchar(255),
sku varchar(255),
os varchar(255)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Seed click events: one multi-row INSERT instead of four single-row ones
-- (same rows end up in the table, one statement instead of four).
insert into user_click (uid, sku, os) values
('u01','鼠标','ios'),
('u02','键盘','android'),
('u03','显示器','ios'),
('u04','托特包','ios');
-- MySQL: user dimension table mapping uid to display name.
-- utf8mb4 instead of legacy utf8 so all Unicode names are storable;
-- IF NOT EXISTS makes re-running the setup script harmless.
create table if not exists user_info (
uid varchar(255),
name varchar(255)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Seed user names: one multi-row INSERT instead of four single-row ones.
insert into user_info (uid, name) values
('u01','子航'),
('u02','祖安'),
('u03','海哥'),
('u04','轩轩');
在hive上建表
-- Hive: landing table for the sqoop-imported click data (comma-delimited text).
-- Fix: terminate the statement with ';' — without it the following CREATE
-- would be parsed as part of this statement when run via 'hive -f'.
create table user_click (
uid string,
sku string,
os string
)
row format delimited fields terminated by ',';
-- Hive: landing table for the sqoop-imported user names (comma-delimited text).
-- Fix: added the terminating ';' so the script is valid under 'hive -f'.
create table user_info (
uid string,
name string
)
row format delimited fields terminated by ',';
将mysql上的数据导入到hive上
# Import mysql table test01.user_click into Hive table test01.user_click.
# Single mapper (-m 1), comma-delimited fields to match the Hive DDL,
# --direct selects the mysqldump fast path, --hive-overwrite makes re-runs
# replace rather than append.
# NOTE(review): a plaintext --password on the command line is visible in shell
# history and `ps`; prefer -P or --password-file outside a sandbox.
sqoop import \
--connect jdbc:mysql://bigdata12:3306/test01 \
--username root \
--password 123456 \
--mapreduce-job-name 'user_click' \
--direct \
--fields-terminated-by ',' \
-m 1 \
--columns "uid,sku,os" \
--table "user_click" \
--hive-import \
--hive-overwrite \
--hive-database test01 \
--hive-table user_click
# Import mysql table test01.user_info into Hive table test01.user_info.
# Same shape as the user_click import: one mapper, comma-delimited,
# --hive-overwrite for idempotent re-runs.
# NOTE(review): plaintext --password leaks via shell history and `ps`;
# prefer -P or --password-file outside a sandbox.
sqoop import \
--connect jdbc:mysql://bigdata12:3306/test01 \
--username root \
--password 123456 \
--mapreduce-job-name 'user_info' \
--direct \
--fields-terminated-by ',' \
-m 1 \
--columns "uid,name" \
--table "user_info" \
--hive-import \
--hive-overwrite \
--hive-database test01 \
--hive-table user_info
数据分析
1. 统计 uid、name、sku、os,以及每个用户点击商品的次数
-- 1. Per click row: uid, name, sku, os, plus cnt = that user's total click count.
-- Rewritten with a window function: the original aggregated user_click,
-- inner-joined the counts back onto every click row, then used a GROUP BY over
-- all five columns purely as row dedup. COUNT() OVER + DISTINCT yields the
-- same result set in a single pass (the file already uses window functions).
-- Also added the missing terminating ';'.
select distinct
c.uid as uid,
i.name,
c.sku,
c.os,
count(c.sku) over (partition by c.uid) as cnt
from user_click c
-- left join keeps click rows even when user_info has no matching uid
left join user_info i
on c.uid = i.uid;
2. 统计 uid、name、sku、os;对表中重复(同一用户多条)的数据额外添加一个排序字段,不重复的数据统一标识为 1
-- 2. Per click row: uid, name, sku, os, plus rn = the row's rank among that
-- user's clicks (ordered by sku). Rows of a non-duplicated uid keep rn = 1,
-- which is the "unique rows flagged 1" behavior the requirement asks for.
-- Fix: added the terminating ';'.
select
a.uid as uid,
name,
sku,
os,
rn
from(
select
uid,
sku,
os,
row_number() over(partition by uid order by sku) as rn
from user_click
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid;
3. 统计 uid、name、sku、os;对表中重复的数据额外添加一个排序字段,不重复的数据同样参与排序
-- 3. Per click row: uid, name, sku, os, plus rn.
-- Rows of duplicated uids (cnt != 1) are ranked within their user; rows of
-- unique uids (cnt = 1) are ranked globally across all single-click users,
-- so they "also get a sort order" as the requirement states.
select
a.uid as uid,
name,
sku,
os,
rn
from(
-- Branch 1: users with more than one click — per-user row_number.
select
uid,
sku,
os,
rn
from(
select
uid,
sku,
os,
-- NOTE(review): ordering by the partition key itself makes the rn
-- assignment among a user's rows arbitrary / non-deterministic — confirm
-- whether 'order by sku' (as used in query 2) was intended here.
row_number() over(partition by uid order by uid) as rn,
count(sku) over(partition by uid) as cnt
from user_click
) as a
where cnt != 1
union all
-- Branch 2: users with exactly one click — one global ranking across them.
select
uid,
sku,
os,
row_number() over(order by uid) as rn
from(
select
uid,
sku,
os,
count(sku) over(partition by uid) as cnt
from user_click
) as a
where cnt=1
-- Attach names; left join keeps clicks even if user_info lacks the uid.
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid
-- NOTE(review): missing terminating ';' — required if run via 'hive -f'.
将数据分析的结果抽成一张表
抽成的表需要指定分隔符
-- CTAS m1: materialize analysis query 1 (uid, name, sku, os, per-user click
-- count) as a comma-delimited table for sqoop export.
-- Fix: restored the trailing GROUP BY used by both the standalone analysis
-- query and the scheduled-script version of this CTAS, so m1 is deduplicated
-- identically in all three places.
create table m1
row format delimited fields terminated by ','
as
select
a.uid as uid,
name,
sku,
os,
cnt
from(
select
a.uid as uid,
sku,
os,
cnt
from(
-- per-user total click count
select
uid,
count(sku) as cnt
from user_click
group by uid
) as a inner join(
select *
from user_click
) as b
on a.uid=b.uid
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid
group by a.uid,name,sku,os,cnt;
-- CTAS m2: materialize analysis query 2 (per-user click ranking by sku)
-- as a comma-delimited table for sqoop export.
-- Fix: added the terminating ';' — without it the following 'create table m3'
-- is swallowed into this statement when the script is executed.
create table m2
row format delimited fields terminated by ','
as
select
a.uid as uid,
name,
sku,
os,
rn
from(
select
uid,
sku,
os,
row_number() over(partition by uid order by sku) as rn
from user_click
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid;
-- CTAS m3: materialize analysis query 3 (duplicated uids ranked per user,
-- unique uids ranked globally) as a comma-delimited table for sqoop export.
-- Fix: added the terminating ';'.
create table m3
row format delimited fields terminated by ','
as
select
a.uid as uid,
name,
sku,
os,
rn
from(
-- Branch 1: users with more than one click — per-user row_number.
select
uid,
sku,
os,
rn
from(
select
uid,
sku,
os,
-- NOTE(review): ordering by the partition key makes rn arbitrary within a
-- user's rows — confirm whether 'order by sku' was intended.
row_number() over(partition by uid order by uid) as rn,
count(sku) over(partition by uid) as cnt
from user_click
) as a
where cnt != 1
union all
-- Branch 2: users with exactly one click — one global ranking across them.
select
uid,
sku,
os,
row_number() over(order by uid) as rn
from(
select
uid,
sku,
os,
count(sku) over(partition by uid) as cnt
from user_click
) as a
where cnt=1
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid;
将处理完需求抽成的那张表导出到mysql
导出之前,mysql需要有表
-- MySQL export target for m1. utf8mb4 instead of legacy utf8; plain INT —
-- the (15) display width carries no storage meaning and is deprecated.
create table if not exists m1 (
uid varchar(255),
name varchar(255),
sku varchar(255),
os varchar(255),
cnt int
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
# Export the Hive-managed m1 warehouse files back into mysql table test01.m1.
# Field delimiter must match the CTAS delimiter (',').
# NOTE(review): --export-dir normally takes the directory itself
# (/user/hive/warehouse/test01.db/m1); the trailing /* relies on the literal
# glob reaching sqoop — confirm this resolves on HDFS as intended.
sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m1 \
--mapreduce-job-name 'mask1' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m1/*
-- MySQL export target for m2. utf8mb4 instead of legacy utf8; plain INT —
-- the (15) display width carries no storage meaning and is deprecated.
create table if not exists m2 (
uid varchar(255),
name varchar(255),
sku varchar(255),
os varchar(255),
rn int
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
# Export the Hive-managed m2 warehouse files back into mysql table test01.m2.
# NOTE(review): --export-dir normally takes the directory itself; the trailing
# /* relies on the literal glob reaching sqoop — confirm on HDFS.
sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m2 \
--mapreduce-job-name 'mask2' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m2/*
-- MySQL export target for m3. utf8mb4 instead of legacy utf8; plain INT —
-- the (15) display width carries no storage meaning and is deprecated.
create table if not exists m3 (
uid varchar(255),
name varchar(255),
sku varchar(255),
os varchar(255),
rn int
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
# Export the Hive-managed m3 warehouse files back into mysql table test01.m3.
# NOTE(review): --export-dir normally takes the directory itself; the trailing
# /* relies on the literal glob reaching sqoop — confirm on HDFS.
sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m3 \
--mapreduce-job-name 'mask3' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m3/*
编写xxl里面的脚本
-- Hive script "mask1" (run by the xxl shell job via 'hive -f'): rebuild test01.m1.
use test01;
-- Fix: IF EXISTS so the very first run does not fail on a missing table.
drop table if exists m1;
create table m1
row format delimited fields terminated by ','
as
select
a.uid as uid,
name,
sku,
os,
cnt
from(
select
a.uid as uid,
sku,
os,
cnt
from(
-- per-user total click count
select
uid,
count(sku) as cnt
from user_click
group by uid
) as a inner join(
select *
from user_click
) as b
on a.uid=b.uid
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid
-- GROUP BY over all output columns: dedup identical rows.
group by a.uid,name,sku,os,cnt;
-- mysql m1.sql: clear the export target so sqoop export does not append duplicates.
USE test01;
TRUNCATE TABLE m1;
#!/bin/bash
# xxl job "mask1": rebuild Hive table m1, truncate the MySQL target, then
# export via sqoop. On any failed step: send a DingTalk alert via
# ding_warning.sh and stop with a non-zero status.

# Alert parameters
xxl_number=xxl_3
token="小红"
name="mask1"
ip="bigdata12"
phones="15124172559"

# Step 1: run the Hive script that recreates m1.
hive -f "/home/hadoop/data/mask1"
if [ $? -eq 0 ]; then
echo "数据处理成功"
else
echo "数据处理失败"
# Fix: was ${number}, which is never set; the job id lives in ${xxl_number}.
ding_warning.sh "${token}" "${name}_${xxl_number}" "数据处理失败" "$ip" "$phones"
# Fix: bare 'exit' returned the alert script's status, masking the failure.
exit 1
fi

# Step 2: truncate the MySQL target table so the export does not append.
mysql -uroot -p123456 < "/home/hadoop/data/m1.sql"
if [ $? -eq 0 ]; then
echo "清空表内容成功"
else
echo "清空表内容失败"
ding_warning.sh "${token}" "${name}_${xxl_number}" "清空表内容失败" "$ip" "$phones"
exit 1
fi

# Step 3: export the Hive m1 data into MySQL.
sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m1 \
--mapreduce-job-name 'mask1' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m1/*
if [ $? -eq 0 ]; then
echo "表导出成功"
else
echo "表导出失败"
ding_warning.sh "${token}" "${name}_${xxl_number}" "表导出失败" "$ip" "$phones"
exit 1
fi
-- Hive script "mask2" (run by the xxl shell job via 'hive -f'): rebuild test01.m2.
use test01;
-- Fix: IF EXISTS so the very first run does not fail on a missing table.
drop table if exists m2;
create table m2
row format delimited fields terminated by ','
as
select
a.uid as uid,
name,
sku,
os,
rn
from(
-- rank each user's clicks by sku; unique users keep rn = 1
select
uid,
sku,
os,
row_number() over(partition by uid order by sku) as rn
from user_click
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid;
-- mysql m2.sql: clear the export target so sqoop export does not append duplicates.
USE test01;
TRUNCATE TABLE m2;
#!/bin/bash
# xxl job "mask2": rebuild Hive table m2, truncate the MySQL target, then
# export via sqoop. On any failed step: send a DingTalk alert via
# ding_warning.sh and stop with a non-zero status.

# Alert parameters
xxl_number=xxl_4
token="小强"
name="mask2"
ip="bigdata12"
phones="15124172559"

# Step 1: run the Hive script that recreates m2.
hive -f "/home/hadoop/data/mask2"
if [ $? -eq 0 ]; then
echo "数据处理成功"
else
echo "数据处理失败"
# Fix: was ${number}, which is never set; the job id lives in ${xxl_number}.
ding_warning.sh "${token}" "${name}_${xxl_number}" "数据处理失败" "$ip" "$phones"
# Fix: bare 'exit' returned the alert script's status, masking the failure.
exit 1
fi

# Step 2: truncate the MySQL target table so the export does not append.
mysql -uroot -p123456 < "/home/hadoop/data/m2.sql"
if [ $? -eq 0 ]; then
echo "表内容清空成功"
else
echo "表内容清空失败"
ding_warning.sh "${token}" "${name}_${xxl_number}" "表内容清空失败" "$ip" "$phones"
exit 1
fi

# Step 3: export the Hive m2 data into MySQL.
sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m2 \
--mapreduce-job-name 'mask2' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m2/*
if [ $? -eq 0 ]; then
echo "导出成功"
else
echo "导出失败"
ding_warning.sh "${token}" "${name}_${xxl_number}" "导出失败" "$ip" "$phones"
exit 1
fi
-- Hive script "mask3" (run by the xxl shell job via 'hive -f'): rebuild test01.m3.
use test01;
-- Fix: IF EXISTS so the very first run does not fail on a missing table.
drop table if exists m3;
create table m3
row format delimited fields terminated by ','
as
select
a.uid as uid,
name,
sku,
os,
rn
from(
-- Branch 1: users with more than one click — per-user row_number.
select
uid,
sku,
os,
rn
from(
select
uid,
sku,
os,
-- NOTE(review): ordering by the partition key makes rn arbitrary within a
-- user's rows — confirm whether 'order by sku' was intended.
row_number() over(partition by uid order by uid) as rn,
count(sku) over(partition by uid) as cnt
from user_click
) as a
where cnt != 1
union all
-- Branch 2: users with exactly one click — one global ranking across them.
select
uid,
sku,
os,
row_number() over(order by uid) as rn
from(
select
uid,
sku,
os,
count(sku) over(partition by uid) as cnt
from user_click
) as a
where cnt=1
) as a left join(
select *
from user_info
) as b
on a.uid=b.uid;
-- mysql m3.sql: clear the export target so sqoop export does not append duplicates.
USE test01;
TRUNCATE TABLE m3;
#!/bin/bash
# xxl job "mask3": rebuild Hive table m3, truncate the MySQL target, then
# export via sqoop. On any failed step: send a DingTalk alert via
# ding_warning.sh and stop with a non-zero status.

# Alert parameters
xxl_number=xxl_5
token="阿珍"
name="mask3"
ip="bigdata12"
phones="15124172559"

# Step 1: run the Hive script that recreates m3.
hive -f "/home/hadoop/data/mask3"
if [ $? -eq 0 ]; then
echo "数据处理成功"
else
echo "数据处理失败"
# Fix: was ${number}, which is never set; the job id lives in ${xxl_number}.
ding_warning.sh "${token}" "${name}_${xxl_number}" "数据处理失败" "$ip" "$phones"
# Fix: bare 'exit' returned the alert script's status, masking the failure.
exit 1
fi

# Step 2: truncate the MySQL target table so the export does not append.
mysql -uroot -p123456 < "/home/hadoop/data/m3.sql"
if [ $? -eq 0 ]; then
echo "表内容清空成功"
else
echo "表内容清空失败"
ding_warning.sh "${token}" "${name}_${xxl_number}" "表内容清空失败" "$ip" "$phones"
exit 1
fi

# Step 3: export the Hive m3 data into MySQL.
sqoop export \
--connect "jdbc:mysql://bigdata12:3306/test01" \
--username root \
--password 123456 \
--table m3 \
--mapreduce-job-name 'mask3' \
--fields-terminated-by ',' \
--export-dir /user/hive/warehouse/test01.db/m3/*
if [ $? -eq 0 ]; then
echo "导出成功"
else
echo "导出失败"
ding_warning.sh "${token}" "${name}_${xxl_number}" "导出失败" "$ip" "$phones"
exit 1
fi