本文承接我的《大数据项目离线数仓》把ADS层的数据导出到MySQL,用于可视化的制作。
《大数据项目离线数仓》的其它文章在我的专栏《项目》中。
目录
1.1创建MySQL数据库
创建gmall_report数据库
CREATE DATABASE `gmall_report` CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
1.2创建MySQL表
创建表(mysql表和ADS中的表同步,但不需要全部创建,根据自己的可视化内容创建)
下面给出我的一些mysql表:
###创建用户主题表
CREATE TABLE `ads_user_topic` (
`dt` date NOT NULL,
`day_users` bigint(255) NULL DEFAULT NULL,
`day_new_users` bigint(255) NULL DEFAULT NULL,
`day_new_payment_users` bigint(255) NULL DEFAULT NULL,
`payment_users` bigint(255) NULL DEFAULT NULL,
`users` bigint(255) NULL DEFAULT NULL,
`day_users2users` double(255, 2) NULL DEFAULT NULL,
`payment_users2users` double(255, 2) NULL DEFAULT NULL,
`day_new_users2users` double(255, 2) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建地区主题表
CREATE TABLE `ads_area_topic` (
`dt` date NOT NULL,
`id` int(11) NULL DEFAULT NULL,
`province_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`area_code` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`iso_code` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`region_id` int(11) NULL DEFAULT NULL,
`region_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`login_day_count` bigint(255) NULL DEFAULT NULL,
`order_day_count` bigint(255) NULL DEFAULT NULL,
`order_day_amount` double(255, 2) NULL DEFAULT NULL,
`payment_day_count` bigint(255) NULL DEFAULT NULL,
`payment_day_amount` double(255, 2) NULL DEFAULT NULL,
PRIMARY KEY (`dt`, `iso_code`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建活跃设备数表
create table `ads_uv_count` (
`dt` date NOT NULL,
`day_count` bigint(255) NULL DEFAULT NULL,
`wk_count` bigint(255) NULL DEFAULT NULL,
`mn_count` bigint(255) NULL DEFAULT NULL,
`is_weekend` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
`is_monthend` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建漏斗分析表
CREATE TABLE `ads_user_action_convert_day` (
`dt` DATE NOT NULL,
`home_count` BIGINT(255) NULL DEFAULT NULL,
`good_detail_count` BIGINT(255) NULL DEFAULT NULL,
`home2good_detail_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
`cart_count` BIGINT(255) NULL DEFAULT NULL,
`good_detail2cart_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
`order_count` BIGINT(255) NULL DEFAULT NULL,
`cart2order_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
`payment_amount` BIGINT(255) NULL DEFAULT NULL,
`order2payment_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = INNODB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = COMPACT;
###创建每日新增设备表
CREATE TABLE `ads_new_mid_count` (
`create_date` date NOT NULL,
`new_mid_count` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`create_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建留存率表
CREATE TABLE `ads_user_retention_day_rate` (
`stat_date` date NOT NULL,
`create_date` date NOT NULL,
`retention_day` int(11) NULL DEFAULT NULL,
`retention_count` bigint(255) NULL DEFAULT NULL,
`new_mid_count` bigint(255) NULL DEFAULT NULL,
`retention_ratio` double(255, 2) NULL DEFAULT NULL,
PRIMARY KEY (`stat_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建沉默用户数表
CREATE TABLE `ads_silent_count` (
`dt` date NOT NULL,
`silent_count` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建商品主题表
CREATE TABLE `ads_product_info` (
`dt` date NOT NULL,
`sku_num` bigint(255) NULL DEFAULT NULL,
`spu_num` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建商品营销排名表
CREATE TABLE `ads_product_sale_topN` (
`dt` date NOT NULL,
`sku_id` bigint(255) NULL DEFAULT NULL,
`payment_amount` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建商品收藏排名表
CREATE TABLE `ads_product_favor_topN` (
`dt` date NOT NULL,
`sku_id` bigint(255) NULL DEFAULT NULL,
`favor_count` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建商品加入购物车排名表
CREATE TABLE `ads_product_cart_topN` (
`dt` date NOT NULL,
`sku_id` bigint(255) NULL DEFAULT NULL,
`cart_count` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建商品退款率排名表
CREATE TABLE `ads_product_refund_topN` (
`dt` date NOT NULL,
`sku_id` bigint(255) NULL DEFAULT NULL,
`refund_ratio` double(255, 2) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建下单数目统计表
CREATE TABLE `ads_order_daycount` (
`dt` date NOT NULL,
`order_count` bigint(255) NULL DEFAULT NULL,
`order_amount` bigint(255) NULL DEFAULT NULL,
`order_users` bigint(255) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
###创建支付信息统计表
CREATE TABLE `ads_payment_daycount` (
`dt` date NOT NULL,
`order_count` bigint(255) NULL DEFAULT NULL,
`order_amount` bigint(255) NULL DEFAULT NULL,
`payment_user_count` bigint(255) NULL DEFAULT NULL,
`payment_sku_count` bigint(255) NULL DEFAULT NULL,
`payment_avg_time` double(255, 2) NULL DEFAULT NULL,
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
1.3sqoop导出脚本
1)编写Sqoop导出脚本
在/bin目录下创建脚本hdfs_to_mysql.sh
vim hdfs_to_mysql.sh
在脚本中填写如下内容:
#!/bin/bash
hive_db_name=default
mysql_db_name=gmall_report
export_data() {
/training/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/bin/sqoop export \
-Dmapreduce.job.queuename=default \
--connect "jdbc:mysql://hadoop01:3306/${mysql_db_name}?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 123456 \
--table $1 \
--num-mappers 1 \
--export-dir /warehouse/$hive_db_name/ads/$1 \
--input-fields-terminated-by "\t" \
--update-mode allowinsert \
--update-key $2 \
--input-null-string '\\N' \
--input-null-non-string '\\N'
}
case $1 in
"ads_uv_count")
export_data "ads_uv_count" "dt"
;;
"ads_user_action_convert_day")
export_data "ads_user_action_convert_day" "dt"
;;
"ads_user_topic")
export_data "ads_user_topic" "dt"
;;
"ads_area_topic")
export_data "ads_area_topic" "dt,iso_code"
;;
"all")
export_data "ads_user_topic" "dt"
export_data "ads_area_topic" "dt,iso_code"
export_data "ads_uv_count" "dt"
export_data "ads_user_action_convert_day" "dt"
export_data "ads_new_mid_count" "dt"
export_data "ads_user_retention_day_rate" "dt"
export_data "ads_silent_count" "dt"
export_data "ads_product_info" "dt"
export_data "ads_product_sale_topN" "dt"
export_data "ads_product_favor_topN" "dt"
export_data "ads_product_cart_topN" "dt"
export_data "ads_product_refund_topN" "dt"
export_data "ads_order_daycount" "dt"
export_data "ads_payment_daycount" "dt"
;;
esac
直接执行hdfs_to_mysql.sh
注:
(1)导出选择update还是insert
--update-mode:updateonly 只更新,无法插入新数据;
--update-mode:allowinsert 允许新增;
(2)--update-key:
允许更新的情况下,指定哪些字段匹配视为同一条数据,进行更新而不增加。多个字段用逗号分隔。
(3)--input-null-string和--input-null-non-string:
分别表示字符串列和非字符串列的空串和“null”转义。
Hive中的Null在底层是以“\N”来存储,而MySQL中的Null在底层就是Null,为了保证数据两端的一致性。在导出数据时采用--input-null-string和--input-null-non-string两个参数。导入数据时采用--null-string和--null-non-string。
查看数据:
导出成功!!!!
作者水平低,如有错误,恳请指正!谢谢!!!!!