sqoop导出数据到MySQL

本文承接我的《大数据项目离线数仓》把ADS层的数据导出到MySQL,用于可视化的制作。

《大数据项目离线数仓》的其它文章在我的专栏《项目》中。

目录

1.1创建MySQL数据库

1.2创建MySQL表

1.3sqoop导出脚本


1.1创建MySQL数据库

创建gmall_report数据库

CREATE DATABASE `gmall_report` CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';

1.2创建MySQL表

创建表(mysql表和ADS中的表同步,但不需要全部创建,根据自己的可视化内容创建)

下面给出我的一些mysql表:

###创建用户主题表

CREATE TABLE `ads_user_topic`  (
  `dt` date NOT NULL,
  `day_users` bigint(255) NULL DEFAULT NULL,
  `day_new_users` bigint(255) NULL DEFAULT NULL,
  `day_new_payment_users` bigint(255) NULL DEFAULT NULL,
  `payment_users` bigint(255) NULL DEFAULT NULL,
  `users` bigint(255) NULL DEFAULT NULL,
  `day_users2users` double(255, 2) NULL DEFAULT NULL,
  `payment_users2users` double(255, 2) NULL DEFAULT NULL,
  `day_new_users2users` double(255, 2) NULL DEFAULT NULL,
  PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建地区主题表

CREATE TABLE `ads_area_topic`  (
  `dt` date NOT NULL,
  `id` int(11) NULL DEFAULT NULL,
  `province_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `area_code` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `iso_code` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  `region_id` int(11) NULL DEFAULT NULL,
  `region_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `login_day_count` bigint(255) NULL DEFAULT NULL,
  `order_day_count` bigint(255) NULL DEFAULT NULL,
  `order_day_amount` double(255, 2) NULL DEFAULT NULL,
  `payment_day_count` bigint(255) NULL DEFAULT NULL,
  `payment_day_amount` double(255, 2) NULL DEFAULT NULL,
  PRIMARY KEY (`dt`, `iso_code`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建活跃设备数表

create table `ads_uv_count`  (
    `dt` date NOT NULL,
    `day_count` bigint(255) NULL DEFAULT NULL,
    `wk_count` bigint(255) NULL DEFAULT NULL,
    `mn_count` bigint(255) NULL DEFAULT NULL,
    `is_weekend` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
    `is_monthend` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建漏斗分析表

CREATE TABLE `ads_user_action_convert_day`  (
    `dt` DATE  NOT NULL,
    `home_count` BIGINT(255) NULL DEFAULT NULL,
    `good_detail_count` BIGINT(255) NULL DEFAULT NULL,
    `home2good_detail_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
    `cart_count` BIGINT(255) NULL DEFAULT NULL,
    `good_detail2cart_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
    `order_count` BIGINT(255) NULL DEFAULT NULL,
    `cart2order_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
    `payment_amount` BIGINT(255) NULL DEFAULT NULL,
    `order2payment_convert_ratio` DOUBLE(255, 2) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = INNODB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = COMPACT;

###创建每日新增设备表

CREATE TABLE `ads_new_mid_count`  (
    `create_date` date NOT NULL,
    `new_mid_count` bigint(255) NULL DEFAULT NULL,
    PRIMARY KEY (`create_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建留存率表

CREATE TABLE `ads_user_retention_day_rate`  (
    `stat_date` date NOT NULL,
    `create_date` date NOT NULL,
    `retention_day` int(11) NULL DEFAULT NULL,
    `retention_count` bigint(255) NULL DEFAULT NULL,
    `new_mid_count` bigint(255) NULL DEFAULT NULL,
    `retention_ratio` double(255, 2) NULL DEFAULT NULL,
    PRIMARY KEY (`stat_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建沉默用户数表

CREATE TABLE `ads_silent_count`  (
    `dt` date NOT NULL,
    `silent_count` bigint(255) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建商品主题表

CREATE TABLE `ads_product_info`  (
    `dt` date NOT NULL,
    `sku_num` bigint(255) NULL DEFAULT NULL,
    `spu_num` bigint(255) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建商品营销排名表

CREATE TABLE `ads_product_sale_topN`  (
    `dt` date NOT NULL,
    `sku_id` bigint(255) NULL DEFAULT NULL,
    `payment_amount` bigint(255) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;


###创建商品收藏排名表

CREATE TABLE `ads_product_favor_topN`  (
    `dt` date NOT NULL,
    `sku_id` bigint(255) NULL DEFAULT NULL,
    `favor_count` bigint(255) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建商品加入购物车排名表

CREATE TABLE `ads_product_cart_topN`  (
    `dt` date NOT NULL,
    `sku_id` bigint(255) NULL DEFAULT NULL,
    `cart_count` bigint(255) NULL DEFAULT NULL,
     PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建商品退款率排名表

CREATE TABLE `ads_product_refund_topN`  (
    `dt` date NOT NULL,
    `sku_id` bigint(255) NULL DEFAULT NULL,
    `refund_ratio` double(255, 2) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建下单数目统计表

CREATE TABLE `ads_order_daycount`  (
    `dt` date NOT NULL,
    `order_count` bigint(255) NULL DEFAULT NULL,
    `order_amount` bigint(255) NULL DEFAULT NULL,
    `order_users` bigint(255) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

###创建支付信息统计表

CREATE TABLE `ads_payment_daycount`  (
    `dt` date NOT NULL,
    `order_count` bigint(255) NULL DEFAULT NULL,
    `order_amount` bigint(255) NULL DEFAULT NULL,
    `payment_user_count` bigint(255) NULL DEFAULT NULL,
    `payment_sku_count` bigint(255) NULL DEFAULT NULL,
    `payment_avg_time` double(255, 2) NULL DEFAULT NULL,
    PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;

1.3sqoop导出脚本

1)编写Sqoop导出脚本

在/bin目录下创建脚本hdfs_to_mysql.sh

vim hdfs_to_mysql.sh

在脚本中填写如下内容:

#!/bin/bash

hive_db_name=default
mysql_db_name=gmall_report

export_data() {
/training/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/bin/sqoop export \
-Dmapreduce.job.queuename=default \
--connect "jdbc:mysql://hadoop01:3306/${mysql_db_name}?useUnicode=true&characterEncoding=utf-8"  \
--username root \
--password 123456 \
--table $1 \
--num-mappers 1 \
--export-dir /warehouse/$hive_db_name/ads/$1 \
--input-fields-terminated-by "\t" \
--update-mode allowinsert \
--update-key $2 \
--input-null-string '\\N'    \
--input-null-non-string '\\N'
}

case $1 in
  "ads_uv_count")
     export_data "ads_uv_count" "dt"
;;
  "ads_user_action_convert_day") 
     export_data "ads_user_action_convert_day" "dt"
;;
  "ads_user_topic")
     export_data "ads_user_topic" "dt"
;;
  "ads_area_topic")
     export_data "ads_area_topic" "dt,iso_code"
;;
   "all")
     export_data "ads_user_topic" "dt"
     export_data "ads_area_topic" "dt,iso_code"
     export_data "ads_uv_count" "dt"
     export_data "ads_user_action_convert_day" "dt"
     export_data "ads_new_mid_count" "dt"
     export_data "ads_user_retention_day_rate" "dt"
     export_data "ads_silent_count" "dt"
     export_data "ads_product_info" "dt"
     export_data "ads_product_sale_topN" "dt"
     export_data "ads_product_favor_topN" "dt"
     export_data "ads_product_cart_topN" "dt"
     export_data "ads_product_refund_topN" "dt"
     export_data "ads_order_daycount" "dt"
     export_data "ads_payment_daycount" "dt"

;;
esac

直接执行hdfs_to_mysql.sh

注:

(1)导出选择update还是insert

--update-mode:updateonly 只更新,无法插入新数据;

--update-mode:allowinsert 允许新增;

(2)--update-key:

允许更新的情况下,指定哪些字段匹配视为同一条数据,进行更新而不增加。多个字段用逗号分隔。

(3)--input-null-string和--input-null-non-string:

分别表示字符串列和非字符串列的空串和“null”转义。

Hive中的Null在底层是以“\N”来存储,而MySQL中的Null在底层就是Null,为了保证数据两端的一致性。在导出数据时采用--input-null-string和--input-null-non-string两个参数。导入数据时采用--null-string和--null-non-string。

查看数据:

导出成功!!!! 

作者水平低,如有错误,恳请指正!谢谢!!!!!

  • 6
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值