数据仓库项目——业务数据生成

使用MySQL链接工具链接MySQL(我使用的是Navicat)
  1. 创建一个名为gmall的数据库
  2. 导入SQL脚本生成对应的表以及数据

文件已分享,可自行下载
链接: https://pan.baidu.com/s/1rrbM4FGChktr3wQxm8QN7g
提取码: 5x8e

生成业务数据
  1. 在/opt/module/创建一个db_log文件夹
mkdir db_log/
  1. 上传gmall-mock-db-2020-03-16-SNAPSHOT.jar 和 application.properties

文件已分享,可自行下载
链接:https://pan.baidu.com/s/1mc-6mICHuYaLIWeRRJr9mg
提取码:7v7w

将业务数据导入到HDFS中
  1. 在/bin目录下编写脚本文件
vim mysql_to_hdfs.sh

脚本代码如下

#! /bin/bash

sqoop=/opt/module/sqoop/bin/sqoop
do_date=`date -d '-1 day' +%F`

if [[ -n "$2" ]]; then
	do_date=$2
fi

import_data(){
$sqoop import \
--connect jdbc:mysql://hadoop102:3306/gmall \
--username root \
--password 000000 \
--target-dir /origin_data/gmall/db/$1/$do_date \
--delete-target-dir \
--query "$2 and \$CONDITIONS" \
--num-mappers 1 \
--fields-terminated-by '\t' \
--compress \
--compression-codec lzop \
--null-string '\\N' \
--null-non-string '\\N'
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.DistributedLzoIndexer /origin_data/gmall/db/$1/$do_date
}

import_order_info(){
	import_data order_info "select
				id,
				final_total_amount,
				order_status,
				user_id,
				out_trade_no,
				create_time,
				operate_time,
				province_id,
				benefit_reduce_amount,
				original_total_amount,
				feight_fee
				from order_info
				where (date_format(create_time,'%Y-%m-%d')='$do_date'
				or date_format(operate_time,'%Y-%m-%d')='$do_date')"
}
	
import_coupon_use(){
	import_data coupon_use "select
				id,
				coupon_id,
				user_id,
				order_id,
				coupon_status,
				get_time,
				using_time,
				used_time
				from coupon_use
				where (date_format(get_time,'%Y-%m-%d')='$do_date'
				or date_format(using_time,'%Y-%m-%d')='$do_date'
				or date_format(used_time,'%Y-%m-%d')='$do_date')"
}

import_order_status_log(){
	import_data order_status_log "select
				id,
				order_id,
				order_status,
				operate_time
				from order_status_log
				where date_format(operate_time,'%Y-%m-%d')='$do_date'"
}

import_activity_order(){
	import_data activity_order "select
				id,
				activity_id,
				order_id,
				create_time
				from activity_order
				where date_format(create_time,'%Y-%m-%d')='$do_date'"
}

import_user_info(){
	import_data "user_info" "select
				id,
				name,
				birthday,
				gender,
				email,
				user_level,
				create_time,
				operate_time
				from user_info
				where (DATE_FORMAT(create_time,'%Y-%m-%d')='$do_date'
				or DATE_FORMAT(operate_time,'%Y-%m-%d')='$do_date')"
}

import_order_detail(){
	import_data order_detail "select
				od.id,
				order_id,
				user_id,
				sku_id,
				sku_name,
				order_price,
				sku_num,
				od.create_time
				from order_detail od join order_info oi
				on od.order_id=oi.id
				where DATE_FORMAT(od.create_time,'%Y-%m-%d')='$do_date'"
}

import_payment_info(){
	import_data "payment_info" "select
				id,
				out_trade_no,
				order_id,
				user_id,
				alipay_trade_no,
				total_amount,
				subject,
				payment_type,
				payment_time
				from payment_info
				where DATE_FORMAT(payment_time,'%Y-%m-%d')='$do_date'"
}

import_comment_info(){
	import_data comment_info "select
				id,
				user_id,
				sku_id,
				spu_id,
				order_id,
				appraise,
				comment_txt,
				create_time
				from comment_info
				where date_format(create_time,'%Y-%m-%d')='$do_date'"
}

import_order_refund_info(){
	import_data order_refund_info "select
				id,
				user_id,
				order_id,
				sku_id,
				refund_type,
				refund_num,
				refund_amount,
				refund_reason_type,
				create_time
				from order_refund_info
				where date_format(create_time,'%Y-%m-%d')='$do_date'"
}

import_sku_info(){
	import_data sku_info "select
				id,
				spu_id,
				price,
				sku_name,
				sku_desc,
				weight,
				tm_id,
				category3_id,
				create_time
				from sku_info where 1=1"
}

import_base_category1(){
				import_data "base_category1" "select
				id,
				name
				from base_category1
				where 1=1"
}

import_base_category2(){
	import_data "base_category2" "select
				id,
				name,
				category1_id
				from base_category2
				where 1=1"
}

import_base_category3(){
	import_data "base_category3" "select
				id,
				name,
				category2_id
				from base_category3
				where 1=1"
}

import_base_province(){
	import_data base_province "select
				id,
				name,
				region_id,
				area_code,
				iso_code
				from base_province
				where 1=1"
}

import_base_region(){
	import_data base_region "select
				id,
				region_name
				from base_region
				where 1=1"
}

import_base_trademark(){
	import_data base_trademark "select
				tm_id,
				tm_name
				from base_trademark
				where 1=1"
}

import_spu_info(){
	import_data spu_info "select
				id,
				spu_name,
				category3_id,
				tm_id
				from spu_info
				where 1=1"
}

import_favor_info(){
	import_data favor_info "select
				id,
				user_id,
				sku_id,
				spu_id,
				is_cancel,
				create_time,
				cancel_time
				from favor_info
				where 1=1"
}

import_cart_info(){
	import_data cart_info "select
				id,
				user_id,
				sku_id,
				cart_price,
				sku_num,
				sku_name,
				create_time,
				operate_time,
				is_ordered,
				order_time
				from cart_info
				where 1=1"
}

import_coupon_info(){
	import_data coupon_info "select
				id,
				coupon_name,
				coupon_type,
				condition_amount,
				condition_num,
				activity_id,
				benefit_amount,
				benefit_discount,
				create_time,
				range_type,
				spu_id,tm_id,
				category3_id,
				limit_num,
				operate_time,
				expire_time
				from coupon_info
				where 1=1"
}

import_activity_info(){
	import_data activity_info "select
				id,
				activity_name,
				activity_type,
				start_time,
				end_time,
				create_time
				from activity_info
				where 1=1"
}

import_activity_rule(){
	import_data activity_rule "select
				id,
				activity_id,
				condition_amount,
				condition_num,
				benefit_amount,
				benefit_discount,
				benefit_level
				from activity_rule
				where 1=1"
}

import_base_dic(){
	import_data base_dic "select
				dic_code,
				dic_name,
				parent_code,
				create_time,
				operate_time
				from base_dic
				where 1=1"
}

case $1 in
	"order_info")
		import_order_info
;;
	"base_category1")
		import_base_category1
;;
	"base_category2")
		import_base_category2
;;
	"base_category3")
		import_base_category3
;;
	"order_detail")
		import_order_detail
;;
	"sku_info")
		import_sku_info
;;
	"user_info")
		import_user_info
;;
	"payment_info")
		import_payment_info
;;
	"base_province")
		import_base_province
;;
	"base_region")
		import_base_region
;;
	"base_trademark")
		import_base_trademark
;;
	"activity_info")
		import_activity_info
;;
	"activity_order")
		import_activity_order
;;
	"cart_info")
		import_cart_info
;;
	"comment_info")
		import_comment_info
;;
	"coupon_info")
		import_coupon_info
;;
	"coupon_use")
		import_coupon_use
;;
	"favor_info")
		import_favor_info
;;
	"order_refund_info")
		import_order_refund_info
;;
	"order_status_log")
		import_order_status_log
;;
	"spu_info")
		import_spu_info
;;
	"activity_rule")
		import_activity_rule
;;
	"base_dic")
		import_base_dic
;;
	
"first")
		import_base_category1
		import_base_category2
		import_base_category3
		import_order_info
		import_order_detail
		import_sku_info
		import_user_info
		import_payment_info
		import_base_provinceimport_base_region
		import_base_trademark
		import_activity_info
		import_activity_order
		import_cart_info
		import_comment_info
		import_coupon_use
		import_coupon_info
		import_favor_info
		import_order_refund_info
		import_order_status_log
		import_spu_info
		import_activity_rule
		import_base_dic
;;

"all")
		import_base_category1
		import_base_category2
		import_base_category3
		import_order_info
		import_order_detail
		import_sku_info import_user_info
		import_payment_info
		import_base_trademark
		import_activity_info
		import_activity_order
		import_cart_info
		import_comment_info
		import_coupon_use
		import_coupon_info
		import_favor_info
		import_order_refund_info
		import_order_status_log
		import_spu_info
		import_activity_rule
		import_base_dic
;;
esac
  1. 数据迁移
//初次导入
mysql_to_hdfs.sh first 2020-03-10
//每日导入
mysql_to_hdfs.sh all 2020-03-11
  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值