电商数仓3.0 DWD层用户行为数据处理

启动日志格式

启动日志表中每行数据对应一个启动记录,一个启动记录应该包含日志中的公共信息和启动信息。

// 一条启动日志JSON格式
{
	"common": {
		"ar": "420000",
		"ba": "Xiaomi",
		"ch": "xiaomi",
		"md": "Xiaomi 9",
		"mid": "mid_896",
		"os": "Android 11.0",
		"uid": "130",
		"vc": "v2.1.111"
	},
	"start": {
		"entry": "notice",
		"loading_time": 4243,
		"open_ad_id": 11,
		"open_ad_ms": 8423,
		"open_ad_skip_ms": 0
	},
	"ts": 1583721930000
}

在这里插入图片描述

创建启动日志表

-- 数据采用parquet存储方式,是可以支持切片的,不需要再对数据创建索引。如果单纯的text方式存储数据,需要采用支持切片的,lzop压缩方式并创建索引。
-- 启动日志表
drop table if exists dwd_start_log;
CREATE EXTERNAL TABLE dwd_start_log(
    `area_code` string COMMENT '地区编码',
    `brand` string COMMENT '手机品牌',
    `channel` string COMMENT '渠道',
    `model` string COMMENT '手机型号',
    `mid_id` string COMMENT '设备id',
    `os` string COMMENT '操作系统',
    `user_id` string COMMENT '会员id',
    `version_code` string COMMENT 'app版本号',
    `entry` string COMMENT ' icon手机图标  notice 通知   install 安装后启动',
    `loading_time` bigint COMMENT '启动加载时间',
    `open_ad_id` string COMMENT '广告页ID ',
    `open_ad_ms` bigint COMMENT '广告总共播放时间',
    `open_ad_skip_ms` bigint COMMENT '用户跳过广告时点',
    `ts` bigint COMMENT '时间'
) COMMENT '启动日志表'
PARTITIONED BY (dt string) -- 按照时间创建分区
stored as parquet -- 采用parquet列式存储
LOCATION '/warehouse/gmall/dwd/dwd_start_log' -- 指定在HDFS上存储位置
TBLPROPERTIES('parquet.compression'='lzo') -- 采用LZO压缩
;

在这里插入图片描述

启动日志表加载数据

-- 启动日志表加载数据
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
insert overwrite table dwd_start_log partition(dt='2020-06-14')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.start.entry'),
    get_json_object(line,'$.start.loading_time'),
    get_json_object(line,'$.start.open_ad_id'),
    get_json_object(line,'$.start.open_ad_ms'),
    get_json_object(line,'$.start.open_ad_skip_ms'),
    get_json_object(line,'$.ts')
from ods_log
where dt='2020-06-14'
and get_json_object(line,'$.start') is not null;

在这里插入图片描述

页面日志表格式

页面日志表中每行数据对应一个页面访问记录,一个页面访问记录应该包含日志中的公共信息和页面信息。

{
	"common": {
		"ar": "110000",
		"ba": "iPhone",
		"ch": "Appstore",
		"md": "iPhone X",
		"mid": "mid_601",
		"os": "iOS 12.4.1",
		"uid": "19",
		"vc": "v2.1.132"
	},
	"displays": [{
		"displayType": "promotion",
		"item": "7",
		"item_type": "sku_id",
		"order": 1
	}, {
		"displayType": "query",
		"item": "1",
		"item_type": "sku_id",
		"order": 2
	}, {
		"displayType": "promotion",
		"item": "9",
		"item_type": "sku_id",
		"order": 3
	}, {
		"displayType": "promotion",
		"item": "6",
		"item_type": "sku_id",
		"order": 4
	}, {
		"displayType": "promotion",
		"item": "3",
		"item_type": "sku_id",
		"order": 5
	}, {
		"displayType": "query",
		"item": "3",
		"item_type": "sku_id",
		"order": 6
	}, {
		"displayType": "query",
		"item": "6",
		"item_type": "sku_id",
		"order": 7
	}, {
		"displayType": "query",
		"item": "1",
		"item_type": "sku_id",
		"order": 8
	}, {
		"displayType": "query",
		"item": "6",
		"item_type": "sku_id",
		"order": 9
	}, {
		"displayType": "query",
		"item": "7",
		"item_type": "sku_id",
		"order": 10
	}],
	"page": {
		"during_time": 14283,
		"item": "7",
		"item_type": "sku_id",
		"last_page_id": "good_detail",
		"page_id": "good_spec",
		"sourceType": "promotion"
	},
	"ts": 1583721987346
}

创建页面日志表

-- dwd-用户行为-页面日志表
drop table if exists dwd_page_log;
CREATE EXTERNAL TABLE dwd_page_log(
    `area_code` string COMMENT '地区编码',
    `brand` string COMMENT '手机品牌',
    `channel` string COMMENT '渠道',
    `model` string COMMENT '手机型号',
    `mid_id` string COMMENT '设备id',
    `os` string COMMENT '操作系统',
    `user_id` string COMMENT '会员id',
    `version_code` string COMMENT 'app版本号',
    `during_time` bigint COMMENT '持续时间毫秒',
    `page_item` string COMMENT '目标id ',
    `page_item_type` string COMMENT '目标类型',
    `last_page_id` string COMMENT '上页类型',
    `page_id` string COMMENT '页面ID ',
    `source_type` string COMMENT '来源类型',
    `ts` bigint
) COMMENT '页面日志表'
PARTITIONED BY (dt string)
stored as parquet
LOCATION '/warehouse/gmall/dwd/dwd_page_log'
TBLPROPERTIES('parquet.compression'='lzo');

在这里插入图片描述

页面日志表加载数据

-- dwd-用户行为-页面日志加载数据
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
insert overwrite table dwd_page_log partition(dt='2020-06-14')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.during_time'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(line,'$.ts')
from ods_log
where dt='2020-06-14'
and get_json_object(line,'$.page') is not null;

在这里插入图片描述

动作日志表格式

动作日志表中每行数据对应用户的一个动作记录,一个动作记录应当包含公共信息、页面信息以及动作信息。

-- 包含actions字段,唯一麻烦的是,actions 字段为有一个json array 需要进行解析
{
	"actions": [{
		"action_id": "favor_add",
		"item": "7",
		"item_type": "sku_id",
		"ts": 1583721974269
	}],
	"common": {
		"ar": "530000",
		"ba": "vivo",
		"ch": "xiaomi",
		"md": "vivo iqoo3",
		"mid": "mid_602",
		"os": "Android 8.1",
		"uid": "273",
		"vc": "v2.1.134"
	},
	"displays": [{
		"displayType": "query",
		"item": "4",
		"item_type": "sku_id",
		"order": 1
	}, {
		"displayType": "query",
		"item": "5",
		"item_type": "sku_id",
		"order": 2
	}, {
		"displayType": "promotion",
		"item": "3",
		"item_type": "sku_id",
		"order": 3
	}, {
		"displayType": "query",
		"item": "7",
		"item_type": "sku_id",
		"order": 4
	}, {
		"displayType": "query",
		"item": "5",
		"item_type": "sku_id",
		"order": 5
	}, {
		"displayType": "promotion",
		"item": "5",
		"item_type": "sku_id",
		"order": 6
	}, {
		"displayType": "query",
		"item": "6",
		"item_type": "sku_id",
		"order": 7
	}],
	"page": {
		"during_time": 6123,
		"item": "7",
		"item_type": "sku_id",
		"last_page_id": "favor",
		"page_id": "good_detail",
		"sourceType": "promotion"
	},
	"ts": 1583721971208
}

创建动作日志表

-- dwd-用户行为-动作日志表
drop table if exists dwd_action_log;
CREATE EXTERNAL TABLE dwd_action_log(
    `area_code` string COMMENT '地区编码',
    `brand` string COMMENT '手机品牌',
    `channel` string COMMENT '渠道',
    `model` string COMMENT '手机型号',
    `mid_id` string COMMENT '设备id',
    `os` string COMMENT '操作系统',
    `user_id` string COMMENT '会员id',
    `version_code` string COMMENT 'app版本号',
    `during_time` bigint COMMENT '持续时间毫秒',
    `page_item` string COMMENT '目标id ',
    `page_item_type` string COMMENT '目标类型',
    `last_page_id` string COMMENT '上页类型',
    `page_id` string COMMENT '页面id ',
    `source_type` string COMMENT '来源类型',
    `action_id` string COMMENT '动作id',
    `item` string COMMENT '目标id ',
    `item_type` string COMMENT '目标类型',
    `ts` bigint COMMENT '时间'
) COMMENT '动作日志表'
PARTITIONED BY (dt string)
stored as parquet
LOCATION '/warehouse/gmall/dwd/dwd_action_log'
TBLPROPERTIES('parquet.compression'='lzo');

在这里插入图片描述

编写UDTF代码

解析思路:将json数组进行遍历输出

完整代码:https://github.com/SmallScorpion/hive-udtf-function.git

// 1. 将jar包上传到/opt/module/hive-3.1.2

// 2. 创建 jar 包存放路径
[scorpion@warehouse102 hive-3.1.2]$ hadoop fs -mkdir -p /user/hive/jars

// 3. 将 jar 包上传
[scorpion@warehouse102 hive-3.1.2]$ hadoop fs -put hive-udtf-function-1.0-SNAPSHOT.jar /user/hive/jars

在这里插入图片描述

// 4. 创建永久函数与开发号的java class关联
create function explode_json_array as 'com.warehouse.hive.udtf.ExplodeJSONArray' using jar 'hdfs://warehouse102:8020/user/hive/jars/hive-udtf-function-1.0-SNAPSHOT.jar';

在这里插入图片描述

加载数据

-- dwd-用户行为-动作日志表加载数据
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
insert overwrite table dwd_action_log partition(dt='2020-06-14')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.during_time'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(action,'$.action_id'),
    get_json_object(action,'$.item'),
    get_json_object(action,'$.item_type'),
    get_json_object(action,'$.ts')
from ods_log lateral view explode_json_array(get_json_object(line,'$.actions')) tmp as action
where dt='2020-06-14'
and get_json_object(line,'$.actions') is not null;

在这里插入图片描述

曝光日志表格式

曝光日志表中每行数据对应一个曝光记录,一个曝光记录应当包含公共信息、页面信息以及曝光信息。

-- 可以看到 displays 也是 json array 同样需要UDTF解开
{
	"common": {
		"ar": "110000",
		"ba": "Xiaomi",
		"ch": "xiaomi",
		"md": "Xiaomi Mix2 ",
		"mid": "mid_991",
		"os": "Android 11.0",
		"uid": "27",
		"vc": "v2.1.132"
	},
	"displays": [{
		"displayType": "activity",
		"item": "2",
		"item_type": "activity_id",
		"order": 1
	}, {
		"displayType": "recommend",
		"item": "10",
		"item_type": "sku_id",
		"order": 2
	}, {
		"displayType": "promotion",
		"item": "4",
		"item_type": "sku_id",
		"order": 3
	}, {
		"displayType": "promotion",
		"item": "4",
		"item_type": "sku_id",
		"order": 4
	}, {
		"displayType": "query",
		"item": "9",
		"item_type": "sku_id",
		"order": 5
	}],
	"page": {
		"during_time": 8059,
		"page_id": "home"
	},
	"ts": 1583721931902
}

创建曝光日志表

drop table if exists dwd_display_log;
CREATE EXTERNAL TABLE dwd_display_log(
    `area_code` string COMMENT '地区编码',
    `brand` string COMMENT '手机品牌', 
    `channel` string COMMENT '渠道', 
    `model` string COMMENT '手机型号', 
    `mid_id` string COMMENT '设备id', 
    `os` string COMMENT '操作系统', 
    `user_id` string COMMENT '会员id', 
    `version_code` string COMMENT 'app版本号', 
    `during_time` bigint COMMENT 'app版本号',
    `page_item` string COMMENT '目标id ', 
    `page_item_type` string COMMENT '目标类型', 
    `last_page_id` string COMMENT '上页类型', 
    `page_id` string COMMENT '页面ID ',
    `source_type` string COMMENT '来源类型', 
    `ts` bigint COMMENT 'app版本号',
    `display_type` string COMMENT '曝光类型',
    `item` string COMMENT '曝光对象id ',
    `item_type` string COMMENT 'app版本号', 
    `order` bigint COMMENT '出现顺序'
) COMMENT '曝光日志表'
PARTITIONED BY (dt string)
stored as parquet
LOCATION '/warehouse/gmall/dwd/dwd_display_log'
TBLPROPERTIES('parquet.compression'='lzo');

在这里插入图片描述

曝光日志表加载数据

-- dwd-用户行为-曝光日志表加载数据
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
insert overwrite table dwd_display_log partition(dt='2020-06-14')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.during_time'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(line,'$.ts'),
    get_json_object(display,'$.displayType'),
    get_json_object(display,'$.item'),
    get_json_object(display,'$.item_type'),
    get_json_object(display,'$.order')
from ods_log lateral view explode_json_array(get_json_object(line,'$.displays')) tmp as display
where dt='2020-06-14'
and get_json_object(line,'$.displays') is not null;

在这里插入图片描述

错误日志表格式

错误日志表中每行数据对应一个错误记录,为方便定位错误,一个错误记录应当包含与之对应的公共信息、页面信息、曝光信息、动作信息、启动信息以及错误信息。

{
	"common": {
		"ar": "500000",
		"ba": "iPhone",
		"ch": "Appstore",
		"md": "iPhone Xs",
		"mid": "mid_700",
		"os": "iOS 13.3.1",
		"uid": "287",
		"vc": "v2.0.1"
	},
	"displays": [{
		"displayType": "activity",
		"item": "1",
		"item_type": "activity_id",
		"order": 1
	}, {
		"displayType": "query",
		"item": "3",
		"item_type": "sku_id",
		"order": 2
	}, {
		"displayType": "promotion",
		"item": "5",
		"item_type": "sku_id",
		"order": 3
	}, {
		"displayType": "query",
		"item": "6",
		"item_type": "sku_id",
		"order": 4
	}, {
		"displayType": "promotion",
		"item": "4",
		"item_type": "sku_id",
		"order": 5
	}, {
		"displayType": "promotion",
		"item": "10",
		"item_type": "sku_id",
		"order": 6
	}, {
		"displayType": "query",
		"item": "10",
		"item_type": "sku_id",
		"order": 7
	}, {
		"displayType": "query",
		"item": "4",
		"item_type": "sku_id",
		"order": 8
	}, {
		"displayType": "promotion",
		"item": "10",
		"item_type": "sku_id",
		"order": 9
	}],
	"err": {
		"error_code": 1140,
		"msg": " Exception in thread \\  java.net.SocketTimeoutException\\n \\tat com.atgugu.gmall2020.mock.log.bean.AppError.main(AppError.java:xxxxxx)"
	},
	"page": {
		"during_time": 17530,
		"last_page_id": "activity",
		"page_id": "home"
	},
	"ts": 1583722009590
}

创建错误日志表

-- dwd-用户行为-错误日志表创建
drop table if exists dwd_error_log;
CREATE EXTERNAL TABLE dwd_error_log(
    `area_code` string COMMENT '地区编码',
    `brand` string COMMENT '手机品牌', 
    `channel` string COMMENT '渠道', 
    `model` string COMMENT '手机型号', 
    `mid_id` string COMMENT '设备id', 
    `os` string COMMENT '操作系统', 
    `user_id` string COMMENT '会员id', 
    `version_code` string COMMENT 'app版本号', 
    `page_item` string COMMENT '目标id ', 
    `page_item_type` string COMMENT '目标类型', 
    `last_page_id` string COMMENT '上页类型', 
    `page_id` string COMMENT '页面ID ',
    `source_type` string COMMENT '来源类型', 
    `entry` string COMMENT ' icon手机图标  notice 通知 install 安装后启动',
    `loading_time` string COMMENT '启动加载时间',
    `open_ad_id` string COMMENT '广告页ID ',
    `open_ad_ms` string COMMENT '广告总共播放时间', 
    `open_ad_skip_ms` string COMMENT '用户跳过广告时点',
    `actions` string COMMENT '动作',
    `displays` string COMMENT '曝光',
    `ts` string COMMENT '时间',
    `error_code` string COMMENT '错误码',
    `msg` string COMMENT '错误信息'
) COMMENT '错误日志表'
PARTITIONED BY (dt string)
stored as parquet
LOCATION '/warehouse/gmall/dwd/dwd_error_log'
TBLPROPERTIES('parquet.compression'='lzo');

在这里插入图片描述

加载数据

-- dwd-用户行为-错误日志表加载数据
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
insert overwrite table dwd_error_log partition(dt='2020-06-14')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(line,'$.start.entry'),
    get_json_object(line,'$.start.loading_time'),
    get_json_object(line,'$.start.open_ad_id'),
    get_json_object(line,'$.start.open_ad_ms'),
    get_json_object(line,'$.start.open_ad_skip_ms'),
    get_json_object(line,'$.actions'),
    get_json_object(line,'$.displays'),
    get_json_object(line,'$.ts'),
    get_json_object(line,'$.err.error_code'),
    get_json_object(line,'$.err.msg')
from ods_log
where dt='2020-06-14'
and get_json_object(line,'$.err') is not null;

在这里插入图片描述

DWD层用户行为数据加载脚本

[scorpion@warehouse102 bin]$ vim dwd_ods_to_dwd_log.sh
#!/bin/bash
# dwd层 从ods到dwd数据导入-用户行为数据 > .sh 2020-06-15
hive=/opt/module/hive-3.1.2/bin/hive
APP=gmall
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
    do_date=$1
else 
    do_date=`date -d "-1 day" +%F`
fi

sql="
SET mapreduce.job.queuename=hive;
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
insert overwrite table ${APP}.dwd_start_log partition(dt='$do_date')
select 
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.start.entry'),
    get_json_object(line,'$.start.loading_time'),
    get_json_object(line,'$.start.open_ad_id'),
    get_json_object(line,'$.start.open_ad_ms'),
    get_json_object(line,'$.start.open_ad_skip_ms'),
    get_json_object(line,'$.ts')
from ${APP}.ods_log
where dt='$do_date'
and get_json_object(line,'$.start') is not null;


insert overwrite table ${APP}.dwd_action_log partition(dt='$do_date')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.during_time'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(action,'$.action_id'),
    get_json_object(action,'$.item'),
    get_json_object(action,'$.item_type'),
    get_json_object(action,'$.ts')
from ${APP}.ods_log lateral view ${APP}.explode_json_array(get_json_object(line,'$.actions')) tmp as action
where dt='$do_date'
and get_json_object(line,'$.actions') is not null;


insert overwrite table ${APP}.dwd_display_log partition(dt='$do_date')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.during_time'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(line,'$.ts'),
    get_json_object(display,'$.displayType'),
    get_json_object(display,'$.item'),
    get_json_object(display,'$.item_type'),
    get_json_object(display,'$.order')
from ${APP}.ods_log lateral view ${APP}.explode_json_array(get_json_object(line,'$.displays')) tmp as display
where dt='$do_date'
and get_json_object(line,'$.displays') is not null;

insert overwrite table ${APP}.dwd_page_log partition(dt='$do_date')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.during_time'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(line,'$.ts')
from ${APP}.ods_log
where dt='$do_date'
and get_json_object(line,'$.page') is not null;


insert overwrite table ${APP}.dwd_error_log partition(dt='$do_date')
select
    get_json_object(line,'$.common.ar'),
    get_json_object(line,'$.common.ba'),
    get_json_object(line,'$.common.ch'),
    get_json_object(line,'$.common.md'),
    get_json_object(line,'$.common.mid'),
    get_json_object(line,'$.common.os'),
    get_json_object(line,'$.common.uid'),
    get_json_object(line,'$.common.vc'),
    get_json_object(line,'$.page.item'),
    get_json_object(line,'$.page.item_type'),
    get_json_object(line,'$.page.last_page_id'),
    get_json_object(line,'$.page.page_id'),
    get_json_object(line,'$.page.sourceType'),
    get_json_object(line,'$.start.entry'),
    get_json_object(line,'$.start.loading_time'),
    get_json_object(line,'$.start.open_ad_id'),
    get_json_object(line,'$.start.open_ad_ms'),
    get_json_object(line,'$.start.open_ad_skip_ms'),
    get_json_object(line,'$.actions'),
    get_json_object(line,'$.displays'),
    get_json_object(line,'$.ts'),
    get_json_object(line,'$.err.error_code'),
    get_json_object(line,'$.err.msg')
from ${APP}.ods_log 
where dt='$do_date'
and get_json_object(line,'$.err') is not null;
"

$hive -e "$sql"

// 增加权限
[scorpion@warehouse102 bin]$ chmod 777 dwd_ods_to_dwd_log.sh

在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值