CC00036.pbpositions——|Hadoop & PB-Scale Data Warehouse.V03|——|PB Warehouse.v03|Ad Analysis|DWD Layer Data Processing|

This article covers building a PB-scale data warehouse on Hadoop, focusing on table creation and data loading at the DWD layer. Using an event json parsing program, it walks through how ad click events are extracted and processed to support comprehensive ad analysis.
1. DWD Layer Table Creation and Data Loading
### --- DWD layer table creation and data loading

~~~     ODS: partitioned; the main event information is carried in one json string (a json array), and the common information in another json string;
~~~     ODS => parse the json: extract the jsonArray data from the event json string and parse the common information out of the other json string =>
### --- Detail of all events, containing:

~~~     Partition
~~~     Event (json string)
~~~     Common-information fields
~~~     # Detail of all events => parse the ad json string => detail of ad events, containing:

~~~     Partition
~~~     Ad-information fields
~~~     Common-information fields
### --- Data loading flow

~~~     Logs => Flume => ODS => cleansing & transformation => detailed ad event records (a parsing sketch of a raw record follows)
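~~~     To make the flow concrete, here is a minimal parsing sketch against a hypothetical raw record. The record shape (an attr object carrying the common fields plus a yanqi_event json array) is inferred from the load script in section 4; the field values are made up:

select
    get_json_object(line, '$.attr.device_id') as device_id,
    get_json_object(line, '$.attr.uid')       as uid,
    get_json_object(line, '$.yanqi_event')    as yanqi_event  -- still a json array; exploded later by the UDF
from (
    select '{"attr":{"device_id":"D001","uid":"U001"},"yanqi_event":[{"name":"ad","json":{"ad_action":1},"time":1596343881690}]}' as line
) t;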
2. DWD Layer Table Creation
### --- DWD layer table creation: syntax
~~~     # Syntax:
~~~     Detail of all events

drop table if exists dwd.dwd_event_log;

CREATE EXTERNAL TABLE dwd.dwd_event_log(
`device_id` string,
`uid` string,
`app_v` string,
    `os_type` string,
`event_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string,
`name` string,
`event_json` string,
`report_time` string)
PARTITIONED BY (`dt` string)
stored as parquet;
~~~     # Ad click detail

drop table if exists dwd.dwd_ad;

CREATE TABLE dwd.dwd_ad(
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`event_type` string,
    `language` string,
`channel` string,
`area` string,
`brand` string,
`report_time` string,
`duration` int,
`ad_action` int,
`shop_id` int,
`ad_type` int,
`show_style` smallint,
`product_id` int,
`place` string,
`sort` int,
`hour` string
)
PARTITIONED BY (`dt` string)
stored as parquet;
### --- DWD layer table creation: hands-on
~~~     # Detail of all events

hive (default)> drop table if exists dwd.dwd_event_log;

hive (default)> 
              > CREATE EXTERNAL TABLE dwd.dwd_event_log(
              > `device_id` string,
              > `uid` string,
              > `app_v` string,
              >     `os_type` string,
              > `event_type` string,
              > `language` string,
              > `channel` string,
              > `area` string,
              > `brand` string,
              > `name` string,
              > `event_json` string,
              > `report_time` string)
              > PARTITIONED BY (`dt` string)
              > stored as parquet;
~~~     # Ad click detail

hive (default)> drop table if exists dwd.dwd_ad;

hive (default)> 
              > CREATE TABLE dwd.dwd_ad(
              > `device_id` string,
              > `uid` string,
              > `app_v` string,
              > `os_type` string,
              > `event_type` string,
              >     `language` string,
              > `channel` string,
              > `area` string,
              > `brand` string,
              > `report_time` string,
              > `duration` int,
              > `ad_action` int,
              > `shop_id` int,
              > `ad_type` int,
              > `show_style` smallint,
              > `product_id` int,
              > `place` string,
              > `sort` int,
              > `hour` string
              > )
              > PARTITIONED BY (`dt` string)
              > stored as parquet;
3. Developing the Event JSON Parser
### --- Event json string parsing

~~~     Candidate approaches: built-in functions, a UDF, or a SerDe (a SerDe suits records that are entirely json);
~~~     For details, see Part 3, E-commerce Analysis: Member Activity =>
~~~     Section 4, json data processing => using a UDF (to handle a jsonArray); a short illustration of why a UDF is needed follows
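~~~     A quick, made-up illustration of the limitation: get_json_object can address a single array element by index, but it cannot fan a json array out into one row per element, which is why the UDF below returns an array that LATERAL VIEW explode can expand:

select get_json_object('{"events":[{"name":"ad"},{"name":"loading"}]}', '$.events[0].name');
-- returns: ad   (one element at a time; no row-per-element expansion)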
### --- Create the json parsing class: PareJsonArrayWithKey

package cn.yanqi.dw.hive.udf;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Strings;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.junit.Test;
import java.util.ArrayList;

public class PareJsonArrayWithKey extends UDF{
    public ArrayList<String> evaluate(String jsonStr) {
        // Ad analysis: DWD layer data loading (NO.H.00036)
        // A null or empty input string returns null
        if (Strings.isNullOrEmpty(jsonStr)){
            return null;
        }
        try{
            // Parse the string into a JSONArray
            JSONArray jsonArray = JSON.parseArray(jsonStr);
            ArrayList<String> lst = new ArrayList<>();
            for(Object o: jsonArray) {
                lst.add(o.toString());
            }
            return lst;
        }catch (JSONException e){
            return null;
        }
    }
    @Test
    public void JunitParseJsonArray() {
        String jsonStr = " [{\"name\":\"goods_detail_loading\",\"json\": {\"entry\":\"1\",\"goodsid\":\"0\",\"loading_time\":\"93\",\"a ction\":\"3\",\"staytime\":\"56\",\"showtype\":\"2\"},\"time\":1596343881690},{\"name\":\"loading\",\"json\": {\"loading_time\":\"15\",\"action\":\"3\",\"loading_type\":\"3\",\"type\":\"1\"},\"time\":1596356988428}, {\"name\":\"notification\",\"json\": {\"action\":\"1\",\"type\":\"2\"},\"time\":1596374167278}, {\"name\":\"favorites\",\"json\": {\"course_id\":1,\"id\":0,\"userid\":0},\"time\":1596350933962 }]";
        ArrayList<String> result = evaluate(jsonStr);
        System.out.println(result.size());
        System.out.println(JSON.toJSONString(result));
    }
}
### --- Build and run output

D:\JAVA\jdk1.8.0_231\bin\java.exe -ea -Didea.test.cyclic.buffer.size=1048576 "-javaagent:D:\IntelliJIDEA\IntelliJ IDEA 2019.3.3\lib\idea_rt.jar=61891:D:\IntelliJIDEA\IntelliJ IDEA 2019.3.3\bin" -Dfile.encoding=UTF-8 -classpath "(project classes and Maven dependency jars omitted: flume-ng 1.9.0, fastjson 1.1.23, junit 4.12, hive-exec 2.3.7, hadoop-common 2.6.0, ...)" com.intellij.rt.junit.JUnitStarter -ideVersion5 -junit4 cn.yanqi.dw.hive.udf.PareJsonArrayWithKey
4
["{\"json\":{\"a ction\":\"3\",\"entry\":\"1\",\"goodsid\":\"0\",\"loading_time\":\"93\",\"showtype\":\"2\",\"staytime\":\"56\"},\"name\":\"goods_detail_loading\",\"time\":1596343881690}","{\"json\":{\"action\":\"3\",\"loading_time\":\"15\",\"loading_type\":\"3\",\"type\":\"1\"},\"name\":\"loading\",\"time\":1596356988428}","{\"json\":{\"action\":\"1\",\"type\":\"2\"},\"name\":\"notification\",\"time\":1596374167278}","{\"json\":{\"course_id\":1,\"id\":0,\"userid\":0},\"name\":\"favorites\",\"time\":1596350933962}"]
### --- Package the class into a jar and upload it to the server under /data/yanqidw/jars/PareJsonArrayWithKey

[root@hadoop02 PareJsonArrayWithKey]# ll
total 360
-rw-r--r-- 1 root root 361799 Oct  4 17:58 cn.yanqi.dw-1.0-SNAPSHOT-jar-with-dependencies.jar
4. DWD Layer Data Loading: Obtaining the Full Event Log
### --- Main job: parse the json strings;

[root@hadoop02 ~]# vim /data/yanqidw/script/advertisement/dwd_load_event_log.sh
#!/bin/bash

source /etc/profile

if [ -n "$1" ] ;then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
use dwd;
add jar /data/yanqidw/jars/PareJsonArrayWithKey/cn.yanqi.dw-1.0-SNAPSHOT-jar-with-dependencies.jar;
create temporary function json_array as 'cn.yanqi.dw.hive.udf.PareJsonArrayWithKey';
with tmp_start as
(
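-- the raw ODS line is space-delimited; field 8 (index 7) carries the event json payload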
select split(str, ' ')[7] as line
from ods.ods_log_event where dt='$do_date'
)

insert overwrite table dwd.dwd_event_log
PARTITION (dt='$do_date')
select
    device_id,
    uid,
    app_v,
    os_type,
    event_type,
    language,
    channel,
    area,
    brand,
    get_json_object(k,'$.name') as name,
    get_json_object(k,'$.json') as json,
    get_json_object(k,'$.time') as time
    from
(
select
    get_json_object(line,'$.attr.device_id') as device_id,
    get_json_object(line,'$.attr.uid') as uid,
    get_json_object(line,'$.attr.app_v') as app_v,
    get_json_object(line,'$.attr.os_type') as os_type,
    get_json_object(line,'$.attr.event_type') as event_type,
    get_json_object(line,'$.attr.language') as language,
    get_json_object(line,'$.attr.channel') as channel,
    get_json_object(line,'$.attr.area') as area,
    get_json_object(line,'$.attr.brand') as brand,
    get_json_object(line,'$.yanqi_event') as yanqi_event
    from tmp_start
) A lateral view explode(json_array(yanqi_event)) B as k
"

hive -e "$sql"
### --- Load the full event log into the DWD layer

[root@hadoop02 ~]# sh /data/yanqidw/script/advertisement/dwd_load_event_log.sh 2020-07-21
### --- Check whether the full event log was parsed into the DWD layer

hive (default)> show partitions dwd.dwd_event_log;
partition
dt=2020-07-21

hive (default)> select count(*) from dwd.dwd_event_log where dt='2020-07-21';
8951
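~~~     Before extracting the ad events, it can help to confirm that they are actually present in the partition. A simple check (not part of the original flow) is to count rows per event name, since the next step keeps only rows where name='ad':

select name, count(*) as cnt
from dwd.dwd_event_log
where dt='2020-07-21'
group by name;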
5. DWD Layer Data Loading: Extracting Ad Click Events
### --- Extract the ad click events from the full event log:

[root@hadoop02 ~]# vim /data/yanqidw/script/advertisement/dwd_load_ad_log.sh
#!/bin/bash

source /etc/profile
if [ -n "$1" ] ;
then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
insert overwrite table dwd.dwd_ad
PARTITION (dt='$do_date')
select
    device_id,
    uid,
    app_v,
    os_type,
    event_type,
    language,
    channel,
    area,
    brand,
    report_time,
    get_json_object(event_json,'$.duration') ,
    get_json_object(event_json,'$.ad_action') ,
    get_json_object(event_json,'$.shop_id') ,
    get_json_object(event_json,'$.ad_type'),
    get_json_object(event_json,'$.show_style'),
    get_json_object(event_json,'$.product_id'),
    get_json_object(event_json,'$.place'),
    get_json_object(event_json,'$.sort'),
    from_unixtime(ceil(report_time/1000), 'HH')
from dwd.dwd_event_log
where dt='$do_date' and name='ad';
"

hive -e "$sql"
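~~~     Note how the hour column is derived: report_time is a millisecond epoch stored as a string, so it is divided by 1000 (rounded up with ceil) and formatted with 'HH'. A minimal check, reusing a timestamp from the test data above:

select from_unixtime(ceil(1596343881690 / 1000), 'HH');
-- returns the two-digit hour of that event in the session time zone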
### --- Run the script to extract the ad click events from the full event log

[root@hadoop02 ~]# sh /data/yanqidw/script/advertisement/dwd_load_ad_log.sh 2020-07-21
### --- Check whether the ad click event log was extracted

hive (default)> show partitions dwd.dwd_ad;
partition
dt=2020-07-21

hive (default)> select count(*) from dwd.dwd_ad where dt='2020-07-21';
1454
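~~~     As a final sanity check (a sketch, not part of the original steps), the loaded ad events can be profiled by hour and ad_action:

select `hour`, ad_action, count(*) as cnt
from dwd.dwd_ad
where dt='2020-07-21'
group by `hour`, ad_action
order by `hour`, ad_action;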