Real-Time Tags for User Profiles

This article describes the real-time tag side of a user profile system. It adapts Alibaba Cloud's reference architecture: a Lambda design in which offline tags correct the real-time tagging strategy. The stack combines MaxCompute, Hologres, and Realtime Compute for Apache Flink, with the DataHub source and the sinks implemented through the SDK/API. The article walks through the pom.xml, the entry class, Event_info, the configuration file, the utility class, and the custom sinks: HoloSink, OdpsSink, DatahubSink, and the real-time tagging job.

1. The user profile system is split into real-time tags and offline tags. The design follows Alibaba Cloud's user profile tag architecture with modifications: a Lambda architecture in which the T-1 offline tags correct the real-time tagging strategy.
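To make the correction step concrete, here is a minimal sketch of the merge rule (the class and method names are illustrative, not the article's actual code): the T-1 batch result from MaxCompute is treated as authoritative, and the real-time tag only covers the window since the last batch run.

public class TagCorrection {
    /**
     * Lambda-style correction sketch: the offline tag computed by the T-1
     * batch job overrides the stream-derived tag; the real-time tag only
     * fills the gap until the next batch run.
     */
    public static String effectiveTag(String realtimeTag, String offlineTagT1) {
        return offlineTagT1 != null ? offlineTagT1 : realtimeTag;
    }
}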

2. Architecture design

Alibaba Cloud's reference architecture: (diagram omitted)

The modified architecture: (diagram omitted)

Reference: "Audience selection and data services based on MaxCompute + Hologres", Alibaba Cloud developer community: https://developer.aliyun.com/article/792500

3. Implementation

Our Hologres instance is on version 0.8, which does not support the Blink source/sink connectors, so the integration is implemented through the SDK/API instead.

①pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.alibaba.blink</groupId>
    <artifactId>blink-udx-3.x</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <scala.version>2.11.12</scala.version>
        <scala.binary.version>2.11</scala.binary.version>
        <blink.version>blink-3.3.0</blink.version>
        <java.version>1.8</java.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <sdk.version>0.38.3-public</sdk.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${blink.version}</version>
<!--            <scope>provided</scope>-->
<!--            <systemPath>${project.basedir}/lib/flink-core-blink-3.2.2.jar</systemPath>-->
        </dependency>
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${blink.version}</version>
<!--            <scope>provided</scope>-->
<!--            <systemPath>${project.basedir}/lib/flink-streaming-java_2.11-blink-3.2.2.jar</systemPath>-->
        </dependency>

<!-- Needed when packaging custom UDFs -->
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-table_${scala.binary.version}</artifactId>
            <version>${blink.version}</version>
<!--            <scope>provided</scope>-->
<!--            <systemPath>${project.basedir}/lib/flink-table_2.11-blink-3.2.2.jar</systemPath>-->
        </dependency>
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${blink.version}</version>
<!--                        <scope>provided</scope>-->
            <!--            <systemPath>${project.basedir}/lib/flink-table_2.11-blink-3.2.2.jar</systemPath>-->
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba.blink/flink-streaming-scala -->
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${blink.version}</version>
<!--            <scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${blink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-scala_${scala.binary.version}</artifactId>
            <version>${blink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba.blink/flink-jdbc -->
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-jdbc</artifactId>
            <version>${blink.version}</version>
        </dependency>


        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
            <scope>provided</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.scala-lang/scala-reflect -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-reflect</artifactId>
            <version>2.11.12</version>
            <scope>provided</scope>
        </dependency>


        <dependency>
            <groupId>com.aliyun.datahub</groupId>
            <artifactId>aliyun-sdk-datahub</artifactId>
            <version>2.12.2-public</version>
            <scope>provided</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>jcl-over-slf4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>jul-slf4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <!--<exclusion>-->
                <!--<artifactId>jackson-databind</artifactId>-->
                <!--<groupId>com.fasterxml.jackson.core</groupId>-->
                <!--</exclusion>-->
                <!--<exclusion>-->
                <!--<artifactId>jackson-annotations</artifactId>-->
                <!--<groupId>com.fasterxml.jackson.core</groupId>-->
                <!--</exclusion>-->
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.aliyun.emr</groupId>
            <artifactId>emr-datahub_2.11</artifactId>
            <version>2.0.0</version>
            <scope>provided</scope>
        </dependency>
        <!-- Required to package the DataHub source and sink -->
        <dependency>
            <groupId>com.alibaba.flink</groupId>
            <artifactId>datahub-connector</artifactId>
            <version>0.1-SNAPSHOT</version>
            <classifier>jar-with-dependencies</classifier>
<!--            <scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>42.1.1</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.76</version>
            <!--<scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
            <version>${blink.version}</version>
            <scope>provided</scope>
        </dependency>

        <!-- Redis as a dimension-table cache (via Vert.x) -->
        <dependency>
            <groupId>io.vertx</groupId>
            <artifactId>vertx-core</artifactId>
            <version>3.5.2</version>
        </dependency>
        <dependency>
            <groupId>io.vertx</groupId>
            <artifactId>vertx-redis-client</artifactId>
            <version>3.5.2.CR3</version>
        </dependency>


        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
        </dependency>
        <!-- Required by OdpsSink -->
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-sdk-core</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-sdk-udf</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-udf-local</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-sdk-mapred</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-mapred-local</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-sdk-graph</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-graph-local</artifactId>
            <version>${sdk.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>

        <!-- ODPS JDBC driver -->
        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-jdbc</artifactId>
            <version>3.0.1</version>
            <classifier>jar-with-dependencies</classifier>
        </dependency>

        <!-- Java utility library -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>4.6.3</version>
        </dependency>



    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.1.1</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.alibaba.blink.demo.ods.Ods_Stream_Demo</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

The DataHub source connector must be installed into the local Maven repository manually; see "Reading DataHub data examples (Realtime Compute for Apache Flink)" on Alibaba Cloud: https://help.aliyun.com/document_detail/156813.html
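If you have the connector jar locally, installing it typically looks like this (the jar path is illustrative; the coordinates match the pom.xml above):

mvn install:install-file -Dfile=datahub-connector-0.1-SNAPSHOT-jar-with-dependencies.jar -DgroupId=com.alibaba.flink -DartifactId=datahub-connector -Dversion=0.1-SNAPSHOT -Dclassifier=jar-with-dependencies -Dpackaging=jar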

② Entry class
package com.alibaba.blink.demo.ods;

import com.alibaba.blink.mojo.Record;
import com.alibaba.blink.sink.holo.HoloSink;
import com.alibaba.blink.sink.holo.HoloSink1;
import com.alibaba.blink.sink.ods.DatahubSink;
import com.alibaba.blink.sink.ods.OdpsSink;
import com.alibaba.blink.source.ods.Event_info;
import com.alibaba.flink.connectors.datahub.datastream.source.DatahubSourceFunction;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.odps.jdbc.utils.OdpsLogger;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.io.Serializable;
import java.util.List;

public class Ods_Stream_Demo implements Serializable {


    public static void main(String[] args) throws Exception {
        // ODPS JDBC logger; instantiated here but not otherwise used in this demo
        OdpsLogger logger = new OdpsLogger(Ods_Stream_Demo.class.getName(), OdpsLogger.getDefaultOutputPath(), true, "");


        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<List<RecordEntry>> listDataStreamSource = env.addSource(
                new DatahubSourceFunction(
                        Event_info.endPoint
                        ,Event_info.projectName
                        ,Event_info.topicSourceName
                        ,Event_info.accessId
                        ,Event_info.accessKey
                        ,Event_info.datahubStartInMs
                        ,Event_info.datahubEndInMs
                        ,20L
                        ,1000L
                        ,1000
                ));

        // Flatten each batch of DataHub records into individual Record POJOs,
        // then drop records without an oneid or an event
        DataStream<Record> result = listDataStreamSource
                .flatMap((FlatMapFunction<List<RecordEntry>, Record>) (ls, collector) -> {
                    for (RecordEntry recordEntry : ls) {
                        collector.collect(new Record(recordEntry));
                    }
                })
                // lambdas lose generic type information, so declare it explicitly
                .returns(Record.class)
                .filter(s -> s.getOneid() != null)
                .filter(s -> s.getEvent() != null);

        result.print(); // debug output

        // Fan out: Hologres for serving, MaxCompute for offline storage,
        // DataHub for the downstream real-time layers
        // (HoloSink1 is a second Hologres sink variant, not shown in this article)
        result.addSink(new HoloSink()).setParallelism(10);
        result.addSink(new OdpsSink()).setParallelism(10);
        result.addSink(new HoloSink1()).setParallelism(10);
        result.addSink(new DatahubSink());

        env.execute();

    }

}
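The Record POJO (com.alibaba.blink.mojo.Record) is used throughout but never shown in the article. Here is a minimal sketch consistent with how it is used above; the DataHub field names ("oneid", "event") are assumptions about the topic schema:

package com.alibaba.blink.mojo;

import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.TupleRecordData;

import java.io.Serializable;

public class Record implements Serializable {
    private String oneid;
    private String event;

    // Built from a DataHub tuple record; field names assumed to match the topic schema
    public Record(RecordEntry entry) {
        TupleRecordData data = (TupleRecordData) entry.getRecordData();
        this.oneid = (String) data.getField("oneid");
        this.event = (String) data.getField("event");
    }

    public Record(String oneid, String event) {
        this.oneid = oneid;
        this.event = event;
    }

    public String getOneid() { return oneid; }
    public String getEvent() { return event; }

    @Override
    public String toString() {
        return "Record{oneid=" + oneid + ", event=" + event + "}";
    }
}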

③Event_info

package com.alibaba.blink.source.ods;

import com.alibaba.blink.utils.ConfigPropUtils;
import com.alibaba.blink.utils.TimeToStampUtil;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * @author yangyingchun
 * @version 1.0
 * @date 2021/12/14 15:55
 */
public class Event_info {
    static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    public static String endPoint = ConfigPropUtils.get("my_datahub_endpoint");
    //public static String endPoint = "public endpoint"; // public access (not needed when the internal endpoint is configured)
    public static String projectName = ConfigPropUtils.get("ods.datahub_projectname");
    public static String topicSourceName =  ConfigPropUtils.get("ods.datahub_source_topic.event_info");
    public static String topicSinkName =  ConfigPropUtils.get("datahub_sink_topic");
    public static String accessId = ConfigPropUtils.get("my_accessId");
    public static String accessKey = ConfigPropUtils.get("my_accessKey");
    // Timestamp of the consumption start position, e.g. TimeToStampUtil.timeToStamp("2021-12-21"); here it defaults to today's date
    public static Long datahubStartInMs = TimeToStampUtil.timeToStamp(formatter.format(new Date()));
    public static Long datahubEndInMs=Long.MAX_VALUE;

    public static void main(String[] args) {
        Date currentTime = new Date();
        String dateString = formatter.format(currentTime);
        System.out.println(dateString);
    }

}
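TimeToStampUtil is referenced above but not included in the article; a minimal sketch consistent with its usage (a yyyy-MM-dd string to epoch milliseconds):

package com.alibaba.blink.utils;

import java.text.ParseException;
import java.text.SimpleDateFormat;

public class TimeToStampUtil {
    // Parse a yyyy-MM-dd date string into epoch milliseconds (midnight, local time)
    public static Long timeToStamp(String day) {
        try {
            return new SimpleDateFormat("yyyy-MM-dd").parse(day).getTime();
        } catch (ParseException e) {
            throw new IllegalArgumentException("expected yyyy-MM-dd, got: " + day, e);
        }
    }
}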

④resource/config.properties

my_datahub_endpoint=https://datahub.cn-********.cn
datahub_projectname=itsl
datahub_source_topic=event_info
datahub_source_topic1=event_info1
datahub_sink_topic=datahub_sink
my_accessId=3231321************
my_accessKey=32132131321***********
my_holo_url=jdbc:postgresql://holo-cn-bzdcbpeb21b7-********:80/ods?tcpKeepAlive=true
postgresdriver=org.postgresql.Driver
my_odps_endpoint=http://service.cn-**********.cn/api
my_odps_project=ODS
my_odps_driver=com.aliyun.odps.jdbc.OdpsDriver
my_odps_url=jdbc:odps:http://service.cn-**********.cn/api?project=ODS&charset=UTF-8&interactiveMode=true



kafka.topic=topic_name
bootstrap.servers=localhost:9092
zookeeper.connect=localhost:2181
group.id001=customer-001


## Data warehouse settings
###############################ODS###############################
ods.datahub_projectname=ODS
ods.datahub_source_topic.event_info=event_info
###############################ODS###############################
###############################DWD###############################
dwd.datahub_projectname=ODS
dwd.datahub_source_topic.datahubsink=datahub_sink
###############################DWD###############################



## Test settings
testfile=E:\\software\\workspace\\blink_udx_3x-master\\src\\main\\resources\\testfile

⑤ Utility class (ConfigPropUtils)

package com.alibaba.blink.utils;

import java.io.InputStream;
import java.util.Properties;

public class ConfigPropUtils {
    private static Properties props;
    static {
        // "config.properties" must be on the classpath (src/main/resources)
        try (InputStream in = ConfigPropUtils.class.getClassLoader()
                .getResourceAsStream("config.properties")) {
            props = new Properties();
            props.load(in);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Look up a value by key; returns null when the key is absent
    public static String get(String key) {
        return props.getProperty(key, null);
    }

}
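Reading a key is then a one-liner; the keys are the ones defined in config.properties above:

String endpoint = ConfigPropUtils.get("my_datahub_endpoint"); // null if the key is missing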

⑥ Custom sinks

a) HoloSink: updating tags by oneid

package com.alibaba.blink.sink.holo;

import com.alibaba.blink.mojo.Record;
import com.alibaba.blink.utils.ConfigPropUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.UUID;

public class HoloSink extends RichSinkFunction<Record> {
    private static String url= ConfigPropUtils.get("my_holo_url");
    private static String username = ConfigPropUtils.get("my_accessId");
    private static String password = ConfigPropUtils.get("my_accessKey");
    private static String postgresdriver = ConfigPropUtils.get("postgresdriver");
    private Connection connection;
    private ThreadLocal<PreparedStatement> pstmt;
    private ThreadLocal<PreparedStatement> querymt;
    private ThreadLocal<PreparedStatement> updatemt;

    private Connection getConnection() {
        Connection conn = null;
        try {
            Class.forName(postgresdriver);
            conn = DriverManager.getConnection(url, username, password);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        this.connection = getConnection();
        this.pstmt = new ThreadLocal<>();
        this.querymt = new ThreadLocal<>();
        this.updatemt = new ThreadLocal<>();
    }

    @Override
    public void invoke(Record record, Context context) throws Exception {
        if ( null == record || record.getOneid() == null) {
            System.out.println("record is null!!!");
            return;
        }
        // One PreparedStatement per sink thread, created lazily via ThreadLocal
        if (this.querymt.get() == null)
            this.querymt.set(this.connection.prepareStatement("select oneid,event from holo_sink where oneid=?"));
        if (this.updatemt.get() == null)
            this.updatemt.set(this.connection.prepareStatement("update holo_sink set event=? where oneid=?"));
        if (this.pstmt.get() == null)
            this.pstmt.set(this.connection.prepareStatement("insert into holo_sink(oneid,event) values (?,?)"));

        this.querymt.get().setString(1, record.getOneid());
        try (ResultSet resultSet = this.querymt.get().executeQuery()) {
            if (resultSet.next()) {
                // the oneid already exists: update its event
                this.updatemt.get().setString(1, record.getEvent());
                this.updatemt.get().setString(2, record.getOneid());
                this.updatemt.get().executeUpdate();
                System.out.println("update " + record + ",threadId:" + Thread.currentThread().getId());
            } else {
                // the oneid does not exist: insert with a newly generated oneid
                this.pstmt.get().setString(1, UUID.randomUUID().toString());
                this.pstmt.get().setString(2, record.getEvent());
                this.pstmt.get().executeUpdate();
                System.out.println("insert " + record + ",threadId:" + Thread.currentThread().getId());
            }
        }
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (this.pstmt.get() != null)
            this.pstmt.get().close();
        if (this.querymt.get() != null)
            this.querymt.get().close();
        if (this.updatemt.get() != null)
            this.updatemt.get().close();
        if (this.connection != null)
            this.connection.close();
    }
}
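Since Hologres speaks the PostgreSQL protocol, the select-then-update/insert sequence in invoke() can usually be collapsed into one statement. A sketch, assuming holo_sink has a primary key on oneid and your Hologres version supports INSERT ... ON CONFLICT:

        // Single round trip instead of query + update/insert
        PreparedStatement upsert = connection.prepareStatement(
                "insert into holo_sink(oneid, event) values (?, ?) " +
                "on conflict (oneid) do update set event = excluded.event");
        upsert.setString(1, record.getOneid());
        upsert.setString(2, record.getEvent());
        upsert.executeUpdate();

Note one semantic difference: this keeps the incoming oneid on insert, whereas the original generates a fresh UUID when the oneid is not found; adjust to your needs.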

b) OdpsSink: persisting data to the offline warehouse

package com.alibaba.blink.sink.ods;

import com.alibaba.blink.mojo.Record;
import com.alibaba.blink.utils.ConnOdpsUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.UUID;

/**
 * @author yangyingchun
 * @version 1.0
 * @date 2021/12/20 8:45
 */
public class OdpsSink extends RichSinkFunction<Record> {

    // NOTE: both fields are static, i.e. shared by every subtask in the JVM
    static Connection odpsConn;
    static ThreadLocal<PreparedStatement> preparedStatement;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // JDBC connection to MaxCompute (see ConnOdpsUtil and config.properties)
        odpsConn = ConnOdpsUtil.getOdpsConn();
        preparedStatement = new ThreadLocal<>();
    }

    @Override
    public void invoke(Record record, Context context) throws Exception {
        // Requires interactiveMode=true in the JDBC URL (see config.properties)
        String sql = "insert into table odps_sink values (?,?,?)";
        if (preparedStatement.get() == null)
            preparedStatement.set(odpsConn.prepareStatement(sql));

        preparedStatement.get().setString(1, record.getOneid());
        preparedStatement.get().setString(2, record.getEvent());
        preparedStatement.get().setString(3, UUID.randomUUID().toString());

        int i = preparedStatement.get().executeUpdate();
        System.out.println("successfully updated " + i + " records,ThreadID:" + Thread.currentThread().getId());
    }
}
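ConnOdpsUtil is referenced above but not shown; a minimal sketch built from the my_odps_* keys in config.properties:

package com.alibaba.blink.utils;

import java.sql.Connection;
import java.sql.DriverManager;

public class ConnOdpsUtil {
    // JDBC connection to MaxCompute; the URL in config.properties already
    // sets the project, charset, and interactiveMode=true
    public static Connection getOdpsConn() throws Exception {
        Class.forName(ConfigPropUtils.get("my_odps_driver"));
        return DriverManager.getConnection(
                ConfigPropUtils.get("my_odps_url"),
                ConfigPropUtils.get("my_accessId"),
                ConfigPropUtils.get("my_accessKey"));
    }
}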

c) DatahubSink: feeding the real-time warehouse layers

package com.alibaba.blink.sink.ods;

import com.alibaba.blink.mojo.Record;
import com.alibaba.blink.utils.ConfigPropUtils;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;
import com.aliyun.datahub.client.model.PutRecordsResult;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.RecordSchema;
import com.aliyun.datahub.client.model.TupleRecordData;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.util.ArrayList;
import java.util.List;

/**
 * @author yangyingchun
 * @version 1.0
 * @date 2021/12/14 13:25
 */
public class DatahubSink extends RichSinkFunction<Record> {

    // Endpoint: use your region's endpoint (the example is for China East 1).
    // The internal endpoint suffices inside the VPC; a public endpoint is only needed outside it.
    static String endpoint = ConfigPropUtils.get("my_datahub_endpoint");
    static String projectName = ConfigPropUtils.get("datahub_projectname");
    static String topicSinkName =  ConfigPropUtils.get("datahub_sink_topic");
    static String accessId = ConfigPropUtils.get("my_accessId");
    static String accessKey = ConfigPropUtils.get("my_accessKey");
    static DatahubClient datahubClient;
    static RecordSchema recordSchema;
    static int retryTimes = 10;
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(endpoint,
                                // enable binary transport (supported by server 2.12+);
                                // on Apsara Stack, try false if errors occur
                                new AliyunAccount(accessId, accessKey), true))
                // HttpConfig is optional; defaults apply when unset
                .setHttpConfig(new HttpConfig()
                        .setCompressType(HttpConfig.CompressType.LZ4) // LZ4 compression recommended for reads/writes
                        .setConnTimeout(10000))
                .build();
        // fetch the topic schema
        recordSchema = datahubClient.getTopic(projectName, topicSinkName).getRecordSchema();
    }

    @Override
    public void invoke(Record record, Context context) throws Exception {
        List<RecordEntry> recordEntries = new ArrayList<>();
        RecordEntry recordEntry = new RecordEntry();
        // Optional per-record attributes (e.g. ip, hostname); writes succeed without them
        recordEntry.addAttribute("key2", "value2");
        TupleRecordData data = new TupleRecordData(recordSchema);
        data.setField("oneid", record.getOneid());
        data.setField("message", record.getEvent());
        data.setField("if_exists", String.valueOf(Math.random()));
        recordEntry.setRecordData(data);
        recordEntries.add(recordEntry);
        datahubClient.putRecords(projectName, topicSinkName, recordEntries);
    }
}
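retryTimes is declared above but never used. The DataHub client reports per-record failures on PutRecordsResult, so the usual pattern is to retry only the failed subset; a sketch (verify the accessor names against your SDK version):

        PutRecordsResult result = datahubClient.putRecords(projectName, topicSinkName, recordEntries);
        for (int attempt = 0; attempt < retryTimes && result.getFailedRecordCount() > 0; attempt++) {
            // resubmit only the records that failed
            result = datahubClient.putRecords(projectName, topicSinkName, result.getFailedRecords());
        }

Buffering more than one RecordEntry per putRecords() call would also cut request overhead, since invoke() currently sends a single-element list per event.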

d) Real-time tagging job

package com.alibaba.blink.demo.holo;

import com.alibaba.blink.mojo.GetResultSet;
import com.alibaba.blink.sink.holo.Holo_Write_Sink;
import com.alibaba.blink.mojo.Record;
import com.alibaba.blink.utils.ConfigPropUtils;
import com.alibaba.flink.connectors.datahub.datastream.source.DatahubSourceFunction;
import com.aliyun.datahub.client.model.RecordEntry;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
 * @author yangyingchun
 * @version 1.0
 * @date 2021/12/13 14:46
 */
public class Stream_Holo_Tags {
    private static String endPoint = ConfigPropUtils.get("my_datahub_endpoint");
    //private static String endPoint = "public endpoint"; // public access (not needed when the internal endpoint is configured)
    private static String projectName = ConfigPropUtils.get("datahub_projectname");
    private static String topicSourceName = ConfigPropUtils.get("datahub_source_topic1");
    private static String accessId = ConfigPropUtils.get("my_accessId");
    private static String accessKey = ConfigPropUtils.get("my_accessKey");
    private static Long datahubStartInMs = 0L; // timestamp of the consumption start position; 0 starts from the earliest retained data
    private static Long datahubEndInMs = Long.MAX_VALUE;
    private static List<String> oneids = new ArrayList<>();

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataStreamSource<List<RecordEntry>> listDataStreamSource = env.addSource(
                new DatahubSourceFunction(endPoint,
                        projectName,
                        topicSourceName,
                        accessId,
                        accessKey,
                        datahubStartInMs,
                        datahubEndInMs,
                        20L,
                        1000L,
                        1000))
               ;


        SingleOutputStreamOperator<Record> result = listDataStreamSource.flatMap(new FlatMapFunction<List<RecordEntry>, Record>() {

            @Override
            public void flatMap(List<RecordEntry> recordEntries, Collector<Record> collector) throws Exception {
                for (RecordEntry recordEntry : recordEntries) {
                    Record record = new Record(recordEntry);
                    collector.collect(record);
                }
            }
        })
                // open() of the RichMapFunction runs once per subtask and can load
                // reference data into memory; note that with parallelism 8 each
                // subtask keeps its own copy, loaded once at job start
                .map(new RichMapFunction<Record, Record>() {
                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        oneids = GetResultSet.getResultSet();
                    }

                    @Override
                    public Record map(Record record) throws Exception {
                        // if the oneid is already known, keep the record as-is
                        if (oneids.contains(record.getOneid())) {
                            return record;
                        // otherwise assign a newly generated oneid
                        } else {
                            return new Record(UUID.randomUUID().toString(), record.getEvent());
                        }
                    }
                })
                .filter(record ->
                        null!=record &&
                        record.getOneid() != null);

        result.addSink(new Holo_Write_Sink()).setParallelism(8);


        env.execute("join demo");
    }
}
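GetResultSet is referenced above but not included in the article; a minimal sketch consistent with its usage, loading the known oneids from Hologres (the holo_sink table name is an assumption):

package com.alibaba.blink.mojo;

import com.alibaba.blink.utils.ConfigPropUtils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

public class GetResultSet {
    // Load all known oneids into memory for the RichMapFunction above
    public static List<String> getResultSet() throws Exception {
        List<String> oneids = new ArrayList<>();
        Class.forName(ConfigPropUtils.get("postgresdriver"));
        try (Connection conn = DriverManager.getConnection(
                     ConfigPropUtils.get("my_holo_url"),
                     ConfigPropUtils.get("my_accessId"),
                     ConfigPropUtils.get("my_accessKey"));
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("select oneid from holo_sink")) {
            while (rs.next()) {
                oneids.add(rs.getString(1));
            }
        }
        return oneids;
    }
}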

e) Holo_Write_Sink: writing the tag table

package com.alibaba.blink.sink.holo;

import com.alibaba.blink.mojo.Record;
import com.alibaba.blink.mojo.TAGS_ID;
import com.alibaba.blink.mojo.TAGS_VALUES;
import com.alibaba.blink.utils.ConfigPropUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Timestamp;

/**
 * @author yangyingchun
 * @version 1.0
 * @date 2021/12/13 14:48
 */
public class Holo_Write_Sink extends RichSinkFunction<Record> {
    private static String url= ConfigPropUtils.get("my_holo_url");
    private static String username = ConfigPropUtils.get("my_accessId");
    private static String password = ConfigPropUtils.get("my_accessKey");
    private static String postgresdriver = ConfigPropUtils.get("postgresdriver");
    private Connection connection;
    private ThreadLocal<PreparedStatement> pstmt;
    private Connection getConnection() {
        Connection conn = null;
        try {
            Class.forName(postgresdriver);
            conn = DriverManager.getConnection(url, username, password);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        this.connection = getConnection();
        this.pstmt = new ThreadLocal<>();
    }

    @Override
    public void invoke(Record record, Context context) throws Exception {
        if ( null == record || record.getOneid() == null) {
            System.out.println("record is null!!!");
            return;
        }
        if (this.pstmt.get() == null)
            this.pstmt.set(this.connection.prepareStatement(
                    "insert into " +
                            "stream_tags" +
                            "(oneid,phone,tag_id,tag_value,create_time) " +
                            "values (?,?,?,?,?)"
                    )
            );
        this.pstmt.get().setString(1, record.getOneid());
        this.pstmt.get().setString(2, "phone" + String.valueOf(Math.random() * 1000 + 1)); // mock phone number for the demo
        this.pstmt.get().setString(3, String.valueOf(TAGS_ID.Key200001));
        this.pstmt.get().setString(4, TAGS_VALUES.get_value(TAGS_ID.Key200001, record.getEvent()));
        this.pstmt.get().setTimestamp(5, new Timestamp(System.currentTimeMillis()));

        int i = this.pstmt.get().executeUpdate();
        if (i > 0) {
            System.out.println("!!! tag updated successfully !!! threadId:" + Thread.currentThread().getId());
        }
    }
}
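TAGS_ID and TAGS_VALUES are referenced above but not shown. A minimal sketch consistent with their usage, an enum of tag ids plus a lookup that maps an event to a tag value; the mapping rule below is purely illustrative:

// file: TAGS_ID.java
package com.alibaba.blink.mojo;

public enum TAGS_ID {
    Key200001 // a real-time behavior tag id; further ids would follow the same pattern
}

// file: TAGS_VALUES.java
package com.alibaba.blink.mojo;

public class TAGS_VALUES {
    // Map an event payload to a tag value for the given tag id (illustrative rule)
    public static String get_value(TAGS_ID tagId, String event) {
        if (tagId == TAGS_ID.Key200001) {
            return (event != null && event.contains("pay")) ? "recent_payer" : "recent_browser";
        }
        return "unknown";
    }
}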
