Flink1.10-SQL(kafka&PG2ES)

input

//Dimension table data (PostgreSQL); columns: username, deptno, deptname, location
deptdim
u02	2	d02	l02
u01	1	d01	l01
u03	1	d01	l01
//Main (fact) table data (Kafka); each record is "<username> <transmit_time>"
pageLog
>u01 1234
>u02 2333
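
For reference, the two pageLog records above can be pushed into Kafka with the getKafkaProducer() helper from the KafkaUtils class shown later in this post. This is a minimal sketch (the PageLogProducerDemo class name is made up for illustration); it assumes the pageLog topic already exists and the broker address configured in KafkaUtils is reachable.

package com.sql.app;

import com.sql.utils.kafka_api.KafkaUtils;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class PageLogProducerDemo {
    public static void main(String[] args) {
        Producer<String, String> producer = KafkaUtils.getKafkaProducer();
        // Each record is "<username> <transmit_time>"
        producer.send(new ProducerRecord<>("pageLog", "u01 1234"));
        producer.send(new ProducerRecord<>("pageLog", "u02 2333"));
        producer.flush();
        producer.close();
    }
}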

output

//Documents indexed in Elasticsearch (index: user_transmit, type: read11)
 {
   "_index" : "user_transmit",
   "_type" : "read11",
   "_id" : "xUgcMXgBsBclsWGUZSyg",
   "_score" : 1.0,
   "_source" : {
     "location" : "l02",
     "username" : "u02",
     "deptname" : "d02",
     "transmit_count" : 1
   }
 },
{
  "_index" : "user_transmit",
  "_type" : "read11",
  "_id" : "izwaMXgBu-J4ALZ21oGG",
  "_score" : 1.0,
  "_source" : {
    "location" : "l01",
    "username" : "u01",
    "deptname" : "d01",
    "transmit_count" : 1
  }
}

Kafka5Pg2ES

package com.sql.app;

import com.sql.bean.UserTransmit;
import com.sql.utils.es.User_Dept_ESSink;
import com.sql.utils.kafka_api.KafkaUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

public class Kafka5Pg2ES {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        DataStreamSource<String> kf_ds = env.addSource(KafkaUtils.getFlinkKafkaConsumer("pageLog"));
        // kf_ds.print();
        EnvironmentSettings mySetting = EnvironmentSettings
                .newInstance()
//                .useOldPlanner()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, mySetting);


        SingleOutputStreamOperator<UserTransmit> user_ds = kf_ds.map(new MapFunction<String, UserTransmit>() {

            @Override
            public UserTransmit map(String s) throws Exception {
                // Each record is "<username> <transmit_time>", e.g. "u01 1234"
                String[] split = s.split(" ");
                return new UserTransmit(split[0], Long.parseLong(split[1]));
            }
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<UserTransmit>(Time.seconds(2)) {
            @Override
            public long extractTimestamp(UserTransmit element) {
                return element.getTransmit_time() * 1000L;
            }
        });

        tableEnv.createTemporaryView("UserLog", user_ds);
        //Table result = tableEnv.sqlQuery("select * from UserLog limit 10");
        //tableEnv.toRetractStream(result, Row.class).print("sql");
//        Column lengths like VARCHAR(10) in the CREATE statement are not handled reliably here;
//        DOUBLE() fails outright and DECIMAL(5,2) also fails. In short: use DECIMAL in the database
//        and plain double on the Java side.
//        Types must match what Flink uses internally, not the database types: e.g. the database column
//        is TIMESTAMP(6), but internally this is processed as TIMESTAMP(3).
//        i.e. TIMESTAMP(3), DOUBLE
        // Dimension table registered via the JDBC connector (PostgreSQL)
        String dimDDL =
                "CREATE TABLE DeptDim (                                       "
                        + "       username VARCHAR                                                   "
                        + "      ,deptno VARCHAR                                      "
                        + "      ,deptname VARCHAR                                            "
                        + "      ,location VARCHAR                                               "
                        + ") WITH (                                                             "
                        + "        'connector.type' = 'jdbc',                                   "
                        + "        'connector.url' = 'jdbc:postgresql://hadoop163:5432/test_db2', "
                        + "        'connector.table' = 'deptdim',                                "
                        + "        'connector.username' = 'test_user',                               "
                        + "        'connector.password' = 'aaaaaa',               "
                        + "        'connector.write.flush.max-rows' = '1'                       "
                        + ")";

        tableEnv.sqlUpdate(dimDDL);
//        Table result = tableEnv.sqlQuery("select * from DeptDim limit 10");
//        tableEnv.toRetractStream(result, Row.class).print("sql");
        /**
         * Main (fact) table: UserLog
         * Dimension table:   DeptDim
         */
        // A processing-time lookup join (FOR SYSTEM_TIME AS OF PROCTIME()) could be used
        // instead of the regular join below; see the sketch after this listing.
        Table result = tableEnv.sqlQuery("SELECT \n" +
                "u.username,d.deptname,d.location,count(*) AS transmit_count\n" +
                "FROM UserLog AS u\n" +
                "LEFT JOIN DeptDim AS d \n" +
                "ON u.username = d.username\n" +
                "GROUP BY u.username,d.deptname,d.location");

        //tableEnv.toRetractStream(result, Row.class).print("sql");
        DataStream<Tuple2<Boolean, Row>> tableResult = tableEnv.toRetractStream(result, Row.class);
        ElasticsearchSink.Builder<Tuple2<Boolean, Row>> esSink = User_Dept_ESSink.getESSink();
        tableResult.print();
        // Printed result:
        // (true,u01,d01,l01,1)
        // (true,u02,d02,l02,1)
        tableResult.addSink(esSink.build());
        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
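
The FOR SYSTEM_TIME AS OF PROCTIME() comment above hints at the Blink planner's lookup-join syntax, where each UserLog record queries the JDBC dimension table directly (optionally cached via the connector.lookup.* options on DeptDim) instead of joining against a fully materialized table. Below is a minimal sketch of that variant, under the assumptions that it replaces the plain createTemporaryView("UserLog", user_ds) call above, that the createTemporaryView overload taking a field string is available in this Flink version, and that the processing-time attribute name pt and the variable lookupResult are chosen freely for illustration.

        // Sketch only: register the stream with an extra processing-time attribute "pt"
        tableEnv.createTemporaryView("UserLog", user_ds, "username, transmit_time, pt.proctime");

        // Lookup join: DeptDim is queried per incoming record at processing time
        Table lookupResult = tableEnv.sqlQuery(
                "SELECT u.username, d.deptname, d.location, COUNT(*) AS transmit_count\n" +
                "FROM UserLog AS u\n" +
                "LEFT JOIN DeptDim FOR SYSTEM_TIME AS OF u.pt AS d\n" +
                "ON u.username = d.username\n" +
                "GROUP BY u.username, d.deptname, d.location");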

User_Dept_ESSink

package com.sql.utils.es;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.types.Row;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class User_Dept_ESSink {
    public static ElasticsearchSink.Builder<Tuple2<Boolean, Row>> getESSink() {
        List<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("192.168.1.162", 9200));
        httpHosts.add(new HttpHost("192.168.1.163", 9200));
        httpHosts.add(new HttpHost("192.168.1.164", 9200));
        // The sink function passed as the second argument to the ElasticsearchSink.Builder
        MyElasticSearchSinkSFunction myElasticSearchSinkSFunction = new MyElasticSearchSinkSFunction();

        ElasticsearchSink.Builder<Tuple2<Boolean, Row>> esBuilder = new ElasticsearchSink.Builder<>(httpHosts, myElasticSearchSinkSFunction);

        // Bulk flush size: flush after every single record
        // TODO do not use 1 in production (it hurts performance); it is only set here
        //      so the results of the unbounded stream show up in ES immediately
        esBuilder.setBulkFlushMaxActions(1);

        return esBuilder;
    }
    public static class MyElasticSearchSinkSFunction implements ElasticsearchSinkFunction<Tuple2<Boolean, Row>> {
        @Override
        public void process(Tuple2<Boolean, Row> element, RuntimeContext ctx, RequestIndexer indexer) {
            Map<String, Object> sourceMap = new HashMap<String, Object>();
            //sourceMap.put("data", element.toString());
//            SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
//            String dateTime = df.format(new Date());
            Row row = element.f1;
            // Row layout matches the SELECT: username, deptname, location, transmit_count
            sourceMap.put("username", row.getField(0));
            sourceMap.put("deptname", row.getField(1));
            sourceMap.put("location", row.getField(2));
            sourceMap.put("transmit_count", row.getField(3));
            // Build an IndexRequest for the user_transmit index
            IndexRequest indexRequest = Requests.indexRequest("user_transmit").type("read11").source(sourceMap);
            // Hand the request over to the indexer
            indexer.add(indexRequest);
        }
    }
    public static void main(String[] args) {

    }
}
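
One thing to keep in mind with this sink: toRetractStream() emits a retract message (f0 == false, carrying the old row) followed by a new add message whenever a group's count changes, and process() above indexes every element with an auto-generated id, so updates accumulate as extra documents in ES. Two hedged sketches (variable names come from the listings above; nothing here is the only correct fix): keep only the add messages before the sink, and give each request a stable id so later updates overwrite the same document.

        // Sketch 1: in Kafka5Pg2ES, keep only the "add" messages before sinking
        tableResult
                .filter(t -> t.f0)
                .addSink(esSink.build());

        // Sketch 2: inside process(), give the request a stable id (here: username)
        // so a later update replaces the earlier document instead of adding a new one
        IndexRequest indexRequest = Requests.indexRequest("user_transmit")
                .type("read11")
                .id(String.valueOf(row.getField(0)))
                .source(sourceMap);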

KafkaUtils

package com.sql.utils.kafka_api;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;

import java.util.Properties;

public class KafkaUtils {
    public static Producer<String, String> getKafkaProducer(){
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.162:9092"); // Kafka broker list
        props.put("acks", "all");
        props.put("retries", 1);              // number of retries
        props.put("linger.ms", 1);            // wait time (ms) before sending a batch
        props.put("batch.size", 16384);       // batch size in bytes
        props.put("buffer.memory", 33554432); // RecordAccumulator buffer size in bytes
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        Producer<String, String> producer = new KafkaProducer<>(props);
        return producer;
    }

    public static FlinkKafkaConsumer011<String> getFlinkKafkaConsumer(String topicName){
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.1.162:9092");
        properties.setProperty("zookeeper.connect", "192.168.1.162:2181");
        properties.setProperty("group.id", "KakaUtils002");
        properties.setProperty("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("auto.offset.reset", "latest");
        // e.g. the "pageLog" topic used in this example
        FlinkKafkaConsumer011<String> kafkaSource = new FlinkKafkaConsumer011<>(topicName,
                new SimpleStringSchema(), properties);
        return kafkaSource;
    }
}

UserTransmit

package com.sql.bean;
public class UserTransmit {
    private String username;
    private long transmit_time;
    @Override
    public String toString() {
        return "UserTransmit{" +
                "username='" + username + '\'' +
                ", transmit_time=" + transmit_time +
                '}';
    }

    public UserTransmit() {
    }

    public UserTransmit(String username, long transmit_time) {
        this.username = username;
        this.transmit_time = transmit_time;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public long getTransmit_time() {
        return transmit_time;
    }

    public void setTransmit_time(long transmit_time) {
        this.transmit_time = transmit_time;
    }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.demo</groupId>
    <artifactId>flink1.10.0</artifactId>
    <version>1.0-SNAPSHOT</version>

   <dependencies>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-java</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-streaming-java_2.12</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-connector-kafka-0.11_2.12</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.bahir</groupId>
           <artifactId>flink-connector-redis_2.11</artifactId>
           <version>1.0</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>mysql</groupId>
           <artifactId>mysql-connector-java</artifactId>
           <version>5.1.44</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-statebackend-rocksdb_2.12</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-planner_2.12</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-planner-blink_2.12</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-csv</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-jdbc_2.11</artifactId>
           <version>1.10.1</version>
       </dependency>
       <dependency>
           <groupId>org.postgresql</groupId>
           <artifactId>postgresql</artifactId>
           <version>42.2.5</version>
       </dependency>
       <dependency>
           <groupId>com.alibaba</groupId>
           <artifactId>fastjson</artifactId>
           <version>1.2.4</version>
       </dependency>
   </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <!-- Do not copy the signatures in the META-INF folder.
                                    Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>my.programs.main.clazz</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>