Flink Table 将Stream追加写入Elasticsearch
Flink Table 提供了多种将数据写入 Elasticsearch 的方式:一种是通过 Connector 描述符连接 Elasticsearch,另一种是直接构造 Sink 写入。下面分别介绍这两种方式。
一、Connector的方式写入Elasticsearch
/**
 * Reads an append-only stream from Kafka and writes it to Elasticsearch
 * using the Table API connector-descriptor style.
 */
public class SqlSinkElasticSearchStream {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Schema shared by the Kafka source and the Elasticsearch sink.
        Schema userSchema = new Schema()
                .field("userId", Types.STRING)
                .field("name", Types.STRING)
                .field("age", Types.STRING)
                .field("sex", Types.STRING)
                .field("createTime", Types.BIG_DEC)
                .field("updateTime", Types.BIG_DEC);

        // Register the Kafka topic "user" as an append-only source table.
        Kafka kafkaConnector = new Kafka()
                .topic("user")
                .property("bootstrap.servers", "localhost:9092")
                .property("group.id", "test")
                .version("0.10");
        tableEnv
                .connect(kafkaConnector)
                .withSchema(userSchema)
                .withFormat(new Json().deriveSchema())
                .inAppendMode()
                .registerTableSource("Users");

        Table users = tableEnv.sqlQuery("select userId,name,age,sex,createTime,updateTime from Users");
        // Print the stream for debugging.
        tableEnv.toAppendStream(users, TypeInformation.of(Row.class)).print();

        // Elasticsearch sink descriptor; one note per option below.
        Elasticsearch esConnector = new Elasticsearch()
                .version("6")
                .host("localhost", 9200, "http")
                .index("test")
                .documentType("test")
                // delimiter between key fields, defaults to "_"
                .keyDelimiter("_")
                // literal written when a key field is null
                .keyNullLiteral("null")
                // failure strategies: Fail (error, job fails), Ignore (drop and
                // continue), RetryRejected (retry), Custom (user-defined)
                .failureHandlerIgnore()
                // do not flush pending requests on checkpoint
                .disableFlushOnCheckpoint()
                // max number of buffered actions per bulk request
                .bulkFlushMaxActions(20)
                // max buffered size per bulk request (only MB supported)
                .bulkFlushMaxSize("20 mb")
                // interval between bulk request flushes
                .bulkFlushInterval(60000L)
                // use a constant back-off when retrying a flush
                .bulkFlushBackoffConstant()
                // delay between back-off attempts, in milliseconds
                .bulkFlushBackoffDelay(30000L)
                // max number of back-off retries per flush
                .bulkFlushBackoffMaxRetries(3)
                // exponential back-off alternative:
                //.bulkFlushBackoffExponential()
                // max timeout (ms) when the same request is retried:
                //.connectionMaxRetryTimeout(3)
                // path prefix added to every REST call:
                //.connectionPathPrefix("/v1")
                ;
        tableEnv
                .connect(esConnector)
                .withSchema(userSchema)
                .withFormat(new Json().deriveSchema())
                .inUpsertMode()
                .registerTableSink("Result");

        tableEnv.insertInto(users, "Result", new StreamQueryConfig());
        env.execute("SqlSinkElasticSearchStream");
    }
}
这种方式的 API 使用起来非常简洁方便,推荐优先采用。
二、Sink到ElasticSearch的方式
/**
 * Reads an append-only stream from Kafka and writes it to Elasticsearch by
 * constructing an {@link Elasticsearch6UpsertTableSink} directly, instead of
 * the connector-descriptor style used in SqlSinkElasticSearchStream.
 */
public class SqlSinkElasticSearchStream2 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Single schema definition, reused for the source registration, the
        // sink, and the JSON row type below. (The original code also built a
        // duplicate, never-used Schema object; it has been removed.)
        TableSchema tableSchema = new TableSchema.Builder()
                .field("userId", Types.STRING)
                .field("name", Types.STRING)
                .field("age", Types.STRING)
                .field("sex", Types.STRING)
                .field("createTime", Types.BIG_DEC)
                .field("updateTime", Types.BIG_DEC)
                .build();

        // Register the Kafka topic "user" as an append-only source table.
        tableEnv
                .connect(
                        new Kafka()
                                .topic("user")
                                .property("bootstrap.servers", "localhost:9092")
                                .property("group.id", "test")
                                .version("0.10")
                )
                .withSchema(new Schema().schema(tableSchema))
                .withFormat(new Json().deriveSchema())
                .inAppendMode()
                .registerTableSource("Users");

        Table table = tableEnv.sqlQuery("select userId,name,age,sex,createTime,updateTime from Users");
        // Print the stream for debugging.
        tableEnv.toAppendStream(table, TypeInformation.of(Row.class)).print("append");

        // Sink options; semantics of each key match the descriptor methods
        // demonstrated in SqlSinkElasticSearchStream (values are strings here).
        Map<ElasticsearchUpsertTableSinkBase.SinkOption, String> sinkOptions = new HashMap<>();
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.DISABLE_FLUSH_ON_CHECKPOINT, "false");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_MAX_ACTIONS, "20");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_MAX_SIZE, "20 mb");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_INTERVAL, "60000");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_BACKOFF_DELAY, "30000");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_BACKOFF_RETRIES, "3");

        ArrayList<ElasticsearchUpsertTableSinkBase.Host> hosts = new ArrayList<>();
        hosts.add(new ElasticsearchUpsertTableSinkBase.Host("localhost", 9200, "http"));

        // Derive the row type from tableSchema instead of re-listing all field
        // names and types by hand — keeps source and sink schemas in sync.
        RowTypeInfo typeInfo = new RowTypeInfo(tableSchema.getFieldTypes(), tableSchema.getFieldNames());
        SerializationSchema<Row> schemaRow = new JsonRowSerializationSchema(typeInfo);

        Elasticsearch6UpsertTableSink sink = new Elasticsearch6UpsertTableSink(
                true,                          // isAppendOnly
                tableSchema,
                hosts,
                "test",                        // index
                "test",                        // document type
                "_",                           // key delimiter
                "null",                        // key null literal
                schemaRow,
                XContentType.JSON,
                new IgnoringFailureHandler(),  // same effect as failureHandlerIgnore()
                sinkOptions);
        tableEnv.writeToSink(table, sink, new StreamQueryConfig());
        env.execute("SqlSinkElasticSearchStream");
    }
}
如果需要 Append 追加写入模式,可以调用 sink.setIsAppendOnly(true)(等价于上面构造函数中第一个参数 isAppendOnly 传 true)。