Flink Table 将Stream追加写入Elasticsearch
Flink Table 提供了多种将数据写入 Elasticsearch 的方式:一种是通过 Connector 描述符连接 Elasticsearch,另一种是直接构造 Sink 写入。下面分别介绍这两种方式。
一、Connector的方式写入Elasticsearch
/**
 * Reads an append-only stream from Kafka and writes it to Elasticsearch
 * using the Table API connector-descriptor style.
 */
public class SqlSinkElasticSearchStream {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Schema shared by the Kafka source and the Elasticsearch sink.
        Schema userSchema = new Schema()
                .field("userId", Types.STRING)
                .field("name", Types.STRING)
                .field("age", Types.STRING)
                .field("sex", Types.STRING)
                .field("createTime", Types.BIG_DEC)
                .field("updateTime", Types.BIG_DEC);

        // Register the Kafka topic "user" as an append-only source table.
        Kafka kafkaConnector = new Kafka()
                .topic("user")
                .property("bootstrap.servers", "localhost:9092")
                .property("group.id", "test")
                .version("0.10");
        tableEnv
                .connect(kafkaConnector)
                .withSchema(userSchema)
                .withFormat(new Json().deriveSchema())
                .inAppendMode()
                .registerTableSource("Users");

        Table users = tableEnv.sqlQuery("select userId,name,age,sex,createTime,updateTime from Users");
        // Print the stream for debugging.
        tableEnv.toAppendStream(users, TypeInformation.of(Row.class)).print();

        // Elasticsearch sink descriptor; one note per option below.
        Elasticsearch esConnector = new Elasticsearch()
                .version("6")
                .host("localhost", 9200, "http")
                .index("test")
                .documentType("test")
                // delimiter between key fields, defaults to "_"
                .keyDelimiter("_")
                // literal written when a key field is null
                .keyNullLiteral("null")
                // failure strategies: Fail (error, job fails), Ignore (drop and
                // continue), RetryRejected (retry), Custom (user-defined)
                .failureHandlerIgnore()
                // do not flush pending requests on checkpoint
                .disableFlushOnCheckpoint()
                // max number of buffered actions per bulk request
                .bulkFlushMaxActions(20)
                // max buffered size per bulk request (only MB supported)
                .bulkFlushMaxSize("20 mb")
                // interval between bulk request flushes
                .bulkFlushInterval(60000L)
                // use a constant back-off when retrying a flush
                .bulkFlushBackoffConstant()
                // delay between back-off attempts, in milliseconds
                .bulkFlushBackoffDelay(30000L)
                // max number of back-off retries per flush
                .bulkFlushBackoffMaxRetries(3)
                // exponential back-off alternative:
                //.bulkFlushBackoffExponential()
                // max timeout (ms) when the same request is retried:
                //.connectionMaxRetryTimeout(3)
                // path prefix added to every REST call:
                //.connectionPathPrefix("/v1")
                ;
        tableEnv
                .connect(esConnector)
                .withSchema(userSchema)
                .withFormat(new Json().deriveSchema())
                .inUpsertMode()
                .registerTableSink("Result");

        tableEnv.insertInto(users, "Result", new StreamQueryConfig());
        env.execute("SqlSinkElasticSearchStream");
    }
}
这种方式的 API 使用起来非常简洁方便,推荐优先采用。
二、Sink到ElasticSearch的方式
/**
 * Reads an append-only stream from Kafka and writes it to Elasticsearch by
 * constructing an {@link Elasticsearch6UpsertTableSink} directly, instead of
 * the connector-descriptor style used in SqlSinkElasticSearchStream.
 */
public class SqlSinkElasticSearchStream2 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Single schema definition, reused for the source registration, the
        // sink, and the JSON row type below. (The original code also built a
        // duplicate, never-used Schema object; it has been removed.)
        TableSchema tableSchema = new TableSchema.Builder()
                .field("userId", Types.STRING)
                .field("name", Types.STRING)
                .field("age", Types.STRING)
                .field("sex", Types.STRING)
                .field("createTime", Types.BIG_DEC)
                .field("updateTime", Types.BIG_DEC)
                .build();

        // Register the Kafka topic "user" as an append-only source table.
        tableEnv
                .connect(
                        new Kafka()
                                .topic("user")
                                .property("bootstrap.servers", "localhost:9092")
                                .property("group.id", "test")
                                .version("0.10")
                )
                .withSchema(new Schema().schema(tableSchema))
                .withFormat(new Json().deriveSchema())
                .inAppendMode()
                .registerTableSource("Users");

        Table table = tableEnv.sqlQuery("select userId,name,age,sex,createTime,updateTime from Users");
        // Print the stream for debugging.
        tableEnv.toAppendStream(table, TypeInformation.of(Row.class)).print("append");

        // Sink options; semantics of each key match the descriptor methods
        // demonstrated in SqlSinkElasticSearchStream (values are strings here).
        Map<ElasticsearchUpsertTableSinkBase.SinkOption, String> sinkOptions = new HashMap<>();
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.DISABLE_FLUSH_ON_CHECKPOINT, "false");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_MAX_ACTIONS, "20");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_MAX_SIZE, "20 mb");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_INTERVAL, "60000");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_BACKOFF_DELAY, "30000");
        sinkOptions.put(ElasticsearchUpsertTableSinkBase.SinkOption.BULK_FLUSH_BACKOFF_RETRIES, "3");

        ArrayList<ElasticsearchUpsertTableSinkBase.Host> hosts = new ArrayList<>();
        hosts.add(new ElasticsearchUpsertTableSinkBase.Host("localhost", 9200, "http"));

        // Derive the row type from tableSchema instead of re-listing all field
        // names and types by hand — keeps source and sink schemas in sync.
        RowTypeInfo typeInfo = new RowTypeInfo(tableSchema.getFieldTypes(), tableSchema.getFieldNames());
        SerializationSchema<Row> schemaRow = new JsonRowSerializationSchema(typeInfo);

        Elasticsearch6UpsertTableSink sink = new Elasticsearch6UpsertTableSink(
                true,                          // isAppendOnly
                tableSchema,
                hosts,
                "test",                        // index
                "test",                        // document type
                "_",                           // key delimiter
                "null",                        // key null literal
                schemaRow,
                XContentType.JSON,
                new IgnoringFailureHandler(),  // same effect as failureHandlerIgnore()
                sinkOptions);
        tableEnv.writeToSink(table, sink, new StreamQueryConfig());
        env.execute("SqlSinkElasticSearchStream");
    }
}
如果需要 Append 追加写入模式,可以调用 sink.setIsAppendOnly(true)(等价于上面构造函数中第一个参数 isAppendOnly 传 true)。