软件版本
ElasticSearch6.3
Flink 1.6.2
maven依赖
<!-- Flink connector for Elasticsearch 6.x (artifact suffix _2.11 is the Scala version). -->
<!-- NOTE(review): the text above lists Flink 1.6.2 while this connector is 1.6.1;
     confirm the connector version matches the Flink version actually in use. -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
    <version>1.6.1</version>
</dependency>
读kafka消息源
// Local execution environment; records are timestamped when they enter the pipeline.
StreamExecutionEnvironment envStream = StreamExecutionEnvironment.createLocalEnvironment();
envStream.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

// Kafka consumer settings: broker list and consumer group.
Properties kafkaProps = new Properties();
kafkaProps.setProperty("bootstrap.servers", KafkaConfig.KAFKA_BROKER_LIST);
kafkaProps.setProperty("group.id", "flink-test");

// Read "topic_test" as plain strings, starting from the latest offsets.
FlinkKafkaConsumer011<String> kafkaSource =
        new FlinkKafkaConsumer011<>("topic_test", new SimpleStringSchema(), kafkaProps);
kafkaSource.setStartFromLatest();

// The source runs with parallelism 2; print() writes every raw message to standard output.
DataStream<String> stream = envStream.addSource(kafkaSource).setParallelism(2);
stream.print();
这里使用的是 local 模式；在集群上运行时，请改为 StreamExecutionEnvironment.getExecutionEnvironment()。
数据处理
// Convert each raw Kafka message into an (id, status) pair.
// Messages that are not of type "jlh_crawling_result", that lack content, or that cannot be
// parsed are mapped to a (null, null) marker tuple, which the filter below removes.
DataStream<Tuple2<String, Integer>> filterStream = stream.map(new MapFunction<String, Tuple2<String, Integer>>() {
    /**
     * Extracts the document id and the new status from one JSON notification.
     *
     * @param value raw JSON message read from Kafka
     * @return (id, status) for a matching notification, otherwise a tuple whose fields are null
     */
    @Override
    public Tuple2<String, Integer> map(String value) throws Exception {
        try {
            WkNotification notification = JSON.parseObject(value, WkNotification.class);
            // Constant-first equals and explicit null checks: a missing object, type or
            // content is an expected case, not an exception.
            if (notification != null && "jlh_crawling_result".equals(notification.getType())) {
                Map<String, String> content = notification.getContent();
                if (content != null) {
                    // Throws NumberFormatException when "result" is absent or not numeric.
                    Integer status = Integer.valueOf(content.get("result"));
                    String id = content.get("id");
                    return Tuple2.<String, Integer>of(id, status);
                }
            }
        } catch (Exception ignored) {
            // Intentionally ignored: malformed JSON or a non-numeric "result" means the
            // message is unusable, so it falls through to the marker tuple and is dropped.
        }
        return Tuple2.of(null, null);
    }
}).filter(s -> s.f0 != null);
filterStream.print();
解析 JSON 消息，抽取出 ES 中待更新文档的 _id，以及要修改的字段 status 的值。
同时过滤掉解析异常和类型不匹配的数据。
ES update操作
// Elasticsearch HTTP endpoint(s) used to initialise the ES client connection.
List<HttpHost> httpHosts = new ArrayList<>();
httpHosts.add(new HttpHost("xxx.xxx", 9200, "http"));
// Sink builder: every (id, status) pair becomes a partial-document update request.
ElasticsearchSink.Builder<Tuple2<String, Integer>> esSinkBuilder = new ElasticsearchSink.Builder<Tuple2<String, Integer>>(
        httpHosts,
        new ElasticsearchSinkFunction<Tuple2<String, Integer>>() {
            /**
             * Builds the update request for one element.
             *
             * @param element pair of document id (f0) and new status value (f1)
             * @return update request that sets the "status" field of the addressed document
             * @throws IOException if the JSON document body cannot be built
             */
            private UpdateRequest updateIndexRequest(Tuple2<String, Integer> element) throws IOException {
                String id = element.f0;
                Integer status = element.f1;
                UpdateRequest updateRequest = new UpdateRequest();
                // Index and type must be set, and an id is mandatory for an update.
                updateRequest.index("trafficwisdom.test_index")
                        .type("route")
                        .id(id)
                        .doc(XContentFactory.jsonBuilder().startObject().field("status", status).endObject());
                return updateRequest;
            }

            /**
             * Hands the update request for {@code element} to the indexer.
             *
             * @throws java.io.UncheckedIOException if the request body cannot be built; the
             *         failure is propagated so the update is not silently lost
             */
            @Override
            public void process(Tuple2<String, Integer> element, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                try {
                    requestIndexer.add(updateIndexRequest(element));
                } catch (IOException e) {
                    // Propagate with the cause instead of printing and dropping the record.
                    throw new java.io.UncheckedIOException(
                            "Failed to build update request for id " + element.f0, e);
                }
            }
        }
);
//必须设置flush参数
esSinkBuilder.setBulkFlushMaxActions(1);
filterStream.addSink(esSinkBuilder.build());
envStream.execute("this-test");