Flume1.8sink不兼容ES2.4.3
用flume从kafka消费消息,然后用ElasticsearchSink往ES写入数据。ES从1.7.1升级到2.4.3后,flume的代码不做修改,启动后sink大量报错,所有数据都写入失败。即使把JDK升到1.8、把elasticsearch依赖版本升到2.4.3,并加上Log4j,还是不行。
查了下flume最新版本1.7也不支持ES2.X(https://stackoverflow.com/questions/36614488/flume-1-6-compatibility-with-elasticsearch-2-3-1)。
GitHub上有个项目可以支持ES2.x(https://github.com/lucidfrontier45/ElasticsearchSink2)。
其实sink很简单,只是把kafka里拉出来的数据,简单解析后写入ES,为了解决兼容问题,考虑下面2个方案
1、放弃使用flume-ng-elasticsearch-sink包,使用自定义sink
2、修改flume-ng-elasticsearch-sink包源代码,更新源码包
本人采取了第一种方案:
在flume-ng-elasticsearch-sink源码的基础上编写自定义sink,主要修改了两处代码:process方法和start方法。
process方法
/**
 * Drains up to {@code batchSize} events from the channel and indexes them
 * into Elasticsearch with a single bulk request, all inside one channel
 * transaction.
 *
 * <p>Events are written to a daily index named
 * {@code <indexName>_<yyyyMMdd>}; the same name is used as the document type.
 *
 * @return {@code Status.READY} when a full batch was written,
 *         {@code Status.BACKOFF} when the channel yielded fewer than
 *         {@code batchSize} events (including none)
 * @throws EventDeliveryException if the batch could not be delivered; the
 *         transaction is rolled back so the events stay in the channel
 */
@Override
public Status process() throws EventDeliveryException {
    logger.debug("processing...");
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction txn = channel.getTransaction();
    try {
        txn.begin();

        // Start every batch with a fresh builder: if a previous batch failed
        // and was rolled back, its requests must not be sent again together
        // with the re-delivered events.
        bulkRequestBuilder = esClient.prepareBulk();

        // Compute the daily index name once per batch into a local variable.
        // The indexName field must stay untouched, otherwise the date suffix
        // would be appended again for every event.
        String dailyIndexName = indexName + "_" + new SimpleDateFormat("yyyyMMdd").format(new Date());

        int count;
        for (count = 0; count < batchSize; ++count) {
            Event event = channel.take();
            if (event == null) {
                // Channel is drained for now.
                break;
            }
            // Queue the event body as one index request of the bulk.
            bulkRequestBuilder.add(
                    esClient.prepareIndex(dailyIndexName, dailyIndexName).setSource(event.getBody()));
        }

        if (count <= 0) {
            sinkCounter.incrementBatchEmptyCount();
            counterGroup.incrementAndGet("channel.underflow");
            status = Status.BACKOFF;
        } else {
            if (count < batchSize) {
                sinkCounter.incrementBatchUnderflowCount();
                status = Status.BACKOFF;
            } else {
                sinkCounter.incrementBatchCompleteCount();
            }
            sinkCounter.addToEventDrainAttemptCount(count);

            // Submit the whole batch in one round trip.
            BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
            if (bulkResponse.hasFailures()) {
                // Do not commit a batch that Elasticsearch (partially)
                // rejected; failing here triggers the rollback below.
                throw new EventDeliveryException(bulkResponse.buildFailureMessage());
            }
        }
        txn.commit();
        sinkCounter.addToEventDrainSuccessCount(count);
        counterGroup.incrementAndGet("transaction.success");
    } catch (Throwable ex) {
        try {
            txn.rollback();
            counterGroup.incrementAndGet("transaction.rollback");
        } catch (Exception ex2) {
            logger.error(
                    "Exception in rollback. Rollback might not have been successful.",
                    ex2);
        }
        logger.error("Failed to commit transaction. Transaction rolled back.", ex);
        if (ex instanceof Error || ex instanceof RuntimeException) {
            Throwables.propagate(ex);
        } else {
            throw new EventDeliveryException(
                    "Failed to commit transaction. Transaction rolled back.", ex);
        }
    } finally {
        txn.close();
    }
    return status;
}
start方法
/**
 * Builds the Elasticsearch transport client for the configured cluster,
 * registers every address in {@code serverAddresses} with it, and then
 * starts the sink via {@code super.start()}.
 *
 * <p>Each address is expected as {@code host:port}; when the port is omitted
 * the default transport port 9300 is used. An address whose host cannot be
 * resolved is logged and skipped so the remaining nodes are still registered.
 */
@Override
public void start() {
    // The client only connects to nodes that report this cluster name.
    Settings settings = Settings.builder().put("cluster.name", clusterName).build();
    esClient = TransportClient.builder().settings(settings).build();

    for (String serverAddress : serverAddresses) {
        String[] hostPort = serverAddress.trim().split(":");
        String host = hostPort[0].trim();
        // Fall back to the default transport port when none is given.
        int port = hostPort.length > 1 ? Integer.parseInt(hostPort[1].trim()) : 9300;
        try {
            esClient.addTransportAddress(
                    new InetSocketTransportAddress(InetAddress.getByName(host), port));
        } catch (UnknownHostException e) {
            logger.error("Unable to resolve Elasticsearch host '" + host
                    + "'; skipping address " + serverAddress, e);
        }
    }
    super.start();
}