基于DataX开发读取ElasticSearch数据的Reader插件
1、检出DataX源码(git clone https://github.com/alibaba/DataX.git DataX),导入项目,新建一个esreader的maven项目进行插件开发。
2、在DataX安装目录的plugin/reader目录下新建esreader目录,该目录下包含plugin_job_template.json、plugin.json、esreader-0.0.1-SNAPSHOT.jar,同时在该目录下创建一个libs目录,存放相关依赖的jar文件。
相关代码:
package com.alibaba.datax.plugin.reader.esreader;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.google.gson.Gson;
public class ESReader extends Reader {
public static class Job extends Reader.Job {
/** Plugin job configuration; stays {@code null} until {@link #init()} assigns it. */
private Configuration originalConfiguration = null;
/**
 * Runs the pre-check step by delegating to the superclass implementation.
 * No reader-specific checks are added here.
 */
@Override
public void preCheck() {
super.preCheck();
}
/**
 * Runs the pre-handler step by delegating to the superclass implementation.
 *
 * @param jobConfiguration configuration passed through unchanged to
 *                         {@code super.preHandler}
 */
@Override
public void preHandler(Configuration jobConfiguration) {
super.preHandler(jobConfiguration);
}
/**
 * Initializes the job by storing the plugin job configuration returned by
 * the superclass in {@code originalConfiguration}.
 */
@Override
public void init() {
    // Fetch the configuration from the base class first, then keep a reference to it.
    Configuration pluginJobConf = super.getPluginJobConf();
    this.originalConfiguration = pluginJobConf;
}
/**
 * Runs the prepare step by delegating to the superclass implementation.
 * No reader-specific preparation is added here.
 */
@Override
public void prepare() {
super.prepare();
}
/**
 * Runs the post step by delegating to the superclass implementation.
 * No reader-specific post-processing is added here.
 */
@Override
public void post() {
super.post();
}
/**
 * Runs the post-handler step by delegating to the superclass implementation.
 *
 * @param jobConfiguration configuration passed through unchanged to
 *                         {@code super.postHandler}
 */
@Override
public void postHandler(Configuration jobConfiguration) {
super.postHandler(jobConfiguration);
}
/**
 * Intentionally empty: this implementation performs no cleanup.
 */
@Override
public void destroy() {
}
@Override
public List<Configuration> split(int adviceNumber) {
List<Configuration> readerSplitConfigurations = new ArrayList<Configuration>();
for (int i = 0; i < adviceNumber; i++) {
Configuratio