import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Demo that streams rows from a MySQL table and bulk-indexes them into
 * Elasticsearch through a {@link BulkProcessor}.
 */
public class BulkProcessor2EsDemo {
    /** Bulk batch size (kept for API compatibility; the builder below uses the same value). */
    public final static int BULK_COUNT = 20000;
    /** Shared transport client obtained from ESConfig; closed at the end of importData. */
    private static TransportClient client = ESConfig.getClient();
    //etl551_0705
    /** MySQL schema name, also used as the Elasticsearch index name. */
    public static String table = "world";

    /**
     * Executes the given SQL against MySQL, streams the result set row by row,
     * and submits each row as an index request to a {@link BulkProcessor}.
     *
     * @param sql the SELECT statement to run
     * @return the number of rows read and submitted for indexing
     *         (BUG FIX: previously this always returned 0 — {@code count} was never updated)
     * @throws InterruptedException if waiting for the bulk processor to drain is interrupted
     */
    public static long importData(String sql) throws InterruptedException {
        String url = "jdbc:mysql://127.0.0.1:3306/" + table + "?user=root&password=1234&autoReconnect=true&useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=false";
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e1) {
            e1.printStackTrace();
        }
        long count = 0;
        int blukNum = 0;
        // Only these columns are copied into the Elasticsearch document.
        List<String> columnName = Arrays.asList("ID", "Name", "CountryCode", "District", "Population");
        // try-with-resources guarantees ResultSet/Statement/Connection are closed
        // in reverse order even on exceptions (replaces the old manual finally chain).
        try (Connection con = DriverManager.getConnection(url);
             PreparedStatement ps = con.prepareStatement(sql,
                     ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            // Integer.MIN_VALUE switches MySQL Connector/J into row-streaming mode
            // instead of buffering the whole result set in memory.
            ps.setFetchSize(Integer.MIN_VALUE);
            ps.setFetchDirection(ResultSet.FETCH_REVERSE);
            try (ResultSet rs = ps.executeQuery()) {
                ResultSetMetaData rsmd = rs.getMetaData();
                int colCount = rsmd.getColumnCount();
                String index = table;
                String type = "historyLog";
                long allStart = System.currentTimeMillis(); // start time of the whole import
                BulkProcessor bulkProcessor = createBulkProcessor();
                while (rs.next()) { // one iteration per row
                    blukNum++;
                    Map<String, String> map = new LinkedHashMap<>();
                    for (int i = 1; i <= colCount; i++) {
                        String name = rsmd.getColumnName(i);
                        if (columnName.contains(name)) {
                            String value = rs.getString(i);
                            // Skip null/blank values (old code tested blankness twice).
                            if (value != null && !value.trim().isEmpty()) {
                                map.put(name, value);
                            }
                        }
                    }
                    // Row number doubles as the document id.
                    bulkProcessor.add(new IndexRequest(index, type, blukNum + "").source(map));
                }
                // Flush remaining requests and wait for in-flight bulks to finish.
                bulkProcessor.awaitClose(10, TimeUnit.MINUTES);
                count = blukNum; // BUG FIX: record how many rows were actually submitted
                System.out.println("取回数据量为 " + blukNum + " 行!");
                long end = System.currentTimeMillis(); // end time of the whole import
                System.out.println("共耗时 " + (end - allStart) / 1000 + " s!");
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        // NOTE(review): closing the shared static client here means importData
        // can only be called once per JVM — kept to preserve existing behavior.
        if (client != null) {
            client.close();
        }
        return count;
    }

    /**
     * Builds a {@link BulkProcessor} bound to the shared client with
     * size/count/interval flush triggers and a constant-backoff retry policy.
     *
     * @return a ready-to-use bulk processor
     */
    public static BulkProcessor createBulkProcessor() {
        BulkProcessor bulkProcessor = BulkProcessor.builder(
                client, new BulkProcessor.Listener() {
                    long begin = 0;
                    long cost;
                    int count = 0;

                    @Override
                    public void afterBulk(long executionId, BulkRequest bulkRequest, BulkResponse bulkResponse) {
                        cost = (System.currentTimeMillis() - begin) / 1000;
                        count += bulkRequest.numberOfActions();
                        // BUG FIX: a bulk can succeed as a request while individual
                        // items fail — surface those instead of reporting success.
                        if (bulkResponse.hasFailures()) {
                            System.out.println("bulk has item failures: " + bulkResponse.buildFailureMessage());
                        } else {
                            System.out.println("bulk success. size:[{" + count + "}] cost:[{" + cost + "}s]");
                        }
                    }

                    @Override
                    public void afterBulk(long executionId, BulkRequest bulkRequest, Throwable throwable) {
                        // Whole-request failure (e.g. connection refused); backoff policy retries.
                        System.out.println("bulk update has failures, will retry:" + throwable);
                    }

                    @Override
                    public void beforeBulk(long executionId, BulkRequest bulkRequest) {
                        begin = System.currentTimeMillis();
                    }
                })
                .setBulkActions(20000)                                  // flush after this many actions
                .setBulkSize(new ByteSizeValue(200, ByteSizeUnit.MB))   // flush after this many bytes
                .setConcurrentRequests(10)                              // concurrent bulk requests in flight
                .setFlushInterval(TimeValue.timeValueSeconds(50))       // time-based flush
                .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(1), 3)) // 3 retries, 1s apart
                .build();
        return bulkProcessor;
    }

    public static void main(String[] args) throws InterruptedException {
        String sql = "select * from city ";
        importData(sql);
    }
}
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.net.InetAddress;
import java.net.UnknownHostException;
@Configuration
public class ESConfig {
    private static String clusterName = "my-application";
    /** Comma-separated list of ES node hostnames. */
    private static String host = "****";
    private static Integer port = 9300;
    /** Settings object carrying the target cluster name. */
    private static Settings settings = Settings.builder().put("cluster.name", clusterName).build();
    /** TransportClient used to connect to the ES cluster; volatile for safe publication. */
    private static volatile TransportClient client;

    /**
     * Lazily creates the shared {@link TransportClient} using double-checked
     * locking. Synchronizing on the class object gives the same lock a
     * {@code synchronized static} method would use.
     *
     * BUG FIX: the original was missing the second null check inside the
     * synchronized block, so two racing threads could each build a client;
     * it also assigned the field before transport addresses were added,
     * letting other threads observe a client with no addresses. The client
     * is now built into a local and published last.
     *
     * @return the shared transport client
     */
    @Bean
    public static TransportClient getClient() {
        if (client == null) {
            synchronized (TransportClient.class) {
                if (client == null) { // second check: another thread may have won the race
                    TransportClient created = new PreBuiltTransportClient(settings);
                    try {
                        String[] allHost = host.split(",");
                        for (String str : allHost) {
                            created.addTransportAddresses(new TransportAddress(InetAddress.getByName(str), port));
                        }
                    } catch (UnknownHostException e) {
                        e.printStackTrace();
                    }
                    client = created; // publish only after initialization completes
                }
            }
        }
        return client;
    }
}