一:创建ES连接
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.io.IOException;
import java.net.InetAddress;
/**
 * Holds the single Elasticsearch {@link TransportClient} shared by the application.
 *
 * <p>The client is created once, when this class is initialized, using the hard-coded
 * cluster name, host and transport port below. The values are inlined for convenience;
 * they could equally be loaded from a configuration file such as
 * {@code elasticsearch.conf} on the classpath.
 */
public class ElasticSearchUtil {
    /** Address of the node the client connects to. */
    private static final String HOST = "10.0.221.73";
    /** Transport (node-to-node) port of that node. */
    private static final int PORT = 9300;
    /** Must match {@code cluster.name} of the target cluster. */
    private static final String CLUSTER_NAME = "my-application";

    private static final TransportClient client = createClient();

    /**
     * Builds the transport client and registers the configured node address.
     *
     * <p>If the address cannot be resolved or registered, the half-built client is closed
     * before the failure is propagated, so its resources are not leaked and callers never
     * receive a client without any node address.
     *
     * @return a client connected to {@link #HOST}:{@link #PORT}
     * @throws IllegalStateException if the client cannot be set up; the cause is preserved
     */
    private static TransportClient createClient() {
        Settings settings = Settings.builder()
                .put("cluster.name", CLUSTER_NAME)
                // "client.transport.sniff" is intentionally left at its default (disabled).
                .build();
        TransportClient created = new PreBuiltTransportClient(settings);
        try {
            // addTransportAddresses takes varargs, so several nodes of a cluster can be
            // registered here.
            created.addTransportAddresses(
                    new TransportAddress(InetAddress.getByName(HOST), PORT));
            return created;
        } catch (IOException | RuntimeException e) {
            created.close();
            throw new IllegalStateException(
                    "Cannot create Elasticsearch transport client for " + HOST + ":" + PORT, e);
        }
    }

    /**
     * Returns the shared transport client.
     *
     * @return the client created during class initialization
     */
    public static TransportClient getTransportClient() {
        return client;
    }

    /**
     * Closes the given client; does nothing when it is {@code null}.
     *
     * @param client the client to close, may be {@code null}
     */
    public static void close(TransportClient client) {
        if (client != null) {
            client.close();
        }
    }
}
二: 创建文件输出流
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Small helper for appending text lines to a file.
 */
public class JsonFormatTool {
    /**
     * Appends {@code content} followed by a CRLF line terminator to the end of a file.
     *
     * <p>The file is created if it does not exist; existing content is kept. Text is
     * always encoded as UTF-8 so that non-ASCII JSON is written the same way regardless
     * of the platform default charset. I/O failures are reported on standard error and
     * are not propagated to the caller.
     *
     * @param fileFullPath full path of the target file, e.g. {@code /es.json}
     * @param content      the text to append
     */
    public static void writeFile(String fileFullPath, String content) {
        // Encode before opening the file so that a null content fails without touching it.
        byte[] payload = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] lineEnd = "\r\n".getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // Second constructor argument "true" opens the file in append mode.
        try (FileOutputStream out = new FileOutputStream(fileFullPath, true)) {
            out.write(payload);
            out.write(lineEnd);
            out.flush();
        } catch (IOException e) {
            System.err.println("Failed to append to " + fileFullPath);
            e.printStackTrace();
        }
    }
}
三:读取ES数据并调用输出流输出json文件
import org.elasticsearch.action.search.*;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.sort.SortOrder;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Exports every document of one Elasticsearch index to a JSON-lines file by paging
 * through the index with the scroll API.
 */
public class ScrollAll {
    /** Index (and mapping type) that is exported. */
    private static final String INDEX = "binloghash";
    private static final String TYPE = "binloghash";
    /** Documents fetched per scroll round trip; small for testing, tune for production. */
    private static final int BATCH_SIZE = 10;

    /**
     * Runs the export and then closes the shared transport client.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            ScroolSearchID();
        } finally {
            ElasticSearchUtil.close(ElasticSearchUtil.getTransportClient());
        }
    }

    /**
     * Scrolls through all documents of the index and appends each document's source as
     * one line to {@code /tmp/es<yyyy-MM-dd>.json}, where the date is today's date.
     *
     * <p>The server-side scroll context is released when the export finishes or fails,
     * instead of being left open until its keep-alive expires.
     *
     * @return always {@code null}
     */
    public static String ScroolSearchID() {
        // Today's date becomes the suffix of the generated file name.
        DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        String suffix = dateFormat.format(new Date());
        // The target file is the same for every hit, so build its path once.
        String fileFullPath = "/tmp/es" + suffix + ".json";
        // Each scroll request keeps the search context alive for one more minute.
        TimeValue keepAlive = TimeValue.timeValueMinutes(1);

        Client client = ElasticSearchUtil.getTransportClient();
        SearchRequestBuilder search = client.prepareSearch(INDEX).setTypes(TYPE);
        // Sorting by _doc (index order) is the cheapest order for scrolling.
        search.addSort("_doc", SortOrder.ASC);
        search.setSize(BATCH_SIZE);
        // Match all documents.
        search.setQuery(QueryBuilders.queryStringQuery("*:*"));
        search.setScroll(keepAlive);

        SearchResponse scrollResp = search.get();
        String scrollId = scrollResp.getScrollId();
        try {
            System.out.println("命中总数量:" + scrollResp.getHits().getTotalHits());
            int count = 1;
            do {
                System.out.println("第" + count + "次打印数据:");
                for (SearchHit hit : scrollResp.getHits().getHits()) {
                    JsonFormatTool.writeFile(fileFullPath, hit.getSourceAsString());
                }
                count++;
                // Fetch the next batch, passing on the scroll id of the previous response.
                scrollResp = client.prepareSearchScroll(scrollId)
                        .setScroll(keepAlive)
                        .execute()
                        .actionGet();
                scrollId = scrollResp.getScrollId();
                // An empty batch means every document has been read.
            } while (scrollResp.getHits().getHits().length != 0);
        } finally {
            clearScroll(client, scrollId);
        }
        return null;
    }

    /** Releases the scroll context; a failure here is reported but never masks the export result. */
    private static void clearScroll(Client client, String scrollId) {
        if (scrollId == null) {
            return;
        }
        try {
            client.prepareClearScroll().addScrollId(scrollId).get();
        } catch (RuntimeException e) {
            System.err.println("Failed to clear scroll context " + scrollId);
            e.printStackTrace();
        }
    }
}
四:maven依赖
<!-- Build descriptor for the ES-to-file export tool: packages the classes as a jar whose
     manifest names the entry point and expects the dependency jars under lib/. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.my.es</groupId>
  <artifactId>EsToFile</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>EsToFile Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <!-- Unit-test framework; only needed on the test classpath. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <!-- Elasticsearch transport client (provides PreBuiltTransportClient). -->
    <dependency>
      <groupId>org.elasticsearch.client</groupId>
      <artifactId>transport</artifactId>
      <version>6.4.2</version>
    </dependency>
    <dependency>
      <groupId>org.json</groupId>
      <artifactId>json</artifactId>
      <version>20180813</version>
    </dependency>
    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch-hadoop</artifactId>
      <version>6.4.2</version>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <addClasspath>true</addClasspath>
              <useUniqueVersions>false</useUniqueVersions>
              <classpathPrefix>lib/</classpathPrefix>
              <!-- Entry point of the export tool. NOTE(review): ScrollAll is assumed to
                   live in the default package; use its fully qualified name if it is
                   placed in a package. -->
              <mainClass>ScrollAll</mainClass>
            </manifest>
          </archive>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
总结:需要注意的点 1. TransportClient 自 ES 7.0 起被标记为废弃,并在 8.0 中被移除,因此本文代码仅适用于 ES 6.x;对于 ES 7 及以后的版本,建议改用 REST 客户端(High Level REST Client)的方式操作 ES 集群。
2. 以上 Java 代码通过传输端口 9300 连接 ES,请确认 ES 集群各节点的 elasticsearch.yml 中已配置并对外开放 9300 端口。