The Java High Level REST Client https://www.elastic.co/guide/en/elasticsearch/client/java-rest/6.6/java-rest-high.html
前言
一台机器上面要起多个节点一定要通过参数的方式启动,节点就是一个JVM进程,生产上ES单独部署,提供非常快速的数据的写入或者查询,而且是SSD,不是和Hadoop共用的
Hadoop机器上面已经运行着DataNode以及NodeManager,你的作业说不定就会出现卡住的现象,这时候再要求ES提供高性能的快速查询是不现实的
Java API
The Java High Level REST Client
https://www.elastic.co/guide/en/elasticsearch/client/java-rest/6.6/java-rest-high.html
这个链接一般会收藏,官网上写的比较乱。注意:被废弃的是走9300端口的TransportClient(7.x开始废弃,8.0移除),REST客户端是官方推荐的方向
使用版本ElasticSearch6.6.2,jdk8
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>6.6.2</version>
</dependency>
ES里面两个重要的端口:
- 9200:http协议,用于外部的通信,REST方式去访问
- 9300:tcp协议,集群之间是通过9300端口
The Java High Level(以及Low Level)REST Client走的都是9200的HTTP协议;走9300 TCP协议的是旧的TransportClient
建立一个测试类,使用单元测试来测
package com.ruozedata;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.*;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.search.aggregations.metrics.sum.SumAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.sort.SortOrder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
public class ESApp {

    /**
     * High Level REST client shared by all tests. It talks HTTP to port 9200;
     * port 9300 (TCP) is only used for node-to-node cluster traffic by the old
     * TransportClient.
     */
    RestHighLevelClient client;

    /**
     * Initializes the client before each test. Multiple {@code HttpHost}
     * entries may be passed to the builder for a multi-node cluster.
     */
    @Before
    public void setUp() {
        client = new RestHighLevelClient(RestClient.builder(
                new HttpHost("hadoop001", 9200)
        ));
    }

    /** Smoke test: verify the client object was constructed. */
    @Test
    public void test01() {
        System.out.println(client);
    }

    /**
     * Indexes a document built from a raw JSON string.
     * Manual string concatenation is error-prone; prefer the Map or
     * XContentBuilder variants below.
     */
    @Test
    public void createIndexWithJson() throws Exception {
        IndexRequest request = new IndexRequest(
                "posts",
                "doc",
                "1");
        String jsonString = "{" +
                "\"user\":\"ruozedata\"," +
                "\"postDate\":\"2013-01-30\"," +
                "\"message\":\"trying out Elasticsearch\"" +
                "}";
        request.source(jsonString, XContentType.JSON);
        // HTTP model: one request goes out --> one response comes back.
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        System.out.println(indexResponse.getId());
    }

    /**
     * Indexes a document from a Map; the client serializes it to JSON, which
     * avoids the string-concatenation mistakes of the raw-JSON approach.
     */
    @Test
    public void createIndexWithMap() throws Exception {
        Map<String, Object> jsonMap = new HashMap<>();
        jsonMap.put("user", "大树");
        jsonMap.put("postDate", new Date());
        jsonMap.put("message", "trying out Elasticsearch");
        IndexRequest request = new IndexRequest("posts", "doc", "2")
                .source(jsonMap);
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        System.out.println(indexResponse.getId());
    }

    /**
     * Indexes a document via {@link XContentBuilder}, the fluent builder
     * obtained from {@link XContentFactory}. This style is also widely used.
     */
    @Test
    public void createIndexWithXContentBuilder() throws Exception {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject()
                .field("user", "夜")
                .field("postDate", new Date())
                .field("message", "trying out Elasticsearch")
                .endObject();
        IndexRequest request = new IndexRequest("posts", "doc", "3")
                .source(builder);
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        System.out.println(indexResponse.getId());
    }

    /** Fetches a single document by index/type/id and prints its source. */
    @Test
    public void getIndex() throws Exception {
        GetRequest getRequest = new GetRequest(
                "posts",
                "doc",
                "1");
        GetResponse getResponse = client.get(getRequest, RequestOptions.DEFAULT);
        String sourceAsString = getResponse.getSourceAsString();
        System.out.println(sourceAsString);
    }

    /**
     * Existence check only — no document data is fetched.
     * NOTE: method name has a typo (should be existsIndex); kept as-is so the
     * public surface of this class does not change.
     */
    @Test
    public void gexistsIndex() throws Exception {
        GetRequest getRequest = new GetRequest(
                "posts",
                "doc",
                "1");
        // Disable _source fetching and stored fields: we only care whether the
        // document exists, similar in spirit to JPA's lazy loading, where the
        // real query is deferred until the data is actually needed.
        getRequest.fetchSourceContext(new FetchSourceContext(false));
        getRequest.storedFields("_none_");
        boolean exists = client.exists(getRequest, RequestOptions.DEFAULT);
        System.out.println(exists);
    }

    /**
     * Partial update of a document. Each write bumps the document version by
     * one; the response exposes the new version and the operation result.
     */
    @Test
    public void updateIndex() throws Exception {
        UpdateRequest request = new UpdateRequest(
                "posts",
                "doc",
                "1");
        String jsonString = "{" +
                "\"updated\":\"2017-01-01\"," +
                "\"reason\":\"daily update\"" +
                "}";
        request.doc(jsonString, XContentType.JSON);
        UpdateResponse updateResponse = client.update(
                request, RequestOptions.DEFAULT);
        System.out.println(updateResponse.getVersion());
        System.out.println(updateResponse.getResult());
    }

    /** Deletes a document by id and prints the resulting version and status. */
    @Test
    public void deleteIndex() throws Exception {
        DeleteRequest request = new DeleteRequest(
                "posts",
                "doc",
                "1");
        DeleteResponse deleteResponse = client.delete(
                request, RequestOptions.DEFAULT);
        System.out.println(deleteResponse.getVersion());
        System.out.println(deleteResponse.getResult());
    }

    /**
     * Bulk API: batches several operations into one HTTP round trip instead of
     * issuing them one by one. The response can be inspected per item to see
     * which operations succeeded and which failed.
     */
    @Test
    public void bulkIndex() throws Exception {
        BulkRequest request = new BulkRequest();
        request.add(new IndexRequest("posts", "doc", "11")
                .source(XContentType.JSON, "field", "foo"));
        request.add(new IndexRequest("posts", "doc", "12")
                .source(XContentType.JSON, "field", "bar"));
        request.add(new IndexRequest("posts", "doc", "13")
                .source(XContentType.JSON, "field", "baz"));
        BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
        if (bulkResponse.hasFailures()) {
            // Walk the per-item responses and report only the failures.
            for (BulkItemResponse bulkItemResponse : bulkResponse) {
                if (bulkItemResponse.isFailed()) {
                    BulkItemResponse.Failure failure = bulkItemResponse.getFailure();
                    System.out.println(failure.getMessage());
                }
            }
        }
    }

    /**
     * Multi-get: fetches several documents in one request. The test data has
     * two existing ids and one missing one, so both the "failed item" and the
     * "id not found" paths are exercised.
     */
    @Test
    public void multiGetIndex() throws Exception {
        MultiGetRequest request = new MultiGetRequest();
        request.add(new MultiGetRequest.Item("posts", "doc", "11"));
        request.add(new MultiGetRequest.Item("posts", "doc", "12"));
        request.add(new MultiGetRequest.Item("posts", "doc", "100"));
        MultiGetResponse response = client.mget(request, RequestOptions.DEFAULT);
        MultiGetItemResponse[] items = response.getResponses();
        for (MultiGetItemResponse item : items) {
            // A failed item (e.g. the index does not exist) carries a null
            // GetResponse; skip it to avoid a NullPointerException.
            if (item.isFailed()) {
                continue;
            }
            GetResponse getResponse = item.getResponse();
            // Skip ids that simply do not exist — printing null is useless.
            if (getResponse != null && getResponse.isExists()) {
                System.out.println(getResponse.getSourceAsString());
            }
        }
    }

    /**
     * Search API demo. The commented-out alternatives show the common query
     * builders; the active query is a multi-field match.
     */
    @Test
    public void search() throws Exception {
        SearchRequest searchRequest = new SearchRequest();
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Match everything:
        // searchSourceBuilder.query(QueryBuilders.matchAllQuery())
        // ...optionally sorted by a field, ascending or descending:
        // .sort("age", SortOrder.DESC)
        // Match on a single field:
        // searchSourceBuilder.query(QueryBuilders.matchQuery("name","tom"));
        // Exact term match:
        // searchSourceBuilder.query(QueryBuilders.termQuery("name","tom"));
        // Query-string syntax, behaves like a LIKE query on one field:
        // searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:tom*"));
        // Match the same text against several fields:
        searchSourceBuilder.query(QueryBuilders.multiMatchQuery("tom", "name", "age"));
        // Range filter:
        // .postFilter(QueryBuilders.rangeQuery("age").from(30).to(31));
        // Pagination, 5 hits per page:
        // .from(0).size(5)
        searchRequest.source(searchSourceBuilder);
        // Restrict the search to an index and a type.
        searchRequest.indices("posts");
        searchRequest.types("doc");
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        SearchHits hits = searchResponse.getHits();
        System.out.println(hits.getTotalHits());
        SearchHit[] searchHits = hits.getHits();
        for (SearchHit hit : searchHits) {
            System.out.println(hit.getSourceAsString());
        }
    }

    /**
     * Terms aggregation, equivalent to:
     * {@code select name, count(1) from xx group by name}.
     * The field must be the {@code .keyword} sub-field — aggregating on an
     * analyzed text field is disabled by default and would throw.
     */
    @Test
    public void testAggr() throws Exception {
        SearchRequest searchRequest = new SearchRequest();
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("by_name").field("name.keyword");
        searchSourceBuilder.query(QueryBuilders.matchAllQuery()).aggregation(aggregationBuilder);
        searchRequest.source(searchSourceBuilder);
        searchRequest.indices("rz");
        SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
        // Retrieve the aggregation by the name given above.
        Terms terms = response.getAggregations().get("by_name");
        // getBuckets() is declared with a bounded wildcard (Java's upper
        // bounds), so it may hold any Bucket subtype.
        for (Terms.Bucket bucket : terms.getBuckets()) {
            System.out.println(bucket.getKey() + " : " + bucket.getDocCount());
        }
    }

    /**
     * Sub-aggregation demo: total score per person, equivalent to
     * {@code select name, sum(score) from xx group by name}.
     */
    @Test
    public void testAggr02() throws Exception {
        SearchRequest searchRequest = new SearchRequest();
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Group by name (keyword sub-field, see testAggr for why).
        TermsAggregationBuilder nameAgg = AggregationBuilders.terms("by_name").field("name.keyword");
        // Sum the score inside each name bucket.
        SumAggregationBuilder scoreAgg = AggregationBuilders.sum("by_score").field("score");
        nameAgg.subAggregation(scoreAgg);
        searchSourceBuilder.query(QueryBuilders.matchAllQuery()).aggregation(nameAgg);
        searchRequest.source(searchSourceBuilder);
        searchRequest.indices("rz");
        SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
        Terms terms = response.getAggregations().get("by_name");
        for (Terms.Bucket bucket : terms.getBuckets()) {
            // Each bucket carries its own sub-aggregation results.
            Sum sum = bucket.getAggregations().get("by_score");
            System.out.println(bucket.getKey() + " : " + sum.getValue());
        }
    }

    /**
     * Counterpart of {@link #setUp()}: releases the client after each test.
     * The null check prevents a NullPointerException when client construction
     * itself failed.
     */
    @After
    public void tearDown() throws IOException {
        if (client != null) {
            client.close();
        }
    }
}
开发小技巧:.var可以快速生成返回值变量;注意sum聚合的返回值是Sum类型,而avg聚合的返回值是Avg类型,取结果时要get对应的类型