一、elasticsearch客户端交互方式
http方式,通过访问9200端口交互,还可以通过发送curl命令交互
java方式,通过访问9300端口交互
Node Client(节点客户端)
Transport Client(传输客户端)
二、elasticsearch 6.3.2 java api 之Transport Client的使用
pom.xml
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.3.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.3.2</version>
</dependency>
获取Transport client
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
try {
    // The cluster name must match the target cluster's "cluster.name" setting,
    // otherwise the client refuses the connection.
    Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
    // BUG FIX: the variable was declared as "Transport"; PreBuiltTransportClient
    // produces a TransportClient (which is what the rest of this article uses).
    TransportClient client = new PreBuiltTransportClient(settings).addTransportAddresses(
            new TransportAddress(InetAddress.getByName(HOST), Integer.parseInt(REQUEST_PORT)));
} catch (NumberFormatException | UnknownHostException e) {
    e.printStackTrace();
}
创建索引---这里整合了为索引起别名,以及通过外部文件内的json设置mapping的代码,如果不需要可去掉
/**
* 创建索引
*
* @param client
* @param shard 分片数
* @param replic 副本数
* @param index 索引名称
* @param type 索引类型
* @param filePath 存放mapping的json配置的文件地址
* @param alias 别名
* @return
*/
/**
 * Creates an index with the given shard/replica settings, optionally binds an
 * alias to it, and applies a mapping read from an external JSON file.
 *
 * @param client   connected transport client
 * @param shard    number of primary shards (numeric string)
 * @param replic   number of replicas (numeric string)
 * @param index    index name
 * @param type     mapping type name
 * @param filePath path of the file holding the mapping JSON
 * @param alias    alias to bind to the index; skipped when null/empty
 * @return true when every attempted step was acknowledged, false otherwise
 */
public static Boolean createIndex(TransportClient client, String shard, String replic, String index, String type,
        String filePath, String alias) {
    Boolean success = true;
    try {
        Boolean isExist = indexExist(client, index, type);
        // Only create when the index does not exist yet.
        if (!isExist) {
            CreateIndexRequest request = new CreateIndexRequest(index);
            request.settings(Settings.builder().put("index.number_of_shards", Integer.parseInt(shard))
                    .put("index.number_of_replicas", Integer.parseInt(replic)).put("index.refresh_interval", "5s"));
            // Create the index.
            CreateIndexResponse response = client.admin().indices().create(request).actionGet();
            if (response.isAcknowledged()) {
                System.out.println(
                        "===Success in creating " + index + ",shard is " + shard + " and replic is " + replic);
            } else {
                success = false;
                System.out.println(
                        "===Failed in creating " + index + ",shard is " + shard + " and replic is " + replic);
            }
            // Bind the alias to the freshly created index.
            if (!MiscUtils.isNullOrEmpty(alias)) {
                IndicesAliasesRequestBuilder indicesAliasesRequestBuilder = client.admin().indices()
                        .prepareAliases().addAlias(index, alias);
                IndicesAliasesResponse indicesAliasesResponse = indicesAliasesRequestBuilder.execute().actionGet();
                if (indicesAliasesResponse.isAcknowledged()) {
                    System.out.println("===Success in alias " + index + " to " + alias);
                } else {
                    success = false;
                    System.out.println("===Failed in alias " + index + " to " + alias);
                }
            }
            // Read the mapping JSON from the external file and set it on the type.
            // BUG FIX: try-with-resources guarantees the reader is closed even when
            // one of the requests above/below throws; the original only closed it on
            // the success path and leaked the file handle otherwise.
            File article = new File(filePath);
            try (BufferedReader bfr = new BufferedReader(new FileReader(article))) {
                String mapping_json = null;
                // NOTE(review): only the first line is read — this assumes the whole
                // mapping JSON sits on a single line; confirm the file format.
                if ((mapping_json = bfr.readLine()) != null) {
                    PutMappingResponse putMappingResponse = client.admin().indices().preparePutMapping(index)
                            .setType(type).setSource(JSON.parseObject(mapping_json)).execute().actionGet();
                    if (putMappingResponse.isAcknowledged()) {
                        System.out.println("===Success in setting mapping for " + type);
                    } else {
                        success = false;
                        System.out.println("===Failed in setting mapping for " + type);
                    }
                }
            }
        }
    } catch (Exception e) {
        // Any failure (parse error, I/O, transport) marks the whole call failed.
        success = false;
        e.printStackTrace();
    }
    return success;
}
检测索引是否存在
/**
* 用于检测索引是否存在
*
* @param client
* @param index 索引名称
* @param type 索引类型
* @return
*/
/**
 * Checks whether the given index exists on the cluster.
 *
 * @param client connected transport client
 * @param index  index name to probe
 * @param type   index type — currently unused; the existence check is index-level only
 * @return true when the index exists
 */
public static boolean indexExist(TransportClient client, String index, String type) {
    IndicesExistsRequest request = new IndicesExistsRequest(index);
    IndicesExistsResponse response = client.admin().indices().exists(request).actionGet();
    // Return the flag directly instead of the redundant if/else.
    return response.isExists();
}
插入数据---bulk,这里只放入核心代码
// Obtain the client connection
TransportClient client = ESJDBCHelper.getInstance().getClient();
// Bulk builder batches the writes
BulkRequestBuilder bulk = client.prepareBulk();
// A map holds the fields of the document to be written
Map<String, Object> resource = new HashMap<String, Object>();
// ... put key-value data into the map
// Write via upsert (update when the document exists, insert otherwise).
// With upsert the document id MUST be deterministic, hence the MD5 of the key.
String docId = new MD5(docId).compute().substring(0, 16);
IndexRequest parentFileIndexRequest = new IndexRequest(index, type, docId).routing(_routting).source(resource);
UpdateRequest parentFileUpdateRequest = new UpdateRequest(index, type, docId).routing(_routting)
        .doc(resource).upsert(parentFileIndexRequest);
// BUG FIX: the original only added the request in the else-branch, so the
// document that triggered the flush (num == batchSize) was silently dropped —
// one lost document per batch. Always add first, then flush when full.
bulk.add(parentFileUpdateRequest);
num++;
if (num == batchSize) {
    BulkResponse bulkResponse = bulk.execute().actionGet();
    if (bulkResponse.hasFailures()) {
        System.out.println(bulkResponse.buildFailureMessage());
    }
    num = 0;
    // Start a fresh batch for the next round.
    bulk = client.prepareBulk();
}
获取索引总记录数
// Obtain a shared client; size 0 returns no documents, only hit metadata,
// so this is a cheap way to read the total record count of index/type.
TransportClient client = ESJDBCHelper.getInstance().getClient();
long total = client.prepareSearch(index).setTypes(type).setSize(0).execute().actionGet().getHits().getTotalHits();
es的分页查询from+size和scroll
from+size:首先,该种分页方式是基于分片的,from相当于offset,size相当于size。如果from+size为190+10,分片为5,则每个分片会根据排序规则取出200条记录汇总再整体排序,即总共取出1000条数据,截取最后的10条返回。页面越大或者分页越深,需要排序的量也就越大,效率就会越低;其次,es保证分页不占用大量的堆内存,避免OOM,参数index.max_result_window默认设置为10000,如果不修改这个值,查询超过10000条数据时会报错,当然你可以对索引的该值动态修改,但是不建议这么做,因为即使不会OOM,对cpu和内存消耗很大;
scroll:可以一次性将所有满足查询条件的数据,都放到内存中。分页的时候,在内存中查询。相对浅分页,就可以避免多次读取磁盘。首先,将所有符合搜索条件的搜索结果缓存起来,可以想象成快照;其次,在遍历时候,拿到上一次遍历中的_scroll_id,然后带scroll参数,重复上一次的遍历步骤,直到返回的数据为空,表示遍历完成。每次都要传参数scroll,刷新搜索结果的缓存时间,另外不需要指定index和type(不要把缓存的时时间设置太长,占用内存)。
其中scroll不适合有跳页的情况,但是现实一般都是一页一页的翻转,所以没关系。其中滚动上下文是昂贵的,所以并不建议将其用于实时用户请求。建议使用scroll来获取es中的全量数据;
// BUG FIX: the variable was declared as "Transport"; the helper returns a TransportClient.
TransportClient client = EsJDBCHelper.getInstance().getClient();
// BUG FIX: setScroll(TimeValue.MINUS_ONE) passes an invalid negative keep-alive;
// the scroll context needs a positive TTL long enough to process one page.
SearchResponse response = client.prepareSearch(index).setTypes(type).setQuery(bqb)
        .setExplain(false).setScroll(TimeValue.timeValueMinutes(1)).setSize(10000).execute().actionGet();
while (true) {
    // NOTE(review): consume response.getHits() here before scrolling on —
    // the first search response already carries the first page of results.
    String scrollId = response.getScrollId();
    // Each scroll call must renew the keep-alive.
    response = client.prepareSearchScroll(scrollId).setScroll(TimeValue.timeValueMinutes(1)).execute().actionGet();
    if (response.getHits().getHits().length == 0) {
        break;  // empty page: the scroll is exhausted
    }
}
es查询结果处理
/**
* 将查询后获得的response转成list
*
* @param client
* @param response
* @return
*/
/**
 * Flattens a SearchResponse into a list of maps: each entry is a hit's
 * _source with the hit's id injected under the "id" key.
 *
 * @param client   transport client (not used by the conversion)
 * @param response search response to convert
 * @return one map per hit, in hit order
 */
public static List<Map<String, Object>> responseToList(TransportClient client, SearchResponse response) {
    List<Map<String, Object>> rows = new LinkedList<Map<String, Object>>();
    SearchHits searchHits = response.getHits();
    int count = searchHits.getHits().length;
    for (int pos = 0; pos < count; pos++) {
        Map<String, Object> row = searchHits.getAt(pos).getSourceAsMap();
        row.put("id", searchHits.getAt(pos).getId());
        rows.add(row);
    }
    return rows;
}
es复合查询
must | 返回的文档必须满足must子句的条件,并且参与计算分值 |
filter | 【filter以前时单独的query DSL,现在归入bool query】;子句(查询)必须出现在匹配的文档中。然而,不同于must查询的是——它不参与分数计算。 Filter子句在过滤器上下文(filter context)中执行,这意味着score被忽略并且子句可缓存【所以filter可能更快】 |
should | “权限”比must/filter低。如果没有must或者filter,有一个或者多个should子句,那么只要满足一个就可以返回。minimum_should_match参数定义了至少满足几个子句。 |
must_not | 返回的文档必须不满足must_not定义的条件 |
/**
 * Demonstrates a compound (bool) query: two scored must clauses plus one
 * unscored filter clause, printing the top five matching documents.
 */
public static void boolQuery() {
    // Named sub-queries instead of one long chain, for readability.
    QueryBuilder titleMatch = QueryBuilders.matchQuery("title", "董事");      // "董事" must appear in the analyzed "title"
    QueryBuilder titleWildcard = QueryBuilders.wildcardQuery("title", "*审*"); // "title" must match the wildcard "*审*"
    QueryBuilder authorFilter = QueryBuilders.matchQuery("author", "生物");   // filter clause: matched but not scored
    QueryBuilder query = QueryBuilders.boolQuery()
            .must(titleMatch)
            .must(titleWildcard)
            .filter(authorFilter);
    SearchResponse response = getClient().prepareSearch("telegraph").setQuery(query).setSize(5).get();
    for (SearchHit searchHit : response.getHits()) {
        System.out.println(searchHit.getSourceAsString());
    }
}
es基本查询(只针对常用举例)
一、一般查询
termQuery(精确匹配)
QueryBuilder qb = QueryBuilders.termQuery("key", obj);
termsQuery(类似于in操作)
QueryBuilders.termsQuery("ci", Arrays.asList(cis))
wildcardQuery模糊查询(?匹配单个字符,*匹配多个字符)
WildcardQueryBuilder queryBuilder = QueryBuilders.wildcardQuery("name", "*jack*");
matchAllQuery(查询所有)
QueryBuilder qb = QueryBuilders.matchAllQuery();
matchQuery(匹配单个字段的文档)
QueryBuilder qb = QueryBuilders.matchQuery("title", "article");
multiMatchQuery;
commonTermQuery(client);(一种略高级的查询,充分考虑了stop-word的低优先级,提高了查询精确性)
testPrefixQuery前缀
rangeQuery(范围查询)
QueryBuilder query = QueryBuilders.rangeQuery("age").gt(10).lt(20);
nested query(在关系查询中,存在一对多和多对一的关系。因为就会出现两种查询情况。)
二、聚合查询
avgQuery
minQuery
maxQuery
valueCountQuery//统计个数
extendedStatsQuery//统计聚合(一堆)
percentileQuery
percentileRankQuery//百分比
rangeQuery(client)//范围
...有很多,
记一次es分桶查询
BoolQueryBuilder bqb = QueryBuilders.boolQuery();
// ... compose the combined query conditions here (omitted)
// First aggregation field "om", bucket size 10000
TermsAggregationBuilder omAgg = AggregationBuilders.terms("om_count").field("om").size(10000);
// Second aggregation "field" is a scripted concatenation [rp;st;dt]
Script script = new Script("doc['rp'].value+';'+doc['st'].value+';'+doc['dt'].value");
TermsAggregationBuilder stAgg = AggregationBuilders.terms("rp_st_dt_count").script(script).size(10000);
SearchResponse response = esJDBCHelper.getClient().prepareSearch(sx_calls_index).setTypes(sx_calls_type)
        .setQuery(bqb).addAggregation(omAgg.subAggregation(stAgg).size(10000)).setExplain(false).execute().actionGet();
Map<String, JSONObject> rpMap = new LinkedHashMap<String, JSONObject>();
Map<String, String> rpNameMap = new LinkedHashMap<String, String>();
Map<String, Aggregation> aggMap = response.getAggregations().asMap();
StringTerms omTerms = (StringTerms) aggMap.get("om_count");
Iterator<Bucket> omTermIt = omTerms.getBuckets().iterator();
String om = "";
while (omTermIt.hasNext()) {
    Bucket rpBucket = omTermIt.next();
    om = rpBucket.getKeyAsString();
    StringTerms stDtTerms = (StringTerms) rpBucket.getAggregations().asMap().get("rp_st_dt_count");
    Iterator<Bucket> stDtIt = stDtTerms.getBuckets().iterator();
    while (stDtIt.hasNext()) {
        String stDt = stDtIt.next().getKeyAsString();
        // Split once instead of twice per bucket.
        String[] parts = stDt.split(";");
        String rp = parts[0];
        int dt = Integer.parseInt(parts[2]);
        if (dt > 0) {
            if (rpMap.containsKey(rp)) {
                JSONObject temp = rpMap.get(rp);
                int times = temp.getIntValue("times") + 1;
                // BUG FIX: the running total is stored under "length" but was read
                // back under the non-existent key "dts" (always 0), so the total
                // only ever held the latest dt instead of accumulating.
                int length = temp.getIntValue("length") + dt;
                temp.put("times", times);
                temp.put("length", length);
                rpMap.put(rp, temp);
            } else {
                JSONObject temp = new JSONObject();
                temp.put("rp", rp);
                temp.put("times", 1);
                temp.put("length", dt);
                temp.put("name", "");
                rpMap.put(rp, temp);
            }
        }
    }
}
记join父子文档批量写入数据
mappings设置
"properties": {
"join_fields": {
"type": "join",
"relations": {
"fields_values": [ "ph_value"]
}
}
}
}
父文档
// Parent document: mark it as the "fields_values" side of the join relation.
Map<String, Object> joinField = new HashMap<String, Object>();
joinField.put("name", "fields_values");
parentFileSource.put("join_fields", joinField);
// Upsert: insert when absent, partial-update when present.
IndexRequest parentIndexReq = new IndexRequest(index, type, parentId).source(parentFileSource);
UpdateRequest parentUpsertReq = new UpdateRequest(index, type, parentId)
        .doc(parentFileSource)
        .upsert(parentIndexReq);
bulkRequestBuilder.add(parentUpsertReq);
子文档
// Child document: joined to its parent through the "ph_value" relation.
// It must be routed with the parent's id so both land on the same shard.
Map<String, Object> childSource = new HashMap<String, Object>();
Map<String, Object> childJoin = new HashMap<String, Object>();
childJoin.put("name", "ph_value");
childJoin.put("parent", parentId);
childSource.put("phone", phone);
childSource.put("join_fields", childJoin);
IndexRequest childIndexReq = new IndexRequest(index, type, childId).routing(parentId)
        .source(childSource);
UpdateRequest childUpsertReq = new UpdateRequest(index, type, childId).routing(parentId)
        .doc(childSource)
        .upsert(childIndexReq);
bulkRequestBuilder.add(childUpsertReq);
根据父文档id查询子文档
JoinQueryBuilders.parentId("ph_value", id);
根据子文档字段值查询父文档
JoinQueryBuilders.hasChildQuery("ph_value", QueryBuilders.termQuery("phone", keyword),
ScoreMode.None)