一、elasticsearch客户端交互方式
http方式,通过访问9200端口交互,还可以通过发送curl命令交互
java方式,通过访问9300端口交互
Node Client(节点客户端)
Transport Client(传输客户端)
二、elasticsearch 6.3.2 java api 之Transport Client的使用
pom.xml
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.3.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.3.2</version>
</dependency>
获取Transport client
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
try {
    // The cluster name must match the target cluster's "cluster.name" setting,
    // otherwise the client refuses the connection.
    Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
    // BUG FIX: the variable was declared as "Transport"; PreBuiltTransportClient
    // produces a TransportClient (which is what the rest of this article uses).
    TransportClient client = new PreBuiltTransportClient(settings).addTransportAddresses(
            new TransportAddress(InetAddress.getByName(HOST), Integer.parseInt(REQUEST_PORT)));
} catch (NumberFormatException | UnknownHostException e) {
    e.printStackTrace();
}
创建索引---这里整合了为索引起别名,以及通过外部文件内的json设置mapping的代码,如果不需要可去掉
/**
* 创建索引
*
* @param client
* @param shard 分片数
* @param replic 副本数
* @param index 索引名称
* @param type 索引类型
* @param filePath 存放mapping的json配置的文件地址
* @param alias 别名
* @return
*/
/**
 * Creates an index with the given shard/replica settings, optionally binds an
 * alias to it, and applies a mapping read from an external JSON file.
 *
 * @param client   connected transport client
 * @param shard    number of primary shards (numeric string)
 * @param replic   number of replicas (numeric string)
 * @param index    index name
 * @param type     mapping type name
 * @param filePath path of the file holding the mapping JSON
 * @param alias    alias to bind to the index; skipped when null/empty
 * @return true when every attempted step was acknowledged, false otherwise
 */
public static Boolean createIndex(TransportClient client, String shard, String replic, String index, String type,
        String filePath, String alias) {
    Boolean success = true;
    try {
        Boolean isExist = indexExist(client, index, type);
        // Only create when the index does not exist yet.
        if (!isExist) {
            CreateIndexRequest request = new CreateIndexRequest(index);
            request.settings(Settings.builder().put("index.number_of_shards", Integer.parseInt(shard))
                    .put("index.number_of_replicas", Integer.parseInt(replic)).put("index.refresh_interval", "5s"));
            // Create the index.
            CreateIndexResponse response = client.admin().indices().create(request).actionGet();
            if (response.isAcknowledged()) {
                System.out.println(
                        "===Success in creating " + index + ",shard is " + shard + " and replic is " + replic);
            } else {
                success = false;
                System.out.println(
                        "===Failed in creating " + index + ",shard is " + shard + " and replic is " + replic);
            }
            // Bind the alias to the freshly created index.
            if (!MiscUtils.isNullOrEmpty(alias)) {
                IndicesAliasesRequestBuilder indicesAliasesRequestBuilder = client.admin().indices()
                        .prepareAliases().addAlias(index, alias);
                IndicesAliasesResponse indicesAliasesResponse = indicesAliasesRequestBuilder.execute().actionGet();
                if (indicesAliasesResponse.isAcknowledged()) {
                    System.out.println("===Success in alias " + index + " to " + alias);
                } else {
                    success = false;
                    System.out.println("===Failed in alias " + index + " to " + alias);
                }
            }
            // Read the mapping JSON from the external file and set it on the type.
            // BUG FIX: try-with-resources guarantees the reader is closed even when
            // one of the requests above/below throws; the original only closed it on
            // the success path and leaked the file handle otherwise.
            File article = new File(filePath);
            try (BufferedReader bfr = new BufferedReader(new FileReader(article))) {
                String mapping_json = null;
                // NOTE(review): only the first line is read — this assumes the whole
                // mapping JSON sits on a single line; confirm the file format.
                if ((mapping_json = bfr.readLine()) != null) {
                    PutMappingResponse putMappingResponse = client.admin().indices().preparePutMapping(index)
                            .setType(type).setSource(JSON.parseObject(mapping_json)).execute().actionGet();
                    if (putMappingResponse.isAcknowledged()) {
                        System.out.println("===Success in setting mapping for " + type);
                    } else {
                        success = false;
                        System.out.println("===Failed in setting mapping for " + type);
                    }
                }
            }
        }
    } catch (Exception e) {
        // Any failure (parse error, I/O, transport) marks the whole call failed.
        success = false;
        e.printStackTrace();
    }
    return success;
}
检测索引是否存在
/**
* 用于检测索引是否存在
*
* @param client
* @param index 索引名称
* @param type 索引类型
* @return
*/
/**
 * Checks whether the given index exists on the cluster.
 *
 * @param client connected transport client
 * @param index  index name to probe
 * @param type   index type — currently unused; the existence check is index-level only
 * @return true when the index exists
 */
public static boolean indexExist(TransportClient client, String index, String type) {
    IndicesExistsRequest request = new IndicesExistsRequest(index);
    IndicesExistsResponse response = client.admin().indices().exists(request).actionGet();
    // Return the flag directly instead of the redundant if/else.
    return response.isExists();
}
插入数据---bulk,这里只放入核心代码
// Obtain the client connection
TransportClient client = ESJDBCHelper.getInstance().getClient();
// Bulk builder batches the writes
BulkRequestBuilder bulk = client.prepareBulk();
// A map holds the fields of the document to be written
Map<String, Object> resource = new HashMap<String, Object>();
// ... put key-value data into the map
// Write via upsert (update when the document exists, insert otherwise).
// With upsert the document id MUST be deterministic, hence the MD5 of the key.
String docId = new MD5(docId).compute().substring(0, 16);
IndexRequest parentFileIndexRequest = new IndexRequest(index, type, docId).routing(_routting).source(resource);
UpdateRequest parentFileUpdateRequest = new UpdateRequest(index, type, docId).routing(_routting)
        .doc(resource).upsert(parentFileIndexRequest);
// BUG FIX: the original only added the request in the else-branch, so the
// document that triggered the flush (num == batchSize) was silently dropped —
// one lost document per batch. Always add first, then flush when full.
bulk.add(parentFileUpdateRequest);
num++;
if (num == batchSize) {
    BulkResponse bulkResponse = bulk.execute().actionGet();
    if (bulkResponse.hasFailures()) {
        System.out.println(bulkResponse.buildFailureMessage());
    }
    num = 0;
    // Start a fresh batch for the next round.
    bulk = client.prepareBulk();
}
获取索引总记录数
// Obtain a shared client; size 0 returns no documents, only hit metadata,
// so this is a cheap way to read the total record count of index/type.
TransportClient client = ESJDBCHelper.getInstance().getClient();
long total = client.prepareSearch(index).setTypes(type).setSize(0).execute().actionGet().getHits().getTotalHits();
es的分页查询from+size和scroll
from+size:首先,该种分页方式是基于分片的,from相当于offset,size相当于size。如果from+size为190+10,分片为5,则每个分片会根据排序规则取出200条记录汇总再整体排序,即总共取出1000条数据,截取最后的10条返回。页面越大或者分页越深,需要排序的量也就越大,效率就会越低;其次,es保证分页不占用大量的堆内存,避免OOM,参数index.max_result_window默认设置为10000,如果不修改这个值,查询超过10000条数据时会报错,当然你可以对索引的该值动态修改,但是不建议这么做,因为即使不会OOM,对cpu和内存消耗很大;
scroll:可以一次性将所有满足查询条件的数据,都放到内存中。分页的时候,在内存中查询。相对浅分页,就可以避免多次读取磁盘。首先,将所有符合搜索条件的搜索结果缓存起来,可以想象成快照;其次,在遍历时候,拿到上一次遍历中的_scroll_id,然后带scroll参数,重复上一次的遍历步骤,直到返回的数据为空,表示遍历完成。每次都要传参数scroll,刷新搜索结果的缓存时间,另外不需要指定index和type(不要把缓存的时时间设置太长,占用内存)。
其中scroll不适合有跳页的情况,但是现实一般都是一页一页的翻转,所以没关系。其中滚动上下文是昂贵的,所以并不建议将其用于实时用户请求。建议使用scroll来获取es中的全量数据;
// BUG FIX: the variable was declared as "Transport"; the helper returns a TransportClient.
TransportClient client = EsJDBCHelper.getInstance().getClient();
// BUG FIX: setScroll(TimeValue.MINUS_ONE) passes an invalid negative keep-alive;
// the scroll context needs a positive TTL long enough to process one page.
SearchResponse response = client.prepareSearch(index).setTypes(type).setQuery(bqb)
        .setExplain(false).setScroll(TimeValue.timeValueMinutes(1)).setSize(10000).execute().actionGet();
while (true) {
    // NOTE(review): consume response.getHits() here before scrolling on —
    // the first search response already carries the first page of results.
    String scrollId = response.getScrollId();
    // Each scroll call must renew the keep-alive.
    response = client.prepareSearchScroll(scrollId).setScroll(TimeValue.timeValueMinutes(1)).execute().actionGet();
    if (response.getHits().getHits().length == 0) {
        break;  // empty page: the scroll is exhausted
    }
}
es查询结果处理
/**
* 将查询后获得的response转成list
*
* @param client
* @param response
* @return
*/
/**
 * Flattens a SearchResponse into a list of maps: each entry is a hit's
 * _source with the hit's id injected under the "id" key.
 *
 * @param client   transport client (not used by the conversion)
 * @param response search response to convert
 * @return one map per hit, in hit order
 */
public static List<Map<String, Object>> responseToList(TransportClient client, SearchResponse response) {
    List<Map<String, Object>> rows = new LinkedList<Map<String, Object>>();
    SearchHits searchHits = response.getHits();
    int count = searchHits.getHits().length;
    for (int pos = 0; pos < count; pos++) {
        Map<String, Object> row = searchHits.getAt(pos).getSourceAsMap();
        row.put("id", searchHits.getAt(pos).getId());
        rows.add(row);
    }
    return rows;
}
es复合查询
must | 返回的文档必须满足must子句的条件,并且参与计算分值 |
filter | 【filter以前时单独的query DSL,现在归入bool query】;子句(查询)必须出现在匹配的文档中。然而,不同于must查询的是——它不参与分数计算。 Filter子句在过滤器上下文(filter context)中执行,这意味着score被忽略并且子句可缓存【所以filter可能更快】 |
should | “权限”比must/filter低。如果没有must或者filter,有一个或者多个should子句,那么只要满足一个就可以返回。minimum_should_match参数定义了至少满足几个子句。 |
must_not | 返回的文档必须不满足must_not定义的条件 |
/**
 * Demonstrates a compound (bool) query: two scored must clauses plus one
 * unscored filter clause, printing the top five matching documents.
 */
public static void boolQuery() {
    // Named sub-queries instead of one long chain, for readability.
    QueryBuilder titleMatch = QueryBuilders.matchQuery("title", "董事");      // "董事" must appear in the analyzed "title"
    QueryBuilder titleWildcard = QueryBuilders.wildcardQuery("title", "*审*"); // "title" must match the wildcard "*审*"
    QueryBuilder authorFilter = QueryBuilders.matchQuery("author", "生物");   // filter clause: matched but not scored
    QueryBuilder query = QueryBuilders.boolQuery()
            .must(titleMatch)
            .must(titleWildcard)
            .filter(authorFilter);
    SearchResponse response = getClient().prepareSearch("telegraph").setQuery(query).setSize(5).get();
    for (SearchHit searchHit : response.getHits()) {
        System.out.println(searchHit.getSourceAsString());
    }
}
es基本查询(只针对常用举例)
一、一般查询
termQuery(精确匹配)
QueryBuilder qb = QueryBuilders.termQuery("key", obj);
termsQuery(类似于in操作)
QueryBuilders.termsQuery("ci", Arrays.asList(cis))
wildcardQuery模糊查询(?匹配单个字符,*匹配多个字符)
WildcardQueryBuilder queryBuilder = QueryBuilders.wildcardQuery("name", "*jack*");
matchAllQuery(查询所有)
QueryBuilder qb = QueryBuilders.matchAllQuery();
matchQuery(匹配单个字段的文档)
QueryBuilder qb = QueryBuilders.matchQuery("title", "article");
multiMatchQuery;
commonTermQuery(client);(一种略高级的查询,充分考虑了stop-word的低优先级,提高了查询精确性)
testPrefixQuery前缀
rangeQuery(范围查询)
QueryBuilder query = QueryBuilders.rangeQuery("age").gt(10).lt(20);
nested query(在关系查询中,存在一对多和多对一的关系。因为就会出现两种查询情况。)
二、聚合查询
avgQuery
minQuery
maxQuery
valueCountQuery//统计个数
extendedStatsQuery//统计聚合(一堆)
percentileQuery
percentileRankQuery//百分比
rangeQuery(client)//范围
...有很多,
记一次es分桶查询
BoolQueryBuilder bqb = QueryBuilders.boolQuery();
// ... compose the combined query conditions here (omitted)
// First aggregation field "om", bucket size 10000
TermsAggregationBuilder omAgg = AggregationBuilders.terms("om_count").field("om").size(10000);
// Second aggregation "field" is a scripted concatenation [rp;st;dt]
Script script = new Script("doc['rp'].value+';'+doc['st'].value+';'+doc['dt'].value");
TermsAggregationBuilder stAgg = AggregationBuilders.terms("rp_st_dt_count").script(script).size(10000);
SearchResponse response = esJDBCHelper.getClient().prepareSearch(sx_calls_index).setTypes(sx_calls_type)
        .setQuery(bqb).addAggregation(omAgg.subAggregation(stAgg).size(10000)).setExplain(false).execute().actionGet();
Map<String, JSONObject> rpMap = new LinkedHashMap<String, JSONObject>();
Map<String, String> rpNameMap = new LinkedHashMap<String, String>();
Map<String, Aggregation> aggMap = response.getAggregations().asMap();
StringTerms omTerms = (StringTerms) aggMap.get("om_count");
Iterator<Bucket> omTermIt = omTerms.getBuckets().iterator();
String om = "";
while (omTermIt.hasNext()) {
    Bucket rpBucket = omTermIt.next();
    om = rpBucket.getKeyAsString();
    StringTerms stDtTerms = (StringTerms) rpBucket.getAggregations().asMap().get("rp_st_dt_count");
    Iterator<Bucket> stDtIt = stDtTerms.getBuckets().iterator();
    while (stDtIt.hasNext()) {
        String stDt = stDtIt.next().getKeyAsString();
        // Split once instead of twice per bucket.
        String[] parts = stDt.split(";");
        String rp = parts[0];
        int dt = Integer.parseInt(parts[2]);
        if (dt > 0) {
            if (rpMap.containsKey(rp)) {
                JSONObject temp = rpMap.get(rp);
                int times = temp.getIntValue("times") + 1;
                // BUG FIX: the running total is stored under "length" but was read
                // back under the non-existent key "dts" (always 0), so the total
                // only ever held the latest dt instead of accumulating.
                int length = temp.getIntValue("length") + dt;
                temp.put("times", times);
                temp.put("length", length);
                rpMap.put(rp, temp);
            } else {
                JSONObject temp = new JSONObject();
                temp.put("rp", rp);
                temp.put("times", 1);
                temp.put("length", dt);
                temp.put("name", "");
                rpMap.put(rp, temp);
            }
        }
    }
}
记join父子文档批量写入数据
mappings设置
"properties": {
"join_fields": {
"type": "join",
"relations": {
"fields_values": [ "ph_value"]
}
}
}
}
父文档
// Parent document: mark it as the "fields_values" side of the join relation.
Map<String, Object> joinField = new HashMap<String, Object>();
joinField.put("name", "fields_values");
parentFileSource.put("join_fields", joinField);
// Upsert: insert when absent, partial-update when present.
IndexRequest parentIndexReq = new IndexRequest(index, type, parentId).source(parentFileSource);
UpdateRequest parentUpsertReq = new UpdateRequest(index, type, parentId)
        .doc(parentFileSource)
        .upsert(parentIndexReq);
bulkRequestBuilder.add(parentUpsertReq);
子文档
// Child document: joined to its parent through the "ph_value" relation.
// It must be routed with the parent's id so both land on the same shard.
Map<String, Object> childSource = new HashMap<String, Object>();
Map<String, Object> childJoin = new HashMap<String, Object>();
childJoin.put("name", "ph_value");
childJoin.put("parent", parentId);
childSource.put("phone", phone);
childSource.put("join_fields", childJoin);
IndexRequest childIndexReq = new IndexRequest(index, type, childId).routing(parentId)
        .source(childSource);
UpdateRequest childUpsertReq = new UpdateRequest(index, type, childId).routing(parentId)
        .doc(childSource)
        .upsert(childIndexReq);
bulkRequestBuilder.add(childUpsertReq);
根据父文档id查询子文档
JoinQueryBuilders.parentId("ph_value", id);
根据子文档字段值查询父文档
JoinQueryBuilders.hasChildQuery("ph_value", QueryBuilders.termQuery("phone", keyword),
ScoreMode.None)