elasticsearch java api

一、elasticsearch客户端交互方式

http方式,通过访问9200端口交互,还可以通过发送curl命令交互

 

java方式,通过访问9300端口交互

Node Client(节点客户端)

Transport Client(传输客户端)

二、elasticsearch 6.3.2 java api 之Transport Client的使用

 pom.xml

		<dependency>
			<groupId>org.elasticsearch.client</groupId>
			<artifactId>transport</artifactId>
			<version>6.3.2</version>
		</dependency>
		<dependency>
			<groupId>org.elasticsearch</groupId>
			<artifactId>elasticsearch</artifactId>
			<version>6.3.2</version>
		</dependency>

获取Transport client

import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
        try {
            // Set the cluster name the client must join.
            Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
            // Create the client. The declared type must be TransportClient —
            // the original "Transport client" does not compile ("Transport" is
            // not a client type in the ES Java API).
            TransportClient client = new PreBuiltTransportClient(settings).addTransportAddresses(
                    new TransportAddress(InetAddress.getByName(HOST), Integer.parseInt(REQUEST_PORT)));
        } catch (NumberFormatException | UnknownHostException e) {
            e.printStackTrace();
        }

创建索引---这里整合了为索引起别名,以及通过外部文件内的json设置mapping的代码,如果不需要可去掉

    /**
	 * 创建索引
	 * 
	 * @param client 
	 * @param shard 分片数
	 * @param replic 副本数
	 * @param index 索引名称
	 * @param type 索引类型
	 * @param filePath  存放mapping的json配置的文件地址
	 * @param alias 别名
	 * @return
	 */
	/**
	 * Creates an index with the given shard/replica settings, optionally binds
	 * an alias to it, and applies the type mapping read from a JSON file.
	 *
	 * @param client   connected transport client
	 * @param shard    number of primary shards, as a numeric string
	 * @param replic   number of replicas, as a numeric string
	 * @param index    name of the index to create
	 * @param type     mapping type name
	 * @param filePath path of the file containing the mapping JSON
	 * @param alias    alias to bind to the index; skipped when null or empty
	 * @return true when every performed step was acknowledged, false otherwise
	 */
	public static Boolean createIndex(TransportClient client, String shard, String replic, String index, String type,
			String filePath, String alias) {

		Boolean success = true;
		try {
			Boolean isExist = indexExist(client, index, type);
			// Only create the index when it does not exist yet.
			if (!isExist) {
				CreateIndexRequest request = new CreateIndexRequest(index);
				request.settings(Settings.builder().put("index.number_of_shards", Integer.parseInt(shard))
						.put("index.number_of_replicas", Integer.parseInt(replic)).put("index.refresh_interval", "5s"));

				// Create the index.
				CreateIndexResponse response = client.admin().indices().create(request).actionGet();
				if (response.isAcknowledged()) {
					System.out.println(
							"===Success in creating " + index + ",shard is " + shard + " and replic is " + replic);
				} else {
					success = false;
					System.out.println(
							"===Failed in creating " + index + ",shard is " + shard + " and replic is " + replic);
				}

				// Bind the alias to the new index, when one was supplied.
				if (!MiscUtils.isNullOrEmpty(alias)) {
					IndicesAliasesRequestBuilder indicesAliasesRequestBuilder = client.admin().indices()
							.prepareAliases().addAlias(index, alias);
					IndicesAliasesResponse indicesAliasesResponse = indicesAliasesRequestBuilder.execute().actionGet();
					if (indicesAliasesResponse.isAcknowledged()) {
						System.out.println("===Success in alias " + index + " to " + alias);
					} else {
						success = false;
						System.out.println("===Failed in alias " + index + " to " + alias);
					}
				}

				// Read the WHOLE mapping JSON file (the previous version read only
				// the first line, which silently breaks on pretty-printed JSON) and
				// apply it as the type mapping. try-with-resources guarantees the
				// reader is closed even when a request below throws — before, an
				// exception leaked the FileReader/BufferedReader.
				// NOTE(review): FileReader uses the platform default charset;
				// confirm the mapping file is stored in that encoding.
				StringBuilder mappingJson = new StringBuilder();
				try (BufferedReader bfr = new BufferedReader(new FileReader(filePath))) {
					String line;
					while ((line = bfr.readLine()) != null) {
						mappingJson.append(line);
					}
				}
				if (mappingJson.length() > 0) {
					PutMappingResponse putMappingResponse = client.admin().indices().preparePutMapping(index)
							.setType(type).setSource(JSON.parseObject(mappingJson.toString())).execute().actionGet();
					if (putMappingResponse.isAcknowledged()) {
						System.out.println("===Success in setting mapping for " + type);
					} else {
						success = false;
						System.out.println("===Failed in setting mapping for " + type);
					}
				}

			}
		} catch (Exception e) {
			success = false;
			e.printStackTrace();
		}

		return success;
	}

检测索引是否存在

    /**
	 * 用于检测索引是否存在
	 * 
	 * @param client 
	 * @param index 索引名称
	 * @param type 索引类型
	 * @return
	 */
	/**
	 * Checks whether the given index exists on the cluster.
	 *
	 * @param client connected transport client
	 * @param index  index name to probe
	 * @param type   mapping type name (kept for interface compatibility; the
	 *               existence check itself only uses the index name)
	 * @return true when the index exists
	 */
	public static boolean indexExist(TransportClient client, String index, String type) {
		IndicesExistsRequest existsRequest = new IndicesExistsRequest(index);
		IndicesExistsResponse existsResponse = client.admin().indices().exists(existsRequest).actionGet();
		return existsResponse.isExists();
	}

插入数据---bulk,这里只放入核心代码

// Obtain the shared client connection.
TransportClient client = ESJDBCHelper.getInstance().getClient();
// Bulk builder that batches the pending write requests.
BulkRequestBuilder bulk = client.prepareBulk();

// Collect the fields of the document to write into a map.
Map<String, Object> resource = new HashMap<String, Object>();
// ... put the data into the map as key/value pairs
// Write with upsert semantics (update when present, insert when absent).
// With upsert the document id must be deterministic, so derive it from the
// record's unique business key. (The original initialized docId from itself,
// which does not compile.)
String docId = new MD5(businessKey).compute().substring(0, 16);
IndexRequest parentFileIndexRequest = new IndexRequest(index, type, docId).routing(_routting).source(resource);
UpdateRequest parentFileUpdateRequest = new UpdateRequest(index, type, docId).routing(_routting)
		.doc(resource).upsert(parentFileIndexRequest);
// Add first, THEN flush when the batch is full: the original flushed on
// num == batchSize without adding the current request, silently dropping
// one record per batch.
bulk.add(parentFileUpdateRequest);
num++;
if (num >= batchSize) {
	BulkResponse bulkResponse = bulk.execute().actionGet();
	if (bulkResponse.hasFailures()) {
		System.out.println(bulkResponse.buildFailureMessage());
	}
	num = 0;
	bulk = client.prepareBulk();
}

获取索引总记录数

// Obtain the shared client from the helper singleton.
TransportClient client = ESJDBCHelper.getInstance().getClient();
// Total record count: a size-0 search returns only hit metadata (no documents).
long total = client.prepareSearch(index).setTypes(type).setSize(0).execute().actionGet().getHits().getTotalHits();

es的分页查询from+size和scroll

from+size:首先,该种分页方式是基于分片的,from相当于offset,size相当于每页返回的记录数。如果from+size为190+10,分片为5,则每个分片会根据排序规则取出200条记录汇总再整体排序,即总共取出1000条数据,截取最后的10条返回。页面越大或者分页越深,需要排序的量也就越大,效率就会越低;其次,es为保证分页不占用大量的堆内存,避免OOM,参数index.max_result_window默认设置为10000,如果不修改这个值,查询超过10000条数据时会报错,当然你可以对索引的该值动态修改,但是不建议这么做,因为即使不会OOM,对cpu和内存消耗很大;

 

scroll:可以一次性将所有满足查询条件的数据,都放到内存中。分页的时候,在内存中查询。相对浅分页,就可以避免多次读取磁盘。首先,将所有符合搜索条件的搜索结果缓存起来,可以想象成快照;其次,在遍历的时候,拿到上一次遍历中的_scroll_id,然后带scroll参数,重复上一次的遍历步骤,直到返回的数据为空,表示遍历完成。每次都要传参数scroll,刷新搜索结果的缓存时间,另外不需要指定index和type(不要把缓存的时间设置太长,占用内存)。

其中scroll不适合有跳页的情况,但是现实一般都是一页一页的翻转,所以没关系。其中滚动上下文是昂贵的,所以并不建议将其用于实时用户请求。建议使用scroll来获取es中的全量数据;

TransportClient client = EsJDBCHelper.getInstance().getClient();
// The scroll keep-alive must be a positive duration telling ES how long to
// keep the search context alive between fetches; TimeValue.MINUS_ONE is not
// a valid keep-alive. (Also: the declared type must be TransportClient.)
SearchResponse response = client.prepareSearch(index).setTypes(type).setQuery(bqb)
					.setExplain(false).setScroll(TimeValue.timeValueMinutes(1)).setSize(10000).execute().actionGet();
while (response.getHits().getHits().length > 0) {
	// ... process response.getHits() here. The original loop scrolled before
	// reading the response, discarding the first batch of 10000 hits.
	String scrollId = response.getScrollId();
	response = client.prepareSearchScroll(scrollId).setScroll(TimeValue.timeValueMinutes(1)).execute().actionGet();
}

es查询结果处理

	/**
	 * 将查询后获得的response转成list
	 *
	 * @param client
	 * @param response
	 * @return
	 */
	/**
	 * Flattens a search response into a list of maps, one map per hit, with
	 * the document id stored under the "id" key.
	 *
	 * @param client   transport client (kept for interface compatibility; not
	 *                 used by the conversion itself)
	 * @param response search response to convert
	 * @return list of source maps, each augmented with its document id
	 */
	public static List<Map<String, Object>> responseToList(TransportClient client, SearchResponse response) {
		List<Map<String, Object>> rows = new LinkedList<Map<String, Object>>();
		for (SearchHit hit : response.getHits()) {
			Map<String, Object> row = hit.getSourceAsMap();
			row.put("id", hit.getId());
			rows.add(row);
		}
		return rows;
	}

es复合查询

must返回的文档必须满足must子句的条件,并且参与计算分值
filter【filter以前时单独的query DSL,现在归入bool query】;子句(查询)必须出现在匹配的文档中。然而,不同于must查询的是——它不参与分数计算。 Filter子句在过滤器上下文(filter context)中执行,这意味着score被忽略并且子句可缓存【所以filter可能更快】
should“权限”比must/filter低。如果没有must或者filter,有一个或者多个should子句,那么只要满足一个就可以返回。minimum_should_match参数定义了至少满足几个子句。
must_not返回的文档必须不满足must_not定义的条件

 

	/**
	 * 多条件查询
	 */
	/**
	 * Demonstrates a compound (bool) query: two scoring "must" clauses plus a
	 * non-scoring "filter" clause, printing the first five matching documents.
	 */
	public static void boolQuery() {

		QueryBuilder compound = QueryBuilders.boolQuery()
				// "董事" must appear among the analyzed terms of "title"
				.must(QueryBuilders.matchQuery("title", "董事"))
				// "title" must match the wildcard pattern "*审*"
				.must(QueryBuilders.wildcardQuery("title", "*审*"))
				// filter: match "author" against "生物" without affecting the score
				.filter(QueryBuilders.matchQuery("author", "生物"));

		SearchResponse searchResponse = getClient().prepareSearch("telegraph").setQuery(compound).setSize(5).get();
		for (SearchHit hit : searchResponse.getHits()) {
			System.out.println(hit.getSourceAsString());
		}
	}

 es基本查询(只针对常用举例)

一、一般查询

termQuery(精确匹配)

          QueryBuilder qb = QueryBuilders.termQuery("key", obj);

termsQuery(类似于in操作)

         QueryBuilders.termsQuery("ci", Arrays.asList(cis))

wildcardQuery模糊查询(?匹配单个字符,*匹配多个字符)

          WildcardQueryBuilder queryBuilder = QueryBuilders.wildcardQuery("name",   "*jack*");

 matchAllQuery(查询所有)

          QueryBuilder qb = QueryBuilders.matchAllQuery();

matchQuery(匹配单个字段的文档)

         QueryBuilder qb = QueryBuilders.matchQuery("title", "article");

multiMatchQuery(在多个字段上执行同一个match查询)

commonTermsQuery(一种略高级的查询,充分考虑了stop-word的低优先级,提高了查询精确性)

testPrefixQuery前缀

rangeQuery(范围查询)

        QueryBuilder query = QueryBuilders.rangeQuery("age").gt(10).lt(20);

nested query(在关系查询中,存在一对多和多对一的关系。因为就会出现两种查询情况。)

二、聚合查询
avgQuery

minQuery

maxQuery

valueCountQuery//统计个数

extendedStatsQuery//统计聚合(一堆)

percentileQuery

percentileRankQuery//百分比

rangeQuery(client)//范围

...有很多,

 

记一次es分桶查询

	BoolQueryBuilder bqb = QueryBuilders.boolQuery();
	...这里省略组合查询条件

	// 多字段聚合的第一个字段om,并设置分桶为10000
// First grouping key: terms aggregation on "om", bucket cap 10000.
TermsAggregationBuilder omAgg = AggregationBuilders.terms("om_count").field("om").size(10000);

// Second grouping key: a script-built composite value "rp;st;dt"
// (the original comment said [rp;st;rt], but the script concatenates dt).
Script script = new Script("doc['rp'].value+';'+doc['st'].value+';'+doc['dt'].value");
TermsAggregationBuilder stAgg = AggregationBuilders.terms("rp_st_dt_count").script(script).size(10000);
SearchResponse response = esJDBCHelper.getClient().prepareSearch(sx_calls_index).setTypes(sx_calls_type)
		.setQuery(bqb).addAggregation(omAgg.subAggregation(stAgg).size(10000)).setExplain(false).execute().actionGet();

// rp -> {rp, times, length, name}: per-rp call count and accumulated duration.
Map<String, JSONObject> rpMap = new LinkedHashMap<String, JSONObject>();

Map<String, String> rpNameMap = new LinkedHashMap<String, String>();
Map<String, Aggregation> aggMap = response.getAggregations().asMap();
StringTerms omTerms = (StringTerms) aggMap.get("om_count");
Iterator<Bucket> omTermIt = omTerms.getBuckets().iterator();
String om = "";
while (omTermIt.hasNext()) {
	Bucket rpBucket = omTermIt.next();
	om = rpBucket.getKeyAsString();
	StringTerms stDtTerms = (StringTerms) rpBucket.getAggregations().asMap().get("rp_st_dt_count");
	Iterator<Bucket> stDtIt = stDtTerms.getBuckets().iterator();
	while (stDtIt.hasNext()) {
		String stDt = stDtIt.next().getKeyAsString();
		// Composite bucket key is "rp;st;dt" — split once instead of twice.
		String[] parts = stDt.split(";");
		String rp = parts[0];
		int dt = Integer.parseInt(parts[2]);
		if (dt > 0) {
			if (rpMap.containsKey(rp)) {
				JSONObject temp = rpMap.get(rp);
				int times = temp.getIntValue("times") + 1;
				// BUG FIX: the accumulated duration is stored under "length"
				// but was read back as "dts" (always 0), so durations never
				// accumulated across buckets.
				int dts = temp.getIntValue("length") + dt;
				temp.put("times", times);
				temp.put("length", dts);
				rpMap.put(rp, temp);
			} else {
				JSONObject temp = new JSONObject();
				temp.put("rp", rp);
				temp.put("times", 1);
				temp.put("length", dt);
				temp.put("name", "");
				rpMap.put(rp, temp);
			}
		}
	}
}

记join父子文档批量写入数据

mappings设置

"properties": {
   "join_fields": {
            "type": "join",
            "relations": {
                "fields_values": [ "ph_value"]
            }
        }
    }
}

父文档

// Mark this document as the parent side ("fields_values") of the join relation.
Map<String, Object> joinField = new HashMap<String, Object>();
joinField.put("name", "fields_values");
parentFileSource.put("join_fields", joinField);
// Upsert: merge the doc when the id exists, index it fresh when it does not.
IndexRequest parentFileIndexRequest = new IndexRequest(index, type, parentId).source(parentFileSource);
UpdateRequest parentFileUpdateRequest =
        new UpdateRequest(index, type, parentId).doc(parentFileSource).upsert(parentFileIndexRequest);
bulkRequestBuilder.add(parentFileUpdateRequest);

子文档

// Source of the child document.
Map<String, Object> phChild = new HashMap<String, Object>();
// Join field: names the child relation ("ph_value") and points at the parent id.
Map<String, Object> phChildInfo = new HashMap<String, Object>();
phChildInfo.put("name", "ph_value");
phChildInfo.put("parent", parentId);
phChild.put("phone", phone);
phChild.put("join_fields", phChildInfo);
// Children must be routed by the parent id so both land on the same shard.
IndexRequest childFileIndexRequest = new IndexRequest(index, type, childId).routing(parentId)
                                        .source(phChild);
// Upsert: update the doc when present, fall back to the index request when absent.
UpdateRequest childFileUpateRequest = new UpdateRequest(index, type, childId).routing(parentId)
                                        .doc(phChild)
                                        .upsert(childFileIndexRequest);
bulkRequestBuilder.add(childFileUpateRequest);

根据父文档id查询子文档

JoinQueryBuilders.parentId("ph_value", id);

根据子文档字段值查询父文档

JoinQueryBuilders.hasChildQuery("ph_value", QueryBuilders.termQuery("phone", keyword),
                        ScoreMode.None)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值