本文介绍如何基于 Elasticsearch 的 completion suggester 实现搜索提示,支持拼音首字母、全拼和中文三种匹配方式,步骤如下:
中文搜索使用IK分词器,IK分词器安装:https://blog.csdn.net/wwd0501/article/details/78258274
因要支持拼音提示,还需安装拼音插件。拼音插件的安装以及 IK+拼音 的使用方法见:https://blog.csdn.net/wwd0501/article/details/80622669
1、创建index,设置setting
curl -XPUT "http://localhost:9200/medcl/" -d '
{
"index": {
"analysis": {
"analyzer": {
"default": {
"tokenizer": "ik_max_word"
},
"pinyin_analyzer": {
"tokenizer": "shopmall_pinyin"
},
"first_py_letter_analyzer": {
"tokenizer": "first_py_letter"
},
"full_pinyin_letter_analyzer": {
"tokenizer": "full_pinyin_letter"
}
},
"tokenizer": {
"shopmall_pinyin": {
"keep_joined_full_pinyin": "true",
"keep_first_letter": "true",
"keep_separate_first_letter": "false",
"lowercase": "true",
"type": "pinyin",
"limit_first_letter_length": "16",
"keep_original": "true",
"keep_full_pinyin": "true",
"keep_none_chinese_in_joined_full_pinyin": "true"
},
"first_py_letter": {
"type": "pinyin",
"keep_first_letter": true,
"keep_full_pinyin": false,
"keep_original": false,
"limit_first_letter_length": 16,
"lowercase": true,
"trim_whitespace": true,
"keep_none_chinese_in_first_letter": false,
"none_chinese_pinyin_tokenize": false,
"keep_none_chinese": true,
"keep_none_chinese_in_joined_full_pinyin": true
},
"full_pinyin_letter": {
"type": "pinyin",
"keep_separate_first_letter": false,
"keep_full_pinyin": false,
"keep_original": false,
"limit_first_letter_length": 16,
"lowercase": true,
"keep_first_letter": false,
"keep_none_chinese_in_first_letter": false,
"none_chinese_pinyin_tokenize": false,
"keep_none_chinese": true,
"keep_joined_full_pinyin": true,
"keep_none_chinese_in_joined_full_pinyin": true
}
}
}
}
}'
2、mapping
curl -XPOST http://localhost:9200/medcl/folks/_mapping -d'
{
"folks": {
"properties": {
"name": {
"type": "completion",
"fields": {
"pinyin": {
"type": "completion",
"analyzer": "pinyin_analyzer"
},
"keyword_pinyin": {
"type": "completion",
"analyzer": "full_pinyin_letter_analyzer"
},
"keyword_first_py": {
"type": "completion",
"analyzer": "first_py_letter_analyzer"
}
}
}
}
}
}'
3、初始化测试数据
curl -XPOST http://localhost:9200/medcl/folks/ -d'{"name":"苹果"}'
4、搜索
curl -XPOST http://localhost:9200/medcl/folks/_search -d '
{
"size": 0,
"_source": "name",
"suggest": {
"my-suggest-1": {
"text": "苹",
"completion": {
"field": "name",
"size": 20
}
}
}
}'
5、结果
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 0,
"max_score": 0,
"hits": []
},
"suggest": {
"my-suggest-1": [{
"text": "苹",
"offset": 0,
"length": 1,
"options": [{
"text": "苹果",
"_index": "medcl",
"_type": "folks",
"_id": "AWRLJ9lrsB4QSA8b-FrJ",
"_score": 1,
"_source": {
"name": "苹果"
}
}
]
}]
}
}
6、代码实现,基于elasticsearch5.x版本
实现思路:
1)检测搜索词是中文还是拼音;
2)若是纯中文,直接按照 name 字段提示;
3)否则(拼音,或拼音+汉字),先按照 name.keyword_pinyin 获取;若无结果且搜索词为纯字母,再按照首字母字段 name.keyword_first_py 获取。
Java代码:
package test;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Demonstrates completion-suggester lookups against the {@code medcl} index.
 *
 * <p>The suggestion field is chosen from the shape of the search text: pure Chinese input is
 * matched against {@code name}; anything else (pinyin, or pinyin mixed with Chinese) is matched
 * against {@code name.keyword_pinyin}, with a fallback to {@code name.keyword_first_py} when the
 * input is letters only and the full-pinyin lookup returned nothing.
 */
public class EsSuggestTest01 {

    /** Name under which the completion suggestion is registered and later read back. */
    private static final String SUGGESTION_NAME = "my-suggest-1";

    /** Maximum number of suggestion options requested per lookup. */
    private static final int MAX_SUGGESTIONS = 20;

    /** Matches text made up solely of ASCII letters; compiled once instead of per call. */
    private static final Pattern LETTERS_ONLY = Pattern.compile("^[A-Za-z]+$");

    /** Matches text made up solely of characters in the range U+4E00..U+9FA5. */
    private static final Pattern CHINESE_ONLY = Pattern.compile("^[\u4E00-\u9FA5]+$");

    private static Client client;

    /** Obtains the client from {@code ElasticsearchConfiguration} before each test. */
    @Before
    public void init() throws UnknownHostException {
        client = ElasticsearchConfiguration.getClient();
    }

    /** Closes the client through {@code ElasticsearchConfiguration} after each test. */
    @After
    public void close() {
        ElasticsearchConfiguration.close();
    }

    /** Runs one suggestion lookup for a sample search text and prints every distinct suggestion. */
    @Test
    public void test() {
        String index = "medcl";
        String type = "folks";
        QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();
        String text = "苹";

        // Pure Chinese goes to the plain field; letters-only and mixed input both go to full pinyin.
        String field = checkChinese(text) ? "name" : "name.keyword_pinyin";
        Set<String> results = getSuggestWord(index, type, field, text, queryBuilder);

        // No full-pinyin hit for letters-only input: retry against the first-letter sub-field.
        // This must be a sub-field of "name", the only completion field the mapping defines.
        if (results.isEmpty() && checkLetter(text)) {
            field = "name.keyword_first_py";
            results = getSuggestWord(index, type, field, text, queryBuilder);
        }

        for (String result : results) {
            System.out.println(result);
        }
    }

    /**
     * Fetches completion suggestions for {@code text} from the given completion field.
     *
     * <p>The request sets {@code size} to 0 so that no hits are returned alongside the
     * suggestions, and disables {@code _source} fetching to keep the response small. To return
     * selected source fields instead, use {@code setFetchSource("name", null)}.
     *
     * <p>Duplicate suggestion texts are collapsed on the client side because Elasticsearch 5.2
     * cannot filter them; from 6.1 onwards the {@code skip_duplicates} option does this on the
     * server (it defaults to {@code false}).
     *
     * @param index        index to search
     * @param type         document type to search
     * @param field        completion field (or sub-field) to take suggestions from
     * @param text         text typed by the user
     * @param queryBuilder query sent along with the suggest request
     * @return the distinct suggestion texts in natural string order; empty when there are none
     * @author wangweidong
     */
    public static Set<String> getSuggestWord(String index, String type, String field, String text, QueryBuilder queryBuilder) {
        Set<String> results = new TreeSet<String>();

        CompletionSuggestionBuilder suggestionBuilder = new CompletionSuggestionBuilder(field);
        suggestionBuilder.text(text);
        suggestionBuilder.size(MAX_SUGGESTIONS);

        SuggestBuilder suggestBuilder = new SuggestBuilder();
        suggestBuilder.addSuggestion(SUGGESTION_NAME, suggestionBuilder);

        SearchRequestBuilder searchRequestBuilder = client.prepareSearch(index).setTypes(type);
        searchRequestBuilder.setExplain(false);
        searchRequestBuilder.setSize(0);
        searchRequestBuilder.setQuery(queryBuilder);
        searchRequestBuilder.suggest(suggestBuilder);
        searchRequestBuilder.setFetchSource(false);

        SearchResponse response = searchRequestBuilder.execute().actionGet();

        // Guard against a response without a suggest section or without our named suggestion,
        // so callers get an empty set rather than a NullPointerException.
        Suggest suggest = response.getSuggest();
        if (suggest == null) {
            return results;
        }
        CompletionSuggestion suggestion = suggest.getSuggestion(SUGGESTION_NAME);
        if (suggestion == null) {
            return results;
        }

        List<CompletionSuggestion.Entry> entries = suggestion.getEntries();
        for (CompletionSuggestion.Entry entry : entries) {
            for (Suggest.Suggestion.Entry.Option option : entry.getOptions()) {
                results.add(option.getText().toString());
            }
        }
        return results;
    }

    /**
     * Checks whether the text consists of ASCII letters only.
     *
     * @param cardNum text to check
     * @return {@code true} if the text is non-empty and every character is an ASCII letter
     */
    public static boolean checkLetter(String cardNum) {
        return LETTERS_ONLY.matcher(cardNum).matches();
    }

    /**
     * Checks whether the text consists of Chinese characters only.
     *
     * @param chinese text to check
     * @return {@code true} if the text is non-empty and every character is in U+4E00..U+9FA5
     */
    public static boolean checkChinese(String chinese) {
        return CHINESE_ONLY.matcher(chinese).matches();
    }
}
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.List;
/**
 * Static holder for the {@link TransportClient} used to talk to the Elasticsearch cluster.
 *
 * <p>The client is created on the first call to {@link #getClient()} and reused until
 * {@link #close()} is called. Access is not synchronized.
 */
public class ElasticsearchConfiguration {

    /** Value sent as the {@code cluster.name} setting. */
    private static final String CLUSTER_NAME = "shop-es";

    /** Node addresses written as URIs so host and port can be extracted with {@link URI}. */
    private static final List<String> CLUSTER_NODES =
            Arrays.asList("http://172.16.32.6:9300", "http://172.16.32.8:9300");

    /** The client currently open, or {@code null} when none has been created or it was closed. */
    private static TransportClient client;

    private ElasticsearchConfiguration() {
    }

    /**
     * Returns the shared client, creating and connecting it first if none is open.
     *
     * <p>Reusing the open client avoids leaking the previous instance, which the old
     * create-on-every-call behaviour did by overwriting the only reference to it.
     *
     * @return the open transport client
     * @throws UnknownHostException if a configured node host cannot be resolved
     */
    public static Client getClient() throws UnknownHostException {
        if (client != null) {
            return client;
        }

        Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
        TransportClient created = new PreBuiltTransportClient(settings);
        try {
            for (String node : CLUSTER_NODES) {
                URI host = URI.create(node);
                created.addTransportAddress(
                        new InetSocketTransportAddress(InetAddress.getByName(host.getHost()), host.getPort()));
            }
        } catch (UnknownHostException | RuntimeException e) {
            // Do not leave a half-configured client open when a node address is unusable.
            created.close();
            throw e;
        }

        client = created;
        return client;
    }

    /**
     * Closes the shared client if one is open; does nothing otherwise.
     *
     * <p>The reference is cleared so that a later {@link #getClient()} builds a fresh client
     * instead of handing back a closed one.
     */
    public static void close() {
        if (client != null) {
            client.close();
            client = null;
        }
    }
}
参考官网地址:https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-suggesters-completion.html