创建索引模板
# Index template applied to every new index whose name matches "search-company*".
#
# settings : 6 primary shards, no replicas, 60s refresh interval, and an async
#            translog synced every 120s.
# mappings : date detection is off; dynamic templates pick the field type from
#            the field-name suffix (first matching template wins, "strings" is
#            the catch-all); the explicit properties declare the searchable
#            text fields (each with a "raw" keyword sub-field), the geo_point
#            location, and the nested child collections.
#
# ignore_above is 8191 rather than 32766: ignore_above counts characters, while
# Lucene's per-term limit is 32766 bytes. With multi-byte UTF-8 text such as
# Chinese, a value can stay under 32766 characters yet exceed 32766 bytes and
# make the whole document fail to index. 8191 = 32766 / 4 is safe for any UTF-8.
#
# NOTE(review): "constant" is not one of Elasticsearch's built-in similarity
#   types; presumably it is supplied by a plugin - confirm before applying.
# NOTE(review): some template names do not describe their mapping
#   ("count_as_long" maps to keyword, "time_as_date" maps to long); the names
#   are only labels, but confirm the mapped types are the intended ones.
PUT /_template/search-company
{
  "index_patterns": [
    "search-company*"
  ],
  "settings": {
    "index": {
      "refresh_interval": "60s",
      "number_of_shards": 6,
      "number_of_replicas": 0,
      "similarity": {
        "default": {
          "type": "constant"
        }
      },
      "max_result_window": 100000,
      "translog.durability": "async",
      "translog.sync_interval": "120s"
    }
  },
  "mappings": {
    "date_detection": false,
    "dynamic_templates": [
      {
        "flag_as_string": {
          "match_mapping_type": "string",
          "match": "*_flag",
          "mapping": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      {
        "no_as_string": {
          "match_mapping_type": "string",
          "match": "*_no",
          "mapping": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      {
        "code_as_string": {
          "match_mapping_type": "string",
          "match": "*_code",
          "mapping": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      {
        "count_as_long": {
          "match_mapping_type": "string",
          "match": "*_count",
          "mapping": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      {
        "tag_as_string": {
          "match_mapping_type": "string",
          "match": "*_tag",
          "mapping": {
            "type": "text"
          }
        }
      },
      {
        "time_as_date": {
          "match_mapping_type": "string",
          "match": "*_time",
          "mapping": {
            "type": "long"
          }
        }
      },
      {
        "time_numeric_as_date": {
          "match_mapping_type": "long",
          "match": "*_time",
          "mapping": {
            "type": "long"
          }
        }
      },
      {
        "amount_as_float": {
          "match_mapping_type": "string",
          "match": "*_amount",
          "mapping": {
            "type": "double"
          }
        }
      },
      {
        "amount_numeric_as_float": {
          "match_mapping_type": "double",
          "match": "*_amount",
          "mapping": {
            "type": "double"
          }
        }
      },
      {
        "score_as_float": {
          "match_mapping_type": "string",
          "match": "*_score",
          "mapping": {
            "type": "double"
          }
        }
      },
      {
        "score_numeric_as_float": {
          "match_mapping_type": "double",
          "match": "*_score",
          "mapping": {
            "type": "double"
          }
        }
      },
      {
        "strings": {
          "match_mapping_type": "string",
          "mapping": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      }
    ],
    "properties": {
      "company_id": {
        "type": "keyword"
      },
      "company_name_tag": {
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      "old_name_tag": {
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      "complex_company_name_tag": {
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      "administrative_division_tag": {
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword",
            "ignore_above": 8191
          }
        }
      },
      "location": {
        "type": "geo_point"
      },
      "bidding_no": {
        "type": "text"
      },
      "mobile_tag": {
        "type": "keyword"
      },
      "phone_tag": {
        "type": "keyword"
      },
      "email_tag": {
        "type": "keyword"
      },
      "qq_tag": {
        "type": "keyword"
      },
      "boost": {
        "type": "double"
      },
      "allow": {
        "type": "nested"
      },
      "annual_report": {
        "type": "nested"
      },
      "bidding": {
        "type": "nested"
      },
      "job": {
        "type": "nested"
      },
      "website": {
        "type": "nested"
      },
      "tiktok": {
        "type": "nested"
      },
      "weibo": {
        "type": "nested"
      },
      "wechat": {
        "type": "nested"
      },
      "applet": {
        "type": "nested"
      },
      "android": {
        "type": "nested"
      },
      "ios": {
        "type": "nested"
      },
      "extension": {
        "type": "nested"
      },
      "shop": {
        "type": "nested"
      },
      "patent": {
        "type": "nested"
      },
      "trademark": {
        "type": "nested"
      },
      "copyright_work": {
        "type": "nested"
      },
      "copyright_soft": {
        "type": "nested"
      },
      "cert": {
        "type": "nested"
      },
      "abnormal": {
        "type": "nested"
      },
      "court_announcement": {
        "type": "nested"
      },
      "business": {
        "type": "nested"
      },
      "company_brand": {
        "type": "nested"
      }
    }
  }
}
查询语句涉及的搜索函数
- function_score
  - 针对核心召回字段的相关属性进行自定义打分。例如,企业拥有荣誉称号时可以加分。
- multi_match
  - 根据召回字段的相关性设置不同的召回策略。例如,用关键字“阿里巴巴”搜索时,按连续匹配、分词匹配等相关性分别打分。
实现难点
比如我们输入关键词“阿里”,如何才能准确召回“杭州阿里巴巴(中国)网络技术有限公司”?首先,公司名称是关键词搜索的主要来源;其次,还需要多个维度的数据来支撑搜索,比如公司的网站名称、App 名称、软件著作权、微信公众号、招聘岗位名称等,这些都可以作为搜索对象。通过为不同字段设置不同的权重来取得平衡,最终得到需要的结果。
# Keyword search over company documents, scored with function_score.
#
# query.bool.must   : recall gate - the keyword must match at least one of the
#                     listed fields; its boost is 0 so it adds nothing to the score.
# query.bool.should : three scoring variants of the same multi_match (phrase,
#                     most_fields, best_fields) with boosts 100 / 5 / 0.5.
# functions         : filter + weight pairs; the weights of all matching
#                     functions are multiplied together (score_mode "multiply").
GET company_test1/_search
{
  "from": 0,
  "size": 10,
  "timeout": "3000ms",
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "bool": {
                "should": [
                  {
                    "multi_match": {
                      "query": "菜鸟",
                      "fields": [
                        "app_name_tag^0.5", // "^n" is the per-field boost
                        "company_name_tag^2.0",
                        "copy_right_soft_full_title_tag^0.5",
                        "copy_right_work_title_tag^0.5",
                        "ec_goods_title_tag^0.5",
                        "ec_shop_name_tag^1.0",
                        "job_title_tag^0.5",
                        "trademark_title_tag^0.5",
                        "website_title_tag^0.5",
                        "wechat_public_tag^1.0"
                      ],
                      "type": "best_fields", // score by the single best-matching field
                      "operator": "AND", // all query terms must match within one field
                      "slop": 0,
                      "prefix_length": 0,
                      "max_expansions": 50,
                      "zero_terms_query": "NONE",
                      "auto_generate_synonyms_phrase_query": true,
                      "fuzzy_transpositions": true,
                      // NOTE(review): presumably has no effect while operator is AND - confirm
                      "minimum_should_match": "30%",
                      "boost": 0 // boost 0: this clause only filters, it adds no score
                    }
                  }
                ],
                "adjust_pure_negative": true,
                "boost": 1
              }
            }
          ],
          "should": [
            {
              "multi_match": {
                "query": "菜鸟",
                "fields": [
                  "app_name_tag^0.5",
                  "company_name_tag^2.0",
                  "copy_right_soft_full_title_tag^0.5",
                  "copy_right_work_title_tag^0.5",
                  "ec_goods_title_tag^0.5",
                  "ec_shop_name_tag^1.0",
                  "job_title_tag^0.5",
                  "trademark_title_tag^0.5",
                  "website_title_tag^0.5",
                  "wechat_public_tag^1.0"
                ],
                "type": "phrase", // phrase match per field; the best field gives the score (no tie_breaker is set)
                "operator": "OR",
                "slop": 0,
                "prefix_length": 0,
                "max_expansions": 50,
                "zero_terms_query": "NONE",
                "auto_generate_synonyms_phrase_query": true,
                "fuzzy_transpositions": true,
                "boost": 100
              }
            },
            {
              "multi_match": {
                "query": "菜鸟",
                "fields": [
                  "app_name_tag^0.5",
                  "company_name_tag^2.0",
                  "copy_right_soft_full_title_tag^0.5",
                  "copy_right_work_title_tag^0.5",
                  "ec_goods_title_tag^0.5",
                  "ec_shop_name_tag^1.0",
                  "job_title_tag^0.5",
                  "trademark_title_tag^0.5",
                  "website_title_tag^0.5",
                  "wechat_public_tag^1.0"
                ],
                "type": "most_fields", // combine the scores of every matching field
                "operator": "AND",
                "slop": 0,
                "prefix_length": 0,
                "max_expansions": 50,
                "zero_terms_query": "NONE",
                "auto_generate_synonyms_phrase_query": true,
                "fuzzy_transpositions": true,
                "boost": 5
              }
            },
            {
              "multi_match": {
                "query": "菜鸟",
                "fields": [
                  "app_name_tag^0.5",
                  "company_name_tag^2.0",
                  "copy_right_soft_full_title_tag^0.5",
                  "copy_right_work_title_tag^0.5",
                  "ec_goods_title_tag^0.5",
                  "ec_shop_name_tag^1.0",
                  "job_title_tag^0.5",
                  "trademark_title_tag^0.5",
                  "website_title_tag^0.5",
                  "wechat_public_tag^1.0"
                ],
                "type": "best_fields",
                "operator": "AND",
                "slop": 0,
                "prefix_length": 0,
                "max_expansions": 50,
                "zero_terms_query": "NONE",
                "auto_generate_synonyms_phrase_query": true,
                "fuzzy_transpositions": true,
                "boost": 0.5
              }
            }
          ],
          "adjust_pure_negative": true,
          "boost": 1
        }
      },
      "functions": [
        {
          // weight 2 when honor_flag is any of the listed values
          "filter": {
            "bool": {
              "should": [
                {
                  "term": {
                    "honor_flag": {
                      "value": "2",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "honor_flag": {
                      "value": "12",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "honor_flag": {
                      "value": "16",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "honor_flag": {
                      "value": "17",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "honor_flag": {
                      "value": "19",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "honor_flag": {
                      "value": "23",
                      "boost": 1
                    }
                  }
                }
              ],
              "adjust_pure_negative": true,
              "boost": 1
            }
          },
          "weight": 2
        },
        {
          // weight 1.5 when status_flag is "1"
          "filter": {
            "term": {
              "status_flag": {
                "value": "1",
                "boost": 1
              }
            }
          },
          "weight": 1.5
        },
        {
          // weight 0.5 (a demotion) when company_type_flag is "5" or "6"
          "filter": {
            "bool": {
              "should": [
                {
                  "term": {
                    "company_type_flag": {
                      "value": "5",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "company_type_flag": {
                      "value": "6",
                      "boost": 1
                    }
                  }
                }
              ],
              "adjust_pure_negative": true,
              "boost": 1
            }
          },
          "weight": 0.5
        },
        {
          // weight 1.1 when mobile_flag or phone_flag is "1"
          "filter": {
            "bool": {
              "should": [
                {
                  "term": {
                    "mobile_flag": {
                      "value": "1",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "phone_flag": {
                      "value": "1",
                      "boost": 1
                    }
                  }
                }
              ],
              "adjust_pure_negative": true,
              "boost": 1
            }
          },
          "weight": 1.1
        }
      ],
      "score_mode": "multiply",
      "max_boost": 3.4028235e+38,
      "boost": 1
    }
  },
  "track_total_hits": 2147483647
}