Elasticsearch 如何实现类似企查查的企业搜索

创建索引

PUT /_template/search-company
{
    "index_patterns": [
        "search-company*"
    ],
    "settings": {
        "index": {
            "refresh_interval": "60s",
            "number_of_shards": 6,
            "number_of_replicas": 0,
            "similarity": {
                "default": {
                    "type": "constant"
                }
            },
            "max_result_window": 100000,
            "translog.durability": "async",
            "translog.sync_interval": "120s"
        }
    },
    "mappings": {
        "date_detection": false,
        "dynamic_templates": [
            {
                "flag_as_string": {
                    "match_mapping_type": "string",
                    "match": "*_flag",
                    "mapping": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            {
                "no_as_string": {
                    "match_mapping_type": "string",
                    "match": "*_no",
                    "mapping": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            {
                "code_as_string": {
                    "match_mapping_type": "string",
                    "match": "*_code",
                    "mapping": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            {
                "count_as_long": {
                    "match_mapping_type": "string",
                    "match": "*_count",
                    "mapping": {
                        "type": "long"
                    }
                }
            },
            {
                "tag_as_string": {
                    "match_mapping_type": "string",
                    "match": "*_tag",
                    "mapping": {
                        "type": "text"
                    }
                }
            },
            {
                "time_as_date": {
                    "match_mapping_type": "string",
                    "match": "*_time",
                    "mapping": {
                        "type": "long"
                    }
                }
            },
            {
                "time_numeric_as_date": {
                    "match_mapping_type": "long",
                    "match": "*_time",
                    "mapping": {
                        "type": "long"
                    }
                }
            },
            {
                "amount_as_float": {
                    "match_mapping_type": "string",
                    "match": "*_amount",
                    "mapping": {
                        "type": "double"
                    }
                }
            },
            {
                "amount_numeric_as_float": {
                    "match_mapping_type": "double",
                    "match": "*_amount",
                    "mapping": {
                        "type": "double"
                    }
                }
            },
            {
                "score_as_float": {
                    "match_mapping_type": "string",
                    "match": "*_score",
                    "mapping": {
                        "type": "double"
                    }
                }
            },
            {
                "score_numeric_as_float": {
                    "match_mapping_type": "double",
                    "match": "*_score",
                    "mapping": {
                        "type": "double"
                    }
                }
            },
            {
                "strings": {
                    "match_mapping_type": "string",
                    "mapping": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            }
        ],
        "properties": {
            "company_id": {
                "type": "keyword"
            },
            "company_name_tag": {
                "type": "text",
                "fields": {
                    "raw": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            "old_name_tag": {
                "type": "text",
                "fields": {
                    "raw": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            "complex_company_name_tag": {
                "type": "text",
                "fields": {
                    "raw": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            "administrative_division_tag": {
                "type": "text",
                "fields": {
                    "raw": {
                        "type": "keyword",
                        "ignore_above": 8191
                    }
                }
            },
            "location": {
                "type": "geo_point"
            },
            "bidding_no": {
                "type": "text"
            },
            "mobile_tag": {
                "type": "keyword"
            },
            "phone_tag": {
                "type": "keyword"
            },
            "email_tag": {
                "type": "keyword"
            },
            "qq_tag": {
                "type": "keyword"
            },
            "boost": {
                "type": "double"
            },
            "allow": {
                "type": "nested"
            },
            "annual_report": {
                "type": "nested"
            },
            "bidding": {
                "type": "nested"
            },
            "job": {
                "type": "nested"
            },
            "website": {
                "type": "nested"
            },
            "tiktok": {
                "type": "nested"
            },
            "weibo": {
                "type": "nested"
            },
            "wechat": {
                "type": "nested"
            },
            "applet": {
                "type": "nested"
            },
            "android": {
                "type": "nested"
            },
            "ios": {
                "type": "nested"
            },
            "extension": {
                "type": "nested"
            },
            "shop": {
                "type": "nested"
            },
            "patent": {
                "type": "nested"
            },
            "trademark": {
                "type": "nested"
            },
            "copyright_work": {
                "type": "nested"
            },
            "copyright_soft": {
                "type": "nested"
            },
            "cert": {
                "type": "nested"
            },
            "abnormal": {
                "type": "nested"
            },
            "court_announcement": {
                "type": "nested"
            },
            "business": {
                "type": "nested"
            },
            "company_brand": {
                "type": "nested"
            }
        }
    }
}

查询语句涉及的搜索函数

  • function_score

    • 针对核心召回字段相关属性进行自定义打分。例如企业有荣誉称号的可以加分
  • multi_match

    • 根据召回字段的相关性设置不同的召回策略。例如用关键字“阿里巴巴”搜索时,分别按短语(连续)匹配、分词匹配等方式计算相关性并打分

实现难点

比如我们输入关键词“阿里”,如何才能准确地搜到“杭州阿里巴巴(中国)网络技术有限公司”?首先,公司名称是最主要的搜索来源。其次,我们还需要多个维度的数据来支撑搜索,比如公司的网站名称、App 名称、软件著作权、微信公众号、招聘岗位名称等等,这些都可以作为搜索字段。通过为不同字段设置不同的权重来取得平衡,最终得到需要的结果。下面是一个完整的查询示例:
GET company_test1/_search
{
  "from": 0,
  "size": 10,
  "timeout": "3000ms",
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "bool": {
                "should": [
                  {
                    "multi_match": {
                      "query": "菜鸟",
                      "fields": [
                        "app_name_tag^0.5", // per-field boost, written as field^weight
                        "company_name_tag^2.0",
                        "copy_right_soft_full_title_tag^0.5",
                        "copy_right_work_title_tag^0.5",
                        "ec_goods_title_tag^0.5",
                        "ec_shop_name_tag^1.0",
                        "job_title_tag^0.5",
                        "trademark_title_tag^0.5",
                        "website_title_tag^0.5",
                        "wechat_public_tag^1.0"
                      ],
                      "type": "best_fields", // the score is taken from the single best-matching field
                      "operator": "AND", // all query terms must match; for best_fields this is evaluated per field
                      "slop": 0,
                      "prefix_length": 0,
                      "max_expansions": 50,
                      "zero_terms_query": "NONE",
                      "auto_generate_synonyms_phrase_query": true,
                      "fuzzy_transpositions": true,
                      "minimum_should_match": "30%",
                      "boost": 0 // boost 0: this clause only selects candidates and adds nothing to the score
                    }
                  }
                ],
                "adjust_pure_negative": true,
                "boost": 1
              }
            }
          ],
          "should": [
            {
              "multi_match": {
                "query": "菜鸟",
                "fields": [
                  "app_name_tag^0.5",
                  "company_name_tag^2.0",
                  "copy_right_soft_full_title_tag^0.5",
                  "copy_right_work_title_tag^0.5",
                  "ec_goods_title_tag^0.5",
                  "ec_shop_name_tag^1.0",
                  "job_title_tag^0.5",
                  "trademark_title_tag^0.5",
                  "website_title_tag^0.5",
                  "wechat_public_tag^1.0"
                ],
                "type": "phrase", // runs a match_phrase query on each field and uses the score of the best field
                "operator": "OR",
                "slop": 0,
                "prefix_length": 0,
                "max_expansions": 50,
                "zero_terms_query": "NONE",
                "auto_generate_synonyms_phrase_query": true,
                "fuzzy_transpositions": true,
                "boost": 100
              }
            },
            {
              "multi_match": {
                "query": "菜鸟",
                "fields": [
                  "app_name_tag^0.5",
                  "company_name_tag^2.0",
                  "copy_right_soft_full_title_tag^0.5",
                  "copy_right_work_title_tag^0.5",
                  "ec_goods_title_tag^0.5",
                  "ec_shop_name_tag^1.0",
                  "job_title_tag^0.5",
                  "trademark_title_tag^0.5",
                  "website_title_tag^0.5",
                  "wechat_public_tag^1.0"
                ],
                "type": "most_fields", // combines the scores of all matching fields
                "operator": "AND",
                "slop": 0,
                "prefix_length": 0,
                "max_expansions": 50,
                "zero_terms_query": "NONE",
                "auto_generate_synonyms_phrase_query": true,
                "fuzzy_transpositions": true,
                "boost": 5
              }
            },
            {
              "multi_match": {
                "query": "菜鸟",
                "fields": [
                  "app_name_tag^0.5",
                  "company_name_tag^2.0",
                  "copy_right_soft_full_title_tag^0.5",
                  "copy_right_work_title_tag^0.5",
                  "ec_goods_title_tag^0.5",
                  "ec_shop_name_tag^1.0",
                  "job_title_tag^0.5",
                  "trademark_title_tag^0.5",
                  "website_title_tag^0.5",
                  "wechat_public_tag^1.0"
                ],
                "type": "best_fields",
                "operator": "AND",
                "slop": 0,
                "prefix_length": 0,
                "max_expansions": 50,
                "zero_terms_query": "NONE",
                "auto_generate_synonyms_phrase_query": true,
                "fuzzy_transpositions": true,
                "boost": 0.5
              }
            }
          ],
          "adjust_pure_negative": true,
          "boost": 1
        }
      },
      "functions": [
        {
          "filter": {
            "terms": {
              "honor_flag": ["2", "12", "16", "17", "19", "23"],
              "boost": 1
            }
          },
          "weight": 2
        },
        {
          "filter": {
            "term": {
              "status_flag": {
                "value": "1",
                "boost": 1
              }
            }
          },
          "weight": 1.5
        },
        {
          "filter": {
            "terms": {
              "company_type_flag": ["5", "6"],
              "boost": 1
            }
          },
          "weight": 0.5
        },
        {
          "filter": {
            "bool": {
              "should": [
                {
                  "term": {
                    "mobile_flag": {
                      "value": "1",
                      "boost": 1
                    }
                  }
                },
                {
                  "term": {
                    "phone_flag": {
                      "value": "1",
                      "boost": 1
                    }
                  }
                }
              ],
              "adjust_pure_negative": true,
              "boost": 1
            }
          },
          "weight": 1.1
        }
      ],
      "score_mode": "multiply",
      "max_boost": 3.4028235e+38,
      "boost": 1
    }
  },
  "track_total_hits": 2147483647
}
  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值