es_demand为索引
分页查询
GET /es_demand/_search
{
"from": 0,
"size": 5
}
条件查询
GET /es_demand/_search
{
"from": 0,
"size": 20,
"query": {
"bool": {
"must": [
{
"match": {
"id": "1438111557757308928"
}
}
]
}
}
}
查询索引的mapping
get /es_demand/_mapping
修改某个索引的mapping
可以先查询索引mapping,然后对应修改就可以了
PUT /es_demand/_mapping
{
"properties":{
"simple_description" : {
"type" : "text",
"boost":3.0,
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
评分配置
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
使用 boost 调整查询语句的权重
查询时可以用 boost 配置来增加权重,不过这里设置的对象并不是某个字段,而是查询语句。
点击率配置
field_value_factor
热度、推荐权重等对评分的影响可以按权重相乘,刚好适合 field_value_factor 这种类型的函数
点击率与标题查询评分结合
评分就会和click_times相关了
GET /es_demand/_search
{
"from": 0,
"size": 30,
"query": {
"function_score": {
"field_value_factor": {
"field": "click_times",
"factor": 10
},
"query": {
"match": {
"simple_description": "开发"
}
}
}
}
}
geo使用,常用于查询附近的数据等功能
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-query.html
同一查询词匹配两个字段的条件(should:title 或 content 任一匹配即可)
POST /es_knowledge_base/_search
{
"from": 0,
"size": 20,
"query": {
"bool": {
"should": [
{"match": {
"title": "婚假有多少天?"
}},
{"match": {
"content": "婚假有多少天?"
}}
]
}
}
}
通过id修改某个字段
POST es_knowledge_base/_update/1562380768175591424
{
"doc":{
"click_times":2
}
}
清空索引
#清空索引
POST es_knowledge_base/_delete_by_query?refresh&slices=5&pretty
{
"query": {
"match_all": {}
}
}
in查询
POST /es_knowledge_base/_search
{
"from": 0,
"size": 5,
"query": {
"bool": {
"must": [
{"match": {
"title": "婚假有多少天?"
}},
{"match": {
"content": "婚假有多少天?"
}},
{
"match": {
"labels": "婚假"
}
}
],
"filter": [
{"terms": {
"sys_boss_dept_id": [-1,211]
}}
]
}
}
}
权重匹配查询
POST /es_knowledge_base/_search
{
"from": 0,
"size": 5,
"query": {
"bool": {
"must": [
{"match": {
"title": {
"query": "婚假有多少天",
"boost": 50
}
}},
{"match": {
"content": {
"query": "员工自入职日期开始",
"boost": 10
}
}},
{
"match": {
"labels": {
"query": "婚假",
"boost": 20
}
}
}
],
"filter": [
{"terms": {
"sys_boss_dept_id": [-1,211]
}}
]
}
}
}
批量更新字段
把 click_times 为 0 的文档的 click_times 更新为 1
POST /es_knowledge_base/_update_by_query
{
"query": {
"bool": {
"must": [
{"match": {
"click_times": "0"
}}
]
}
},
"script": {
"source": "ctx._source.click_times='1'",
"lang": "painless"
}
}
点击率配置
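doc['click_times'].value 代表文档的点击次数(下面示例脚本里读取的字段是 hit_times,使用时请与索引中的实际字段保持一致),
最后的结果为(function_score 的 boost_mode 默认是 multiply,即脚本结果与原始评分相乘)
原始评分 *
(1 + (doc['click_times'].value+1)/(doc['click_times'].value+2))
注意:如果该字段映射为整数类型,Painless 中整数相除会截断取整,(v+1)/(v+2) 恒为 0,脚本结果恒为 1;要得到小数需写成 (v+1.0)/(v+2.0)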
GET /es_knowledge_base/_search
{
"from": 0,
"size": 30,
"query": {
"function_score": {
"script_score": {
"script":
{
"source": "1 + (((doc['hit_times'].value+1)/(doc['hit_times'].value+2)))"
}
},
"query": {
"bool": {
"must": [
{"match": {
"title": {
"query": "婚假有多少天",
"boost": 50
}
}},
{"match": {
"content": {
"query": "婚假有多少天",
"boost": 10
}
}}
],
"should": [
{"match": {
"labels": {
"query": "婚假",
"boost": 20
}
}}
],
"filter": [
{"terms": {
"sys_boss_dept_id": [-1,211]
}
}
]
}
}
}
}
}
查看索引的字段属性
GET /es_knowledge_base/_mapping
Text analysis
tokenization
把一段文本拆分成多个词元(token)
Normalization
定义词元的归一化规则(转小写、词干提取、同义词合并),比如
Quick can be lowercased: quick.
foxes can be stemmed, or reduced to its root word: fox.
jump and leap are synonyms and can be indexed as a single word: jump.
analyzer例子
PUT my-index-000001
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"char_filter": [
"my_char_filter"
]
}
},
"char_filter": {
"my_char_filter": {
"type": "pattern_replace",
"pattern": "(\\d+)-(?=\\d)",
"replacement": "$1_"
}
}
}
}
}
POST my-index-000001/_analyze
{
"analyzer": "my_analyzer",
"text": "My credit card is 123-456-789"
}
结果返回
[ My, credit, card, is, 123_456_789 ]
这个正则用java替换的话就是
Pattern p = Pattern.compile("(\\d+)-(?=\\d)");
Matcher m = p.matcher("My credit card is 123-456-789");
StringBuffer sb = new StringBuffer();
while (m.find()) {
m.appendReplacement(sb, "$1_");
}
m.appendTail(sb);
System.out.println(sb.toString());
\\d数字字符匹配。等效于 [0-9]。
(?=\\d) 是正向先行断言(lookahead):要求当前位置后面紧跟一个数字,但不消耗该数字,所以这个数字不会被包含在匹配结果里
$1 引用第一个捕获组 (\\d+) 匹配到的数字;替换串 "$1_" 的效果就是保留数字、把它后面的 - 替换成 _
逗号字符串拆分写法
Pattern p = Pattern.compile("(\\d+),(?=\\d+)");
Matcher m = p.matcher("1,2,3,4");
StringBuffer sb = new StringBuffer();
while (m.find()) {
m.appendReplacement(sb, "$1 ");
}
m.appendTail(sb);
System.out.println(sb.toString());
逗号拼接字符串前后添加逗号写法
Pattern p = Pattern.compile("(^\\d+),(\\d+,)*(\\d+$)");
Matcher m = p.matcher("1,222,333,411");
StringBuffer sb = new StringBuffer();
while (m.find()) {
m.appendReplacement(sb, ",$0,");
}
m.appendTail(sb);
System.out.println(sb.toString());
通配符匹配(wildcard 查询,* 匹配任意多个字符)
GET /knowledge_base/_search
{
"query": {
"wildcard": {
"sys_boss_id_str.keyword": {
"value": "*,211,*"
}
}
}
}
query配合正则
GET /es_knowledge_base/_search
{
"from": 0,
"size": 30,
"query": {
"function_score": {
"script_score": {
"script":
{
"source": "1 + (((doc['click_times'].value+1)/(doc['click_times'].value+2)))"
}
},
"query": {
"bool": {
"must": [
{"match": {
"title": {
"query": "管理",
"boost": 60
}
}},
{"match": {
"content": {
"query": "以部门在职",
"boost": 10
}
}},
{
"match": {
"status": {
"query": "5"
}
}
},
{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,1000,.*"
}
}
],
"should": [
{"match": {
"labels": {
"query": "管理",
"boost": 20
}
}}
]
}
}
}
}
}
or(should)条件至少要匹配上一个:
bool 中同时存在 must 时,should 默认可以一个都不匹配(只影响评分),所以必须设置 minimum_should_match(这里为 1)
GET /es_knowledge_base/_search
{
"from": 0,
"size": 30,
"query": {
"function_score": {
"script_score": {
"script":
{
"source": "1 + (((doc['click_times'].value+1)/(doc['click_times'].value+2)))"
}
},
"query": {
"bool": {
"must": [
{"match": {
"title": {
"query": "报销",
"boost": 60
}
}},
{"match": {
"content": {
"query": "报销",
"boost": 10
}
}},
{
"match": {
"labels": {
"query": "报销",
"boost": 10
}
}
},
{
"match": {
"status": {
"query": "1"
}
}
}
],
"should": [
{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,1,.*"
}
},{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,211,.*"
}
}
],
"minimum_should_match": 1
}
}
}
}
}
查询节点状态
GET /_nodes/stats?pretty
多个should配合
GET /es_knowledge_base/_search
{
"from": 0,
"size": 30,
"query": {
"function_score": {
"script_score": {
"script":
{
"source": "1 + (((doc['click_times'].value+1)/(doc['click_times'].value+2)))"
}
},
"query": {
"bool": {
"must": [
{
"match": {
"status": {
"query": "1"
}
}
},
{
"bool": {
"minimum_should_match": 1,
"should": [
{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,536,.*"
}
},{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,1280,.*"
}
}
]
}
}
],
"should": [
{
"match": {
"labels": {
"query": "产假有多少天",
"boost": 15
}
}
},
{"match": {
"content": {
"query": "产假有多少天",
"boost": 10
}
}},
{"match": {
"title": {
"query": "产假有多少天",
"boost": 70
}
}}
]
}
}
}
}
}
最终版本
GET /es_knowledge_base/_search
{
"from": 0,
"size": 100,
"query": {
"function_score": {
"script_score": {
"script":
{
"source": "1 + (((doc['click_times'].value+1)/(doc['click_times'].value+2)))"
}
},
"query": {
"bool": {
"must": [
{
"match": {
"status": {
"query": "1"
}
}
},
{
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"must": [
{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,536,.*"
}
},
{
"match": {
"dept_relation_type": "labour"
}
}
]
}
},
{
"bool": {
"must": [
{
"regexp": {
"sys_boss_dept_ids.keyword": ".*,1280,.*"
}
},
{
"match": {
"dept_relation_type": "manager"
}
}
]
}
}
]
}
}
],
"should": [
{
"match": {
"labels": {
"query": "产假有多少天",
"boost": 15
}
}
},
{"match": {
"content": {
"query": "产假有多少天",
"boost": 10
}
}},
{"match": {
"title": {
"query": "产假有多少天",
"boost": 70
}
}}
]
}
}
}
}
}