创建一个 test 索引,指定主分片数和副本数,并指定 name、age 的字段类型
不指定字段类型时,ES 会在写入文档时通过动态映射(dynamic mapping)自动推断类型
// Create index "test": one shard, one replica, name mapped as text and age as integer.
PUT /test
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "name": { "type": "text" },
      "age": { "type": "integer" }
    }
  }
}
指定enname这个字段的分词解析器(不指定默认是 standard分词器)
// Create index "test" as above, adding a text field enname that uses the "english" analyzer.
PUT /test
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "name": { "type": "text" },
      "enname": {
        "type": "text",
        "analyzer": "english"
      },
      "age": { "type": "integer" }
    }
  }
}
查询字段内容被分析成哪些词语
//查看name的值 被解析成哪些关键字
// Validate a match query on name against test2, with the explain flag set on the URL.
GET /test2/_validate/query?explain
{
  "query": {
    "match": {
      "name": "赵云兄"
    }
  }
}
//查看my name is huhu and i like eatting 被解析成哪些关键字
// Run the sample sentence through the analyzer configured for the enname field of "test".
GET /test/_analyze
{
  "field": "enname",
  "text": "my name is huhu and i like eatting"
}
新增
// Index (create or replace) document 1 in "test".
// age is sent as a JSON number because the field is mapped as integer in this file's
// index definition; a quoted "87" would only work through type coercion.
PUT /test/_doc/1
{
  "name": "赵云",
  "age": 87
}
// Index (create or replace) document 2 in "test".
// age is sent as a JSON number to match its integer mapping; "sex" is not declared in
// this file's index definition.
PUT /test/_doc/2
{
  "name": "赵2云",
  "enname": "my name is huhu and i like eatting",
  "age": 66,
  "sex": "男"
}
批量新增
// Bulk-index documents 6 and 7 into "test". Each action line is followed by its source line;
// the body is NDJSON, so every object must stay on a single line.
// The "_type" key is omitted from the action metadata: it is deprecated in 7.x and
// rejected as an unknown parameter in 8.x.
POST /_bulk
{"index": {"_index": "test", "_id": "6"}}
{"name": "张飞", "age": 34}
{"index": {"_index": "test", "_id": "7"}}
{"name": "关羽", "age": 36}
全量更新(导致age丢失)
// Replace document 1 with a source that contains only name (no age key is sent).
PUT /test/_doc/1
{
  "name": "hzh"
}
//批量更新,会导致age丢失
// Bulk "index" actions for documents 4 and 5 in "test"; each source line is the full new document,
// so document 4 is stored with name only.
// The "_type" key is omitted from the action metadata: it is deprecated in 7.x and
// rejected as an unknown parameter in 8.x.
POST /_bulk
{"index": {"_index": "test", "_id": "4"}}
{"name": "黄阿喵"}
{"index": {"_index": "test", "_id": "5"}}
{"name": "黄阿虎", "age": 48}
局部字段更新(age不会丢失)
// Partially update document 1: only the keys under "doc" are merged into the stored source.
// Uses the typeless endpoint /<index>/_update/<id>; the older /<index>/_doc/<id>/_update
// form is deprecated in 7.x and removed in 8.x.
POST /test/_update/1
{
  "doc": {
    "name": "hzh"
  }
}
//把 name 中含“张”或“飞”的文档修改为 name=黄忠,age=30。注意:match 查询会把“张飞”分词为“张”和“飞”,默认 operator 为 or,所以只要 name 含“张”或含“飞”的文档都会被更新;_delete_by_query 使用 match 查询时行为相同
// For every document in "test" whose name matches the query text, run the script that
// overwrites name and age in the stored source.
// The script body is given under "source" (the "inline" key is deprecated), and age is
// assigned as a number rather than the string '30' to match its integer mapping.
POST /test/_update_by_query
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "张飞"
          }
        }
      ]
    }
  },
  "script": {
    "source": "ctx._source['name'] = '黄忠'; ctx._source['age'] = 30"
  }
}
删除
// Delete the entire "test" index.
DELETE /test
// Delete the document whose _id is 1 from "test".
DELETE /test/_doc/1
//把 name 匹配“张飞”的文档全部删除。注意:match 查询同样会分词,含“张”或“飞”的文档都会被删除,并不是精确匹配;若只想删除包含完整“张飞”的文档,可改用 match_phrase。bool 中可用 must / must_not 组合条件
// Delete every document in "test" selected by a bool query with a single must clause:
// a match query on name.
POST /test/_delete_by_query
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "张飞"
          }
        }
      ]
    }
  }
}
简单查询
// Search the "test" index without a query body.
GET /test/_search
// Fetch the document with _id 1 from "test" through the _doc endpoint.
// Rough database analogy from the original note: test ~ database, _doc ~ table, 1 ~ row id.
GET /test/_doc/1
//查询eated,eatting 会被analyzer 解析成 eat 去匹配,所以能匹配到
// Match query on enname, the field mapped with the "english" analyzer in this file.
GET /test/_search
{
  "query": {
    "match": {
      "enname": "eated"
    }
  }
}
复杂查询
1、operator
//“赵天”被分词为“赵”和“天”,operator 为 and 时这两个字都必须匹配到才返回结果
// Match query on name in test2 with an explicit operator.
// "operator": "and" requires every analyzed term of the query text to match;
// "or" (the default when the key is omitted) returns a hit as soon as one term matches.
// The note is kept outside the request body so the body remains valid JSON.
GET /test2/_search
{
  "query": {
    "match": {
      "name": {
        "query": "赵天",
        "operator": "and"
      }
    }
  }
}
2、minimum_should_match
//“赵天云”被分词为“赵”、“天”、“云”,3 个字分别去匹配,至少有 2 个匹配到才返回结果
// Match query on name in test2 using operator "or" with minimum_should_match set to 2.
GET /test2/_search
{
  "query": {
    "match": {
      "name": {
        "query": "赵天云",
        "operator": "or",
        "minimum_should_match": 2
      }
    }
  }
}
3、term
//term 查询:查询词“赵云”不会被分词,直接用“赵云”整体去匹配;而第一条插入的数据“赵云”在写入时已被分词拆成“赵”和“云”,所以匹配不上
// Term query on name in "test", passing the value through the long form ("value" key).
GET /test/_search
{
  "query": {
    "term": {
      "name": {
        "value": "赵云"
      }
    }
  }
}
4、multi_match
指定去多个字段匹配
// Search both name and age for the same query text.
// age is mapped as integer in this file's index definition, so "lenient": true is set to
// ignore the format error raised when the non-numeric text is applied to that field;
// without it the request fails while building the query.
GET /test/_search
{
  "query": {
    "multi_match": {
      "query": "赵天云",
      "fields": ["name", "age"],
      "lenient": true
    }
  }
}
//可以给name加权重,_score得分会更高,查询返回的结果排在更前面 如:”name^10“,
//原来boost是2.2,现在boost是22
// Search name and age, boosting the name field by a factor of 10 ("name^10").
// age is mapped as integer in this file's index definition, so "lenient": true is set to
// ignore the format error raised when the non-numeric text is applied to that field.
GET /test/_search
{
  "query": {
    "multi_match": {
      "query": "赵天云",
      "fields": ["name^10", "age"],
      "lenient": true
    }
  }
}
//加 "explain": true, 查看执行计划
// Same boosted multi_match search, with "explain": true so each hit carries its score explanation.
// age is mapped as integer in this file's index definition, so "lenient": true is set to
// ignore the format error raised when the non-numeric text is applied to that field.
GET /test/_search
{
  "explain": true,
  "query": {
    "multi_match": {
      "query": "赵天云",
      "fields": ["name^10", "age"],
      "lenient": true
    }
  }
}
//type 说明:best_fields(默认)取得分最高的那个字段的得分;most_fields 把各字段的得分相加;cross_fields 把多个字段当作一个大字段,以词为中心进行匹配
//cross_fields 非常适合以字词为主、内容分散在多个字段中的搜索
// multi_match over name and age using the cross_fields type.
// age is mapped as integer in this file's index definition, so "lenient": true is set to
// ignore the format error raised when the non-numeric text is applied to that field.
GET /test/_search
{
  "query": {
    "multi_match": {
      "query": "赵天云",
      "fields": ["name", "age"],
      "type": "cross_fields",
      "lenient": true
    }
  }
}
5、query_string AND 必须大写,不然就是or
非常适合手动分词的场景
// query_string search on name; the two terms are joined with an uppercase AND operator.
GET /test/_search
{
  "query": {
    "query_string": {
      "default_field": "name",
      "query": "赵 AND 六"
    }
  }
}
6、bool查询 should表示多个match字段有一个匹配上就可以返回,字段会被分词去匹配
must表示多个match字段都匹配上才可以返回,字段会被分词去匹配
// bool query with two should clauses: a match on name and a match on age.
GET /test/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "name": "赵云" } },
        { "match": { "age": "20" } }
      ]
    }
  }
}
7、范围查询,查询age在50到90之间
//这种过滤查询_score全部是0.0
// bool query containing only a filter clause: age between 50 and 90, both bounds inclusive (gte/lte).
GET /test/_search
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "age": {
            "gte": 50,
            "lte": 90
          }
        }
      }
    }
  }
}
//想要有评分的过滤查询
//1、should ,如果name匹配不到,通过age匹配得到也会查出数据,注意这个点
//2、must,如果name和age 都匹配得到才会查出数据
// bool query on test2 combining a must clause (match on name) with a range filter on age (50 to 90 inclusive).
GET /test2/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "赵云" } }
      ],
      "filter": {
        "range": {
          "age": {
            "gte": 50,
            "lte": 90
          }
        }
      }
    }
  }
}
8、explain
elasticsearch默认根据_score的大小排序,那么_score 是怎么计算得来的呢?
tf 指一个词在一篇文档出现的次数,次数越多,tf值越大(这个 词越多表明越相关)
idf 指一个词在多少篇文档中出现过,次数越多值越小 (这个词 所有的文档都有,那这个就没有意义)
_score = boost * idf * tf 具体值看查出来的结果,即
1.0935872 = 2.2 * 0.9444616 * 0.5263158
// Match query on name in test2 with "explain": true, so each hit includes an _explanation of its score.
GET /test2/_search
{
  "explain": true,
  "query": {
    "match": {
      "name": "赵天"
    }
  }
}
//查看分析
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0935872,
"hits" : [
{
"_shard" : "[test2][0]",
"_node" : "V4kZbe4FQMWAgjecDWNUPQ",
"_index" : "test2",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0935872,
"_source" : {
"name" : "赵云",
"age" : "电视机坏了"
},
"_explanation" : {
"value" : 1.0935872,
"description" : "sum of:",
"details" : [
{
"value" : 1.0935872,
"description" : "weight(name:赵 in 0) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 1.0935872,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
//这里说明boost是2.2
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
//这里说明idf是 0.9444616
{
"value" : 0.9444616,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 8,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
//这里说明tf是 0.5263158
{
"value" : 0.5263158,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 2.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
]
}
},
{
"_shard" : "[test2][0]",
"_node" : "V4kZbe4FQMWAgjecDWNUPQ",
"_index" : "test2",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.9444616,
"_source" : {
"name" : "赵云1",
"age" : "电视机"
},
"_explanation" : {
"value" : 0.9444616,
"description" : "sum of:",
"details" : [
{
"value" : 0.9444616,
"description" : "weight(name:赵 in 4) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 0.9444616,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 0.9444616,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 8,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.45454544,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
]
}
},
{
"_shard" : "[test2][0]",
"_node" : "V4kZbe4FQMWAgjecDWNUPQ",
"_index" : "test2",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.9444616,
"_source" : {
"name" : "赵云5",
"age" : "电视机"
},
"_explanation" : {
"value" : 0.9444616,
"description" : "sum of:",
"details" : [
{
"value" : 0.9444616,
"description" : "weight(name:赵 in 5) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 0.9444616,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 0.9444616,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 8,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.45454544,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
]
}
}
]
}
}
复制索引内容
把数据从test2导一份到test, 两索引的字段类型要一致,不一致的会导不过去
// Copy documents from test2 into test with the reindex API; "size" under "source" is set to 100.
POST _reindex
{
  "source": {
    "index": "test2",
    "size": 100
  },
  "dest": {
    "index": "test"
  }
}
持续更新中