结构化数据
结构化搜索(Structured search) 是指对结构化数据的搜索
日期,布尔类型和数字都是结构化的
文本也可以是结构化的。
如彩色笔可以有离散的颜色集合: 红(red) 、 绿(green) 、 蓝(blue)
一个博客可能被标记了标签,例如,分布式(distributed) 和 搜索(search)
电商网站上的商品都有 UPCs(通用产品码 Universal Product Codes)或其他的唯一标识,它们都需要遵从严格规定的、结构化的格式。
ES 中的结构化搜索
布尔,时间,日期和数字这类结构化数据:有精确的格式,我们可以对这些格式进行逻辑操作。包括比较数字或时间的范围,或判定两个值的大小。
结构化的文本可以做精确匹配或者部分匹配
Term 查询 / Prefix 前缀查询
结构化搜索的结果只有“是”或“否”两个值:文档要么匹配,要么不匹配
根据场景需要,可以决定结构化搜索是否需要打分
#对布尔值 term 查询,有算分
#profile 用于查看搜索过程,explain 用于查看算分过程
POST products/_search
{
"profile": "true",
"explain": true,
"query": {
"term": {
"avaliable": true
}
}
}
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 0.35667494,
"hits" : [
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.35667494,
"_source" : {
"price" : 10,
"avaliable" : true,
"date" : "2018-01-01",
"productID" : "XHDK-A-1293-#fJ3"
},
"_explanation" : {
"value" : 0.35667494,
"description" : "weight(avaliable:T in 0) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 0.35667494,
"description" : "score(freq=1.0), product of:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 0.35667494,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 4,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.45454544,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
},
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.35667494,
"_source" : {
"price" : 20,
"avaliable" : true,
"date" : "2019-01-01",
"productID" : "KDKE-B-9947-#kL5"
},
"_explanation" : {
"value" : 0.35667494,
"description" : "weight(avaliable:T in 1) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 0.35667494,
"description" : "score(freq=1.0), product of:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 0.35667494,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 4,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.45454544,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
},
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.35667494,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
},
"_explanation" : {
"value" : 0.35667494,
"description" : "weight(avaliable:T in 2) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 0.35667494,
"description" : "score(freq=1.0), product of:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 0.35667494,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 3,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 4,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.45454544,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[FOnISXrCSHCxo6YKkUhf7g][products][0]",
"searches" : [
{
"query" : [
{
"type" : "TermQuery",
"description" : "avaliable:T",
"time_in_nanos" : 147617,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 11061,
"match" : 0,
"next_doc_count" : 4,
"score_count" : 3,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 8326,
"build_scorer_count" : 2,
"create_weight" : 81361,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 46859
}
}
],
"rewrite_time" : 2736,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 33399,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 20283
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
#对布尔值,通过 constant_score 转成 filter,没有算分(_score 固定为 1.0)
POST products/_search
{
"profile": "true",
"explain": true,
"query": {
"constant_score": {
"filter": {
"term": {
"avaliable": true
}
}
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"price" : 10,
"avaliable" : true,
"date" : "2018-01-01",
"productID" : "XHDK-A-1293-#fJ3"
},
"_explanation" : {
"value" : 1.0,
"description" : "ConstantScore(avaliable:T)",
"details" : [ ]
}
},
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"price" : 20,
"avaliable" : true,
"date" : "2019-01-01",
"productID" : "KDKE-B-9947-#kL5"
},
"_explanation" : {
"value" : 1.0,
"description" : "ConstantScore(avaliable:T)",
"details" : [ ]
}
},
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
},
"_explanation" : {
"value" : 1.0,
"description" : "ConstantScore(avaliable:T)",
"details" : [ ]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[FOnISXrCSHCxo6YKkUhf7g][products][0]",
"searches" : [
{
"query" : [
{
"type" : "ConstantScoreQuery",
"description" : "ConstantScore(avaliable:T)",
"time_in_nanos" : 110754,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 4078,
"match" : 0,
"next_doc_count" : 4,
"score_count" : 3,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 2485,
"build_scorer_count" : 2,
"create_weight" : 39319,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 64862
},
"children" : [
{
"type" : "TermQuery",
"description" : "avaliable:T",
"time_in_nanos" : 69990,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 4078,
"match" : 0,
"next_doc_count" : 4,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 0,
"build_scorer_count" : 2,
"create_weight" : 21195,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 44710
}
}
]
}
],
"rewrite_time" : 3907,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 23863,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 13055
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
#数字类型 Term
POST products/_search
{
"profile": "true",
"explain": true,
"query": {
"term": {
"price": 30
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
},
"_explanation" : {
"value" : 1.0,
"description" : "price:[30 TO 30]",
"details" : [ ]
}
},
{
"_shard" : "[products][0]",
"_node" : "FOnISXrCSHCxo6YKkUhf7g",
"_index" : "products",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : false,
"productID" : "QQPX-R-3956-#aD8"
},
"_explanation" : {
"value" : 1.0,
"description" : "price:[30 TO 30]",
"details" : [ ]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[FOnISXrCSHCxo6YKkUhf7g][products][0]",
"searches" : [
{
"query" : [
{
"type" : "PointRangeQuery",
"description" : "price:[30 TO 30]",
"time_in_nanos" : 65249,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 4252,
"match" : 0,
"next_doc_count" : 3,
"score_count" : 2,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 1840,
"build_scorer_count" : 2,
"create_weight" : 2899,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 56250
}
}
],
"rewrite_time" : 2565,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 23396,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 12622
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
#数字类型 terms
POST products/_search
{
"query": {
"constant_score": {
"filter": {
"terms": {
"price": [
"20",
"30"
]
}
}
}
}
}
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"price" : 20,
"avaliable" : true,
"date" : "2019-01-01",
"productID" : "KDKE-B-9947-#kL5"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : false,
"productID" : "QQPX-R-3956-#aD8"
}
}
]
}
}
#数字 Range 查询
gt 大于
lt 小于
gte 大于等于
lte 小于等于
GET products/_search
{
"query" : {
"constant_score" : {
"filter" : {
"range" : {
"price" : {
"gte" : 20,
"lte" : 30
}
}
}
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"price" : 20,
"avaliable" : true,
"date" : "2019-01-01",
"productID" : "KDKE-B-9947-#kL5"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : false,
"productID" : "QQPX-R-3956-#aD8"
}
}
]
}
}
# 日期 range
y 年
M 月
w 周
d 天
H / h 小时
m 分钟
s 秒
POST products/_search
{
"query" : {
"constant_score" : {
"filter" : {
"range" : {
"date" : {
"gte" : "now-1y"
}
}
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
范围查询中的日期格式
默认使用设置在日期字段中的format参数解析格式化日期,但是,此参数可以通过在范围查询中设置format参数来进行覆盖。
POST products/_search
{
"query": {
"range" : {
"date" : {
"gte": "2017",
"lt": "2019",
"format": "dd/MM/yyyy||yyyy"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"price" : 10,
"avaliable" : true,
"date" : "2018-01-01",
"productID" : "XHDK-A-1293-#fJ3"
}
}
]
}
}
范围查询中的时区
通过在日期值中指定时区,或使用 time_zone 参数指定时区,可以将日期从其他时区转换为 UTC。
POST products/_search
{
"query": {
"range" : {
"timestamp" : {
"gte": "2015-01-01 00:00:00",
"lte": "now",
"time_zone": "+01:00"
}
}
}
}
gte参数的日期值将会被转化为2014-12-31T23:00:00 UTC
now不会被time_zone参数影响(日期必须存储为UTC)
#exists查询(空值排除)
POST products/_search
{
"query": {
"constant_score": {
"filter": {
"exists": {
"field": "date"
}
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"price" : 10,
"avaliable" : true,
"date" : "2018-01-01",
"productID" : "XHDK-A-1293-#fJ3"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"price" : 20,
"avaliable" : true,
"date" : "2019-01-01",
"productID" : "KDKE-B-9947-#kL5"
}
}
]
}
}
#must_not + exists:查询不包含 date 字段的文档
POST products/_search
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must_not":{
"exists":{
"field":"date"
}
}
}
}
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : false,
"productID" : "QQPX-R-3956-#aD8"
}
}
]
}
}
#处理多值字段,term 查询是包含,而不是等于
POST movies/_search
{
"query": {
"constant_score": {
"filter": {
"term": {
"genre.keyword": "Comedy"
}
}
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3757,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "583",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy",
"Drama"
],
"title" : "Dear Diary",
"@version" : "1",
"id" : "583",
"year" : 0
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "585",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy"
],
"title" : "Brady Bunch Movie, The",
"@version" : "1",
"id" : "585",
"year" : 1995
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "586",
"_score" : 1.0,
"_source" : {
"genre" : [
"Children",
"Comedy"
],
"title" : "Home Alone",
"@version" : "1",
"id" : "586",
"year" : 1990
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "587",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy",
"Drama",
"Fantasy",
"Romance",
"Thriller"
],
"title" : "Ghost",
"@version" : "1",
"id" : "587",
"year" : 1990
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "588",
"_score" : 1.0,
"_source" : {
"genre" : [
"Adventure",
"Animation",
"Children",
"Comedy",
"Musical"
],
"title" : "Aladdin",
"@version" : "1",
"id" : "588",
"year" : 1992
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "597",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy",
"Romance"
],
"title" : "Pretty Woman",
"@version" : "1",
"id" : "597",
"year" : 1990
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "600",
"_score" : 1.0,
"_source" : {
"genre" : [
"Action",
"Comedy",
"Crime"
],
"title" : "Love and a .45",
"@version" : "1",
"id" : "600",
"year" : 1994
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "608",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy",
"Crime",
"Drama",
"Thriller"
],
"title" : "Fargo",
"@version" : "1",
"id" : "608",
"year" : 1996
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "612",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy"
],
"title" : "Pallbearer, The",
"@version" : "1",
"id" : "612",
"year" : 1996
}
},
{
"_index" : "movies",
"_type" : "_doc",
"_id" : "615",
"_score" : 1.0,
"_source" : {
"genre" : [
"Comedy",
"Drama"
],
"title" : "Bread and Chocolate",
"@version" : "1",
"id" : "615",
"year" : 0
}
}
]
}
}
#字符类型 terms
POST products/_search
{
"query": {
"constant_score": {
"filter": {
"terms": {
"productID.keyword": [
"QQPX-R-3956-#aD8",
"JODL-X-1937-#pV7"
]
}
}
}
}
}
{
"took" : 20,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : true,
"productID" : "JODL-X-1937-#pV7"
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"price" : 30,
"avaliable" : false,
"productID" : "QQPX-R-3956-#aD8"
}
}
]
}
}