转载自:https://blog.csdn.net/chengyuqiang/column/info/18392,ES版本号6.3.0
转载自:https://blog.csdn.net/qq_23536449/article/details/91364242
mapping概述
ElasticSearch提供了丰富的映射参数对字段的映射进行参数设计,比如字段的分词器、字段权重、日期格式、检索模型等。
analyzer
指定分词器(分析器更合理),对索引和查询都有效。如下,指定ik分词的配置
(1)定义索引
DELETE my_index
PUT my_index
(2)ik_smart分词
GET my_index/_analyze
{
"analyzer": "ik_smart",
"text": "安徽省长江流域"
}
返回
{
"tokens": [
{
"token": "安徽省",
"start_offset": 0,
"end_offset": 3,
"type": "CN_WORD",
"position": 0
},
{
"token": "长江流域",
"start_offset": 3,
"end_offset": 7,
"type": "CN_WORD",
"position": 1
}
]
}
(3)定义mapping
POST my_index/fulltext/_mapping
{
"properties": {
"content":{
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
}
}
}
(4)插入数据
POST my_index/fulltext/1
{"content":"美国留给伊拉克的是个烂摊子吗"}
POST my_index/fulltext/2
{"content":"公安部:各地校车将享最高路权"}
POST my_index/fulltext/3
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
POST my_index/fulltext/4
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
(5)查询
POST my_index/fulltext/_search
{
"query": {
"match": {
"content": "中国"
}
}
}
返回
{
"took": 67,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.6489038,
"hits": [
{
"_index": "my_index",
"_type": "fulltext",
"_id": "4",
"_score": 0.6489038,
"_source": {
"content": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"
}
},
{
"_index": "my_index",
"_type": "fulltext",
"_id": "3",
"_score": 0.2876821,
"_source": {
"content": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"
}
}
]
}
}
normalizer
normalizer用于解析前的标准化配置,比如把所有字符串转换为小写
(1)创建索引
DELETE my_index
PUT my_index
{
"settings": {
"analysis": {
"normalizer":{
"my_normalizer":{
"type":"custom",
"char_filter":[],
"filter":["lowercase","asciifolding"]
}
}
}
},
"mappings": {
"type":{
"properties": {
"foo":{
"type": "keyword",
"normalizer": "my_normalizer"
}
}
}
}
}
(2)插入数据
PUT my_index/type/1
{"foo": "BÀR"}
PUT my_index/type/2
{"foo": "bar"}
PUT my_index/type/3
{"foo": "baz"}
POST my_index/_refresh
(3)查询
GET my_index/_search
{
"query": {
"match": {
"foo": "BAR"
}
}
}
返回
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "my_index",
"_type": "type",
"_id": "2",
"_score": 0.2876821,
"_source": {
"foo": "bar"
}
},
{
"_index": "my_index",
"_type": "type",
"_id": "1",
"_score": 0.2876821,
"_source": {
"foo": "BÀR"
}
}
]
}
}
boost
我们可以通过指定一个boost值来控制每个查询子句的相对权重,该值默认为1,一个大于1的boost会增加该查询子句的相对权重
(1)创建索引
DELETE my_index
PUT my_index
PUT my_index/my_type/1
{
"title":"quick brown fox"
}
(2)查询
POST _search
{
"query": {
"match": {
"title": {
"query": "qucik brown fox",
"boost":2
}
}
}
}
返回
{
"took": 83,
"timed_out": false,
"_shards": {
"total": 39,
"successful": 39,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.1507283,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
"_score": 1.1507283,
"_source": {
"title": "quick brown fox"
}
}
]
}
}
boost参数被用来增加一个子句的相对权重(当boost大于1时),或者减小相对权重(当boost介于0到1),但是增加或者减小不是线性的。换言之,boost设置为2并不会让最终的_score加倍。相反,新的_score会在使用了boost后被归一化(Normalized)。每种查询都有自己的归一化算法(Normalization Algorithm)。但可以肯定的是,较高的boost值会产生较高的_score。
coerce
coerce属性用于清除脏数据,coerce的默认值是true。整型数字5有可能被写成字符串“5”或者浮点数字5.0,coerce属性可以用来清除这类脏数据。
字符串会被强制转换为整数
浮点数被强制转换为整数
(1)重新创建my_index
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"number_one":{
"type": "integer"
},
"number_two":{
"type": "integer",
"coerce":false
}
}
}
}
}
(2)写入一条测试文档
PUT my_index/my_type/1
{
"number_one":"10"
}
(3)写入另外一条测试文档
PUT my_index/my_type/1
{
"number_two":"10"
}
返回
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "failed to parse [number_two]"
}
],
"type": "mapper_parsing_exception",
"reason": "failed to parse [number_two]",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "Integer value passed as String"
}
},
"status": 400
}
copy_to
copy_to属性用于配置自定义的_all字段。换言之,就是多个字段可以被合并成一个超级字段。比如first_name和last_name可以被合并为full_name字段
(1)创建索引插入数据
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"first_name":{
"type": "text",
"copy_to": "full_name"
},
"last_name":{
"type": "text",
"copy_to": "full_name"
},
"full_name":{
"type": "text"
}
}
}
}
}
PUT my_index/my_type/1
{
"first_name":"John",
"last_name":"Smith"
}
(2)查询
GET my_index/_search
{
"query": {
"match": {
"full_name": {
"query": "John Smith",
"operator": "and"
}
}
}
}
返回
{
"took": 15,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.5753642,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
"_score": 0.5753642,
"_source": {
"first_name": "John",
"last_name": "Smith"
}
}
]
}
}
doc_values
doc_values是为了加快排序、聚合操作,在建立倒排索引的时候,额外增加一个列式存储映射,是一个空间换时间的做法。默认是开启的,对于确定不需要聚合或者排序的字段可以关闭
(1)创建索引
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"status_code":{
"type": "keyword"
},
"session_id":{
"type": "keyword",
"doc_values":false
}
}
}
}
}
dynamic
dynamic属性用于检测新发现的字段,有三个取值:
true:新发现的字段添加到映射中。(默认)
false:新检测的字段被忽略。必须显式添加新字段。
strict:如果检测到新字段,就会引发异常并拒绝文档
(1)新建索引
取值为strict,非布尔值要加引号
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"dynamic":"strict",
"properties": {
"title":{"type": "text"}
}
}
}
}
(2)插入新文档
PUT my_index/my_type/1
{
"title":"test",
"content":"test dynamic"
}
返回
{
"error": {
"root_cause": [
{
"type": "strict_dynamic_mapping_exception",
"reason": "mapping set to strict, dynamic introduction of [content] within [my_type] is not allowed"
}
],
"type": "strict_dynamic_mapping_exception",
"reason": "mapping set to strict, dynamic introduction of [content] within [my_type] is not allowed"
},
"status": 400
}
enabled
ElasticSearch默认会索引所有的字段,enabled设为false的字段,es会跳过字段内容,该字段只能从_source中获取,但是不可搜索。而且字段可以是任意类型。
(1)新建索引,插入文档
DELETE my_index
PUT my_index
{
"mappings": {
"my_type":{
"properties":{
"name":{"enabled":false}
}
}
}
}
PUT my_index/my_type/1
{
"title":"test enabled",
"name":"chengyuqiang"
}
(2)查看文档
GET my_index/my_type/1
返回
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
"_version": 1,
"found": true,
"_source": {
"title": "test enabled",
"name": "chengyuqiang"
}
}
(3)搜索字段
GET my_index/_search
{
"query": {
"match": {
"name": "chengyuqiang"
}
}
}
返回
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
ignore_above
ignore_above用于指定字段索引和存储长度最大值,超过最大值会被忽略
(1)创建索引添加数据
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"message":{
"type": "keyword",
"ignore_above": 20
}
}
}
}
}
PUT my_index/my_type/1
{
"message":"Syntax error"
}
PUT my_index/my_type/2
{
"message": "Syntax error with some long stacktrace"
}
(2)查询
GET my_index/_search
{
"size": 0,
"aggs": {
"messages": {
"terms": {
"field": "message"
}
}
}
}
返回结果
{
"took": 326,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"messages": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Syntax error",
"doc_count": 1
}
]
}
}
}
mapping中指定了ignore_above字段的最大长度为20,第一个文档的字段长度小于20,因此索引成功;第二个文档的字段长度超过20,因此该字段不会被索引,聚合返回结果只有“Syntax error”。
ignore_malformed(忽略不规则的)
ignore_malformed可以忽略不规则数据。对于账号userid字段,有人可能填写的是整数类型,也有人填写的是邮件格式。默认情况下,给一个字段索引不合适的数据类型会发生异常,导致整个文档索引失败。如果ignore_malformed参数设为true,异常会被忽略,出异常的字段不会被索引,其它字段正常索引。
(1)创建索引
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"number_one":{
"type": "integer",
"ignore_malformed": true
},
"number_two":{
"type": "integer"
}
}
}
}
}
(2)添加数据
PUT my_index/my_type/1
{
"text": "Some text value",
"number_one": "foo"
}
PUT my_index/my_type/2
{
"text": "Some text value",
"number_two": "foo"
}
结果返回
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "failed to parse [number_two]"
}
],
"type": "mapper_parsing_exception",
"reason": "failed to parse [number_two]",
"caused_by": {
"type": "number_format_exception",
"reason": "For input string: \"foo\""
}
},
"status": 400
}
上面的例子number_one接受integer类型,ignore_malformed属性设置为true,因此文档1能写入成功;number_two接受integer类型,ignore_malformed属性默认为false,因此文档2写入失败
index
index属性指定字段是否索引,不索引也就不可搜索,取值可以为true或者false。
fields
fields可以让同一文本有多种不同的索引方式,比如一个String类型的字段,可以使用text类型做全文检索,使用keyword类型做聚合和排序
(1)放入数据
DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"city":{
"type": "text",
"fields": {
"raw":{
"type": "keyword"
}
}
}
}
}
}
}
PUT my_index/my_type/1
{
"city": "New York"
}
PUT my_index/my_type/2
{
"city": "York"
}
(2)查询
GET my_index/_search
{
"query": {
"match": {
"city": "york"
}
},
"sort": {
"city.raw": {
"order": "asc"
}
},
"aggs":{
"Cities":{
"terms": {
"field": "city.raw"
}
}
}
}
返回结果
{
"took": 43,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": null,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
"_score": null,
"_source": {
"city": "New York"
},
"sort": [
"New York"
]
},
{
"_index": "my_index",
"_type": "my_type",
"_id": "2",
"_score": null,
"_source": {
"city": "York"
},
"sort": [
"York"
]
}
]
},
"aggregations": {
"Cities": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "New York",
"doc_count": 1
},
{
"key": "York",
"doc_count": 1
}
]
}
}
}