Elasticsearch动态模板

weixin_43315211

已于 2022-12-05 09:28:34 修改

阅读量346

点赞数

分类专栏： Elasticsearch 文章标签： elasticsearch 动态模板

于 2019-08-12 18:00:03 首次发布

本文链接：https://blog.csdn.net/weixin_43315211/article/details/99317935

版权

Elasticsearch 专栏收录该内容

12 篇文章 1 订阅

订阅专栏

模板规则

{
  "order": 0,                               // 模板优先级
  "template": "sample_info*",               // 模板匹配的名称方式
  "settings": {...},                        // 索引设置
  "mappings": {...},                        // 索引中各字段的映射定义
  "aliases": {...}                          // 索引的别名
}

1、模板的优先级：

"order":0

{
    "order": 0
    "template" : "te*",
    "settings" : {
        "number_of_shards" : 1
    },
    "mappings" : {
        "type1" : {
            "_source" : { "enabled" : false }
        }
    }
}

{
    "order" : 1,
    "template" : "tete*",
    "settings" : {
        "number_of_shards" : 2
    },
    "mappings" : {
        "type1" : {
            "_all" : { "enabled" : false }
        }
    }
}

第一个模板的 order 为0，第二个模板的 order 为1，优先级高于第一个模板，其会覆盖第一个模板中的相同项。

2、索引模板匹配：

"template" : "te*"

当新建索引时，所有以 te 开头的索引都会自动匹配到该索引模板。利用该模板进行相应的设置和字段添加等。

3、settings 部分

"settings": {
    "index": {
      "analysis": {...},                // 自定义的分析器
      "number_of_shards": "3",         // 主分片的个数
      "number_of_replicas": "2",        // 主分片的拷贝分片个数
      "refresh_interval": "5s"          // 刷新时间
    }
  }

 
 "analysis": {
           "char_filter": { ... },              // 用户自定义字符过滤器
            "tokenizer":   { ... },             // 用户自定义分词器
            "filter":      { ... },             // 用户自定义标记过滤器
            "analyzer":    { ... }              // 用户自定义分析器
      }

3.1 字符过滤器

映射字符过滤器（mapping char filter），将 & 替换成 and

"char_filter": {
    "&_to_and": {
        "type":       "mapping",
        "mappings": [ "&=> and "]
    }
}

HTML过滤器（HTML Strip char filter），html_strip 字符过滤器去除所有的 HTML 标签。
格式替换过滤器（Pattern Replace char filter），将点 “.” 替换成空格

"char_filter": {
    "replace_dot": {
        "pattern": "\\.",
        "type": "pattern_replace",
        "replacement": " "
    }
}

3.2 分词器

分词器将字符串分割成单独的字词。例如 standard 分词器按单词边界切分，并删除大部分标点符号；keyword 分词器则输出和它接收到的相同的字符串，不做任何分词处理。

3.3 标记过滤器

常用的标记过滤器有 lowercase 和 stop。lowercase 标记过滤器将词转换为小写，stop 标记过滤器用于去除一些自定义停用词或者是语言内定义的停用词。

"filter": {
    "my_stopwords": {
        "type":        "stop",
        "stopwords": [ "the", "a" ]
    }
}

分析器组合

"analyzer": {
    "my_analyzer": {
        "type":           "custom",
        "char_filter":  [ "html_strip", "&_to_and", "replace_dot" ],
        "tokenizer":      "standard",
        "filter":       [ "lowercase", "my_stopwords", "my_stop" ]
    }
}


"settings": {
    "index": {
      "analysis": {
           "char_filter": {
                "&_to_and": {
                    "type":       "mapping",
                    "mappings": [ "&=> and "]
                },
                "replace_dot": {
                    "pattern": "\\.",
                    "type": "pattern_replace",
                    "replacement": " "
                }
            },
            "filter":      {
                "my_stop": {
                    "type":        "stop",
                    "stopwords": _spanish_
                },
                "my_stopwords": {
                    "type":        "stop",
                    "stopwords": [ "the", "a" ]
                }
            },
            "analyzer":    {
                "my_analyzer": {
                    "type":           "custom",
                    "char_filter":  [ "html_strip", "&_to_and", "replace_dot" ],
                    "tokenizer":      "standard",
                    "filter":       [ "lowercase", "my_stopwords", "my_stop" ]
                }
            }
      },
      ...
    }
  }

4、索引类型的字段映射

 "mappings": {
    "my_type": {                            // 索引下的类型 my_type 应用该映射
      "dynamic_templates": [ ... ],         // 动态映射部分，用于未定义的 my_type 下字段
      "properties": { ... }                 // 自定义字段的响应映射
    }
}

4.1 动态映射dynamic_templates：

模板：

{
    "string_fields": {                                  // 字段映射模板的名称，一般为"类型_fields"的命名方式
        "match": "*",                                   // 匹配的字段名为所有
        "match_mapping_type": "string",                 // 限制匹配的字段类型，只能是 string 类型
        "mapping": { ... }                              // 字段的处理方式
	}
 }

实例：

"mappings": {
    "my_type": {
      "dynamic_templates": [
         {
            "string_fields": {                                  // 字段映射模板的名称，一般为"类型_fields"的命名方式
                "match": "*",                                   // 匹配的字段名为所有
                "match_mapping_type": "string",                 // 限制匹配的字段类型，只能是 string 类型
                "mapping": {
                    "fielddata": { "format": "disabled" },      // fielddata 不可用，对于分析字段，其默认值是可用
                    "analyzer": "only_words_analyzer",          // 字段采用的分析器名，默认值为 standard 分析器
                    "index": "analyzed",                        // 索引方式定义为索引，默认值是分析
                    "omit_norms": true,                         // omit_norms 为真表示考虑字段的加权，可分析字段默认值 false
                    "type": "string",                           // 字段类型限定为 string
                    "fields": {                                 // 定义一个嵌套字段，将该字段应用于不分析的场景
                        "raw": {
                            "ignore_above": 256,                // 忽略字段对应的值长度大于256的字段
                            "index": "not_analyzed",            // 索引方式为不分析
                            "type": "string",                   // 字段的类型为 string
                            "doc_values": true                  // 对于不分析字段，doc_values 对应的是一种列式存储结构，默认false
                        }
                    }
                }
            }
        },
        {
			"float_fields": {                               // 命名方式 "类型_fields"
				"match": "*"，
				"match_mapping_type": "float",
				"mapping": {
					"type": "float",
					"doc_values": true                      // doc_values 定义为 true，便于排序与聚合分析
				}
			}
		},
		...
      ],
      "properties": { ... }
    }
}

4.2 自定义字段映射

"mappings": {
    "my_type": {
      "dynamic_templates": [ ... ],
      "properties": {
          "user_city": {                                // 字段名
             "analyzer": "lowercase_analyzer",          // 字段分析器
             "index": "analyzed",                       // 字段索引方式定义索引
             "type": "string",                          // 字段数据类型定义为 string
             "fields": {                                // 定义一个名为 user_city.raw 的嵌入的不分析字段
                "raw": {
                    "ignore_above": 512,
                    "index": "not_analyzed",
                    "type": "string"
                }
            }
         },
         "money":{
            "type": "double",
            "doc_values": true
         },
         ...
      }
    }
}

5、别名

{
  "order": 0,                               // 模板优先级
  "template": "sample_info*",               // 模板匹配的名称方式
  "settings": {...},                        // 索引设置
  "mappings": {...},                        // 索引中各字段的映射定义
  "aliases": {
     "my_index":{}
  }
}

零停机时间实现重新索引方式

POST /_aliases
{
    "actions": [
        { "remove": { "index": "my_index_v1", "alias": "my_index" }},
        { "add":    { "index": "my_index_v2", "alias": "my_index" }}
    ]
}

weixin_43315211

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
Elasticsearch动态模板

模板规则{ "order": 0, // 模板优先级 "template": "sample_info*", // 模板匹配的名称方式 "settings": {...}, // 索引设置 "mappings": {...}, ...
复制链接

扫一扫