Elasticsearch优秀的索引设置技巧

下面的索引设置来自国外一家优秀的图片搜索公司,从它的索引设计中可以学到一些提高索引设计水平的技巧:

{
  "500px.photos-2016-05-06-20-09": {
    "aliases": {
      "500px.photos": {}
    },
    "mappings": {
      "photo": {
        "_all": {
          "enabled": false
        },
        "_routing": {
          "required": true,
          "path": "user_id"
        },
        "properties": {
          "camera": {
            "type": "string",
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              }
            }
          },
          "category": {
            "type": "integer"
          },
          "collections_count": {
            "type": "long"
          },
          "comments_count": {
            "type": "integer"
          },
          "context_tags": {
            "type": "nested",
            "properties": {
              "context": {
                "type": "string",
                "index": "not_analyzed"
              },
              "id": {
                "type": "integer"
              },
              "name": {
                "type": "string",
                "fields": {
                  "exact": {
                    "type": "string",
                    "analyzer": "exact"
                  },
                  "exact_stemmed_synonyms": {
                    "type": "string",
                    "index_analyzer": "exact_stemmed_synonyms",
                    "search_analyzer": "exact_stemmed_synonyms_search"
                  },
                  "not_analyzed": {
                    "type": "string",
                    "index": "not_analyzed"
                  },
                  "prefix": {
                    "type": "string",
                    "index_analyzer": "prefix",
                    "search_analyzer": "prefix_search"
                  },
                  "stemmed": {
                    "type": "string",
                    "analyzer": "stemmed"
                  },
                  "stemmed_synonyms": {
                    "type": "string",
                    "index_analyzer": "stemmed_synonyms",
                    "search_analyzer": "stemmed_synonyms_search"
                  }
                }
              },
              "name_de": {
                "type": "string",
                "fields": {
                  "exact": {
                    "type": "string",
                    "analyzer": "exact"
                  },
                  "not_analyzed": {
                    "type": "string",
                    "index": "not_analyzed"
                  },
                  "prefix": {
                    "type": "string",
                    "index_analyzer": "prefix",
                    "search_analyzer": "prefix_search"
                  },
                  "stemmed": {
                    "type": "string",
                    "analyzer": "stemmed_de"
                  }
                }
              },
              "weight": {
                "type": "float"
              },
              "weight_new": {
                "type": "float"
              }
            }
          },
          "context_tags_tags_count": {
            "type": "integer"
          },
          "converted": {
            "type": "integer"
          },
          "created_at": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "delivery_type_current": {
            "type": "nested",
            "properties": {
              "delivery_type": {
                "type": "string",
                "index": "not_analyzed"
              },
              "enabled": {
                "type": "boolean"
              }
            }
          },
          "description": {
            "type": "string",
            "boost": 5,
            "fields": {
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              },
              "stemmed": {
                "type": "string",
                "analyzer": "stemmed"
              },
              "stemmed_synonyms": {
                "type": "string",
                "index_analyzer": "stemmed_synonyms",
                "search_analyzer": "stemmed_synonyms_search"
              }
            }
          },
          "favorites_count": {
            "type": "integer"
          },
          "feature": {
            "type": "nested",
            "properties": {
              "end_at": {
                "type": "date",
                "format": "dateOptionalTime"
              },
              "name": {
                "type": "string",
                "index": "not_analyzed"
              },
              "start_at": {
                "type": "date",
                "format": "dateOptionalTime"
              }
            }
          },
          "for_sale": {
            "type": "boolean"
          },
          "galleries": {
            "type": "nested",
            "properties": {
              "id": {
                "type": "long"
              },
              "position": {
                "type": "long"
              }
            }
          },
          "geo_coordinates": {
            "type": "geo_point",
            "lat_lon": true
          },
          "height": {
            "type": "integer"
          },
          "hi_res_uploaded": {
            "type": "integer"
          },
          "highest_rating": {
            "type": "float"
          },
          "id": {
            "type": "integer"
          },
          "image_format": {
            "type": "integer"
          },
          "lens": {
            "type": "string",
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              }
            }
          },
          "license_requests_enabled": {
            "type": "boolean"
          },
          "license_type": {
            "type": "integer"
          },
          "licensed_at": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "licensing_status": {
            "type": "integer"
          },
          "licensing_status_organizer_index": {
            "type": "integer"
          },
          "name": {
            "type": "string",
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "exact_stemmed_synonyms": {
                "type": "string",
                "index_analyzer": "exact_stemmed_synonyms",
                "search_analyzer": "exact_stemmed_synonyms_search"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              },
              "stemmed": {
                "type": "string",
                "analyzer": "stemmed"
              },
              "stemmed_synonyms": {
                "type": "string",
                "index_analyzer": "stemmed_synonyms",
                "search_analyzer": "stemmed_synonyms_search"
              }
            }
          },
          "nsfw": {
            "type": "boolean"
          },
          "photo_sets": {
            "type": "nested",
            "properties": {
              "id": {
                "type": "long"
              },
              "position": {
                "type": "long"
              }
            }
          },
          "privacy": {
            "type": "integer"
          },
          "rating": {
            "type": "float"
          },
          "sales_count": {
            "type": "integer"
          },
          "status": {
            "type": "integer"
          },
          "taken_at": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "times_viewed": {
            "type": "integer"
          },
          "updated_at": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "user_firstname": {
            "type": "string",
            "boost": 4,
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              }
            }
          },
          "user_id": {
            "type": "long"
          },
          "user_lastname": {
            "type": "string",
            "boost": 4,
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              }
            }
          },
          "user_name": {
            "type": "string",
            "boost": 4,
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              }
            }
          },
          "user_partner_optout": {
            "type": "boolean"
          },
          "user_status": {
            "type": "integer"
          },
          "user_store_on": {
            "type": "boolean"
          },
          "user_username": {
            "type": "string",
            "boost": 4,
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "exact"
              },
              "not_analyzed": {
                "type": "string",
                "index": "not_analyzed"
              },
              "prefix": {
                "type": "string",
                "index_analyzer": "prefix",
                "search_analyzer": "prefix_search"
              }
            }
          },
          "votes_count": {
            "type": "integer"
          },
          "width": {
            "type": "integer"
          }
        }
      }
    },
    "settings": {
      "index": {
        "creation_date": "1462579747496",
        "uuid": "V9kxOgQPR82FXpj-UN_Rdw",
        "analysis": {
          "char_filter": {
            "amp_and": {
              "type": "mapping",
              "mappings": [
                "&=> and "
              ]
            },
            "punctuation": {
              "type": "mapping",
              "mappings": [
                ".=> "
              ]
            }
          },
          "filter": {
            "preserved_asciifolding": {
              "type": "asciifolding",
              "preserve_original": "true"
            },
            "large_prefixer": {
              "max_gram": "100",
              "min_gram": "1",
              "type": "edgeNGram",
              "side": "front"
            },
            "prefixer": {
              "max_gram": "8",
              "type": "edgeNGram",
              "min_gram": "2",
              "side": "front"
            },
            "german_stemmer": {
              "type": "stemmer",
              "language": "light_german"
            },
            "german_stop": {
              "type": "stop",
              "stopwords": "_german_"
            },
            "fivegrammer": {
              "min_gram": "5",
              "type": "nGram",
              "max_gram": "5"
            },
            "synonyms": {
              "type": "synonym",
              "synonyms_path": "analysis/wn_s.pl",
              "format": "wordnet"
            },
            "trigrammer": {
              "type": "nGram",
              "min_gram": "3",
              "max_gram": "3"
            },
            "custom_stems": {
              "type": "stemmer_override",
              "rules_path": "analysis/custom_stems.txt"
            }
          },
          "analyzer": {
            "exact_stemmed_synonyms": {
              "type": "custom",
              "char_filter": [
                "amp_and"
              ],
              "filter": [
                "asciifolding",
                "lowercase",
                "trim",
                "custom_stems",
                "kstem",
                "synonyms",
                "custom_stems",
                "stop"
              ],
              "tokenizer": "keyword"
            },
            "stemmed": {
              "filter": [
                "standard",
                "lowercase",
                "custom_stems",
                "stop",
                "kstem"
              ],
              "tokenizer": "standard"
            },
            "exact_stemmed_synonyms_search": {
              "type": "custom",
              "char_filter": [
                "amp_and"
              ],
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "trim",
                "custom_stems",
                "stop",
                "kstem"
              ],
              "tokenizer": "standard"
            },
            "synonyms": {
              "type": "custom",
              "char_filter": [
                "amp_and"
              ],
              "filter": [
                "standard",
                "lowercase",
                "synonyms"
              ],
              "tokenizer": "standard"
            },
            "partial": {
              "filter": [
                "preserved_asciifolding",
                "large_prefixer"
              ],
              "tokenizer": "lowercase"
            },
            "prefix_search": {
              "tokenizer": "lowercase"
            },
            "stemmed_synonyms": {
              "type": "custom",
              "char_filter": [
                "amp_and"
              ],
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "trim",
                "custom_stems",
                "kstem",
                "synonyms",
                "custom_stems",
                "stop"
              ],
              "tokenizer": "standard"
            },
            "fivegram_ascii": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "trim",
                "fivegrammer"
              ],
              "tokenizer": "standard"
            },
            "prefix": {
              "filter": [
                "preserved_asciifolding",
                "prefixer"
              ],
              "tokenizer": "lowercase"
            },
            "exact": {
              "type": "custom",
              "char_filter": [
                "amp_and"
              ],
              "filter": [
                "asciifolding",
                "lowercase",
                "trim"
              ],
              "tokenizer": "keyword"
            },
            "stemmed_synonyms_search": {
              "type": "custom",
              "char_filter": [
                "amp_and"
              ],
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "trim",
                "custom_stems",
                "stop",
                "kstem"
              ],
              "tokenizer": "standard"
            },
            "trigram": {
              "filter": [
                "lowercase",
                "trim",
                "trigrammer"
              ],
              "tokenizer": "keyword"
            },
            "stemmed_de": {
              "filter": [
                "standard",
                "asciifolding",
                "lowercase",
                "german_stop",
                "german_normalization",
                "german_stemmer"
              ],
              "tokenizer": "standard"
            },
            "partial_search": {
              "tokenizer": "lowercase"
            }
          }
        },
        "number_of_replicas": "2",
        "number_of_shards": "20",
        "refresh_interval": "1",
        "version": {
          "created": "1040499"
        }
      }
    },
    "warmers": {}
  }
}
  • 500px.photos-2016-05-06-20-09:索引名称中带有具体的日期和时间,用来记录该次索引创建(修改)的时间,是个值得借鉴的小细节
  • 每次修改后新建的索引都会设置同一个别名:500px.photos,查询方只需访问别名,无需关心具体的索引名称
  • 参数 "_all": { "enabled": false }:关闭_all字段,有效地避免了该全文字段在索引阶段对CPU和存储空间资源的开销
  • 设置_routing,并以user_id作为路由路径(required为true),可以使同一个用户的图片都存储到同一个shard中,进行用户个人图片相关的搜索时,指定routing即可只查询该shard,从而提高搜索效率
  • 针对camera字段的查询,设置了三个不同的子字段:1.exact 2.not_analyzed 3.prefix,分别对应归一化(转小写、去除重音符号、去除首尾空格)后的精确查找、原值的精确匹配、前缀查找
  • 设置了多个可以用来判断图片质量好坏的计数字段,如collections_count,comments_count,favorites_count,context_tags_tags_count,sales_count,votes_count
  • context_tags是最精华的地方,详细说明请参考另一篇文档,参考地址:
  • context_tags_tags_count单独拿出来说:通过这个计数来衡量某张图片上各关键词的权重,关键词越多,说明内容越丰富,相对地每个关键词的权重应该偏低。
  • converted:图片被编辑的次数,暂时不清楚其目的,猜测是某张图片被编辑的次数越多,其质量相对越高,因为摄影师只会对好的图片用心对待(修改内容)
阅读更多
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页