在生产环境中elasticsearch.yml配置项主要如下:
# Basic production settings for elasticsearch.yml.
# Cluster name.
cluster.name: elasticsearch
# Node name (ASCII-quoted so the all-digit value is read as a string).
node.name: "35"
# Node tag.
node.tag: "tag21"
# Whether this node stores data.
node.data: true
# Number of index shards.
index.number_of_shards: 10
# Number of index replicas.
index.number_of_replicas: 2
# Location of the data directory.
path.data: /data/elasticsearch/data
# Location of the log files.
path.logs: /data/elasticsearch/log
# Memory setting (mlockall).
bootstrap.mlockall: true
# Index field cache: maximum size.
index.cache.field.max_size: 50000
# Index field cache: expiry time.
index.cache.field.expire: 5m
其它配置基本上不用调,具体可参考附录。另外需要调整的配置是分词,具体例子如下:
# Analysis settings: custom tokenizers, token filters and analyzers.
index:
  analysis:
    # Named tokenizers referenced by the analyzers below.
    tokenizer:
      my_pinyin:
        type: pinyin
        first_letter: "prefix"
        padding_char: ""
      pinyin_first_letter:
        type: pinyin
        first_letter: "only"
      mmseg_maxword:
        type: mmseg
        seg_type: "max_word"
      mmseg_complex:
        type: mmseg
        seg_type: "complex"
      mmseg_simple:
        type: mmseg
        seg_type: "simple"
      # Splits on ";".
      semicolon_spliter:
        type: pattern
        pattern: ";"
      # Splits on runs of "%".
      pct_spliter:
        type: "pattern"
        pattern: "[%]+"
    # Named token filters referenced by the analyzers below.
    filter:
      ngram_min_2:
        max_gram: 10
        min_gram: 2
        type: nGram
      ngram_min_1:
        max_gram: 10
        min_gram: 1
        type: nGram
      min2_length:
        min: 2
        max: 4
        type: length
    analyzer:
      lowercase_keyword:
        type: custom
        filter: [standard, lowercase]
        tokenizer: standard
      lowercase_keyword_ngram_min_size1:
        type: custom
        filter: [ngram_min_1, standard, lowercase]
        tokenizer: nGram
      lowercase_keyword_ngram_min_size2:
        type: custom
        filter: [ngram_min_2, standard, lowercase, min2_length, stop]
        tokenizer: nGram
      # Same definition as lowercase_keyword_ngram_min_size1.
      lowercase_keyword_ngram:
        type: custom
        filter: [ngram_min_1, standard, lowercase]
        tokenizer: nGram
      lowercase_keyword_without_standard:
        type: custom
        filter: [lowercase]
        tokenizer: keyword
      lowercase_whitespace:
        type: custom
        filter: [lowercase]
        tokenizer: whitespace
      ik:
        alias: [ik_analyzer]
        type: org.elasticsearch.index.analysis.IkAnalyzerProvider
      ike:
        alias: [ike_analyzer]
        # NOTE(review): the package is spelled "elastichsearch" here, unlike
        # the other provider classes; confirm it matches the real class name.
        type: org.elastichsearch.ik.index.IkAnalyzerProvider
        usermode: true
      mmseg:
        alias: [mmseg_analyzer]
        type: org.elasticsearch.index.analysis.MMsegAnalyzerProvider
      # Pattern analyzer: splits on commas, "|" and whitespace.
      comma_spliter:
        type: "pattern"
        pattern: "[,|\\s]+"
      # Pattern analyzer: splits on runs of "%".
      pct_spliter:
        type: "pattern"
        pattern: "[%]+"
      custom_snowball_analyzer:
        type: "snowball"
        language: "English"
      simple_english_analyzer:
        # Was misspelled "custome"; this analyzer supplies its own tokenizer
        # and filter list like the other custom analyzers in this section.
        type: "custom"
        tokenizer: whitespace
        filter: [standard, lowercase, snowball]
      edge_ngram:
        type: custom
        tokenizer: edgeNGram
        filter: [lowercase]
      pinyin_ngram_analyzer:
        type: custom
        tokenizer: my_pinyin
        filter: [standard, lowercase, nGram]
      pinyin_first_letter_analyzer:
        type: custom
        tokenizer: pinyin_first_letter
        filter: [standard, lowercase]
      custom_auth_en_analyzer:
        type: custom
        tokenizer: semicolon_spliter
        filter: [standard, snowball, lowercase, trim]
完成Config的配置后,还需要对bin目录下面的elasticsearch进行配置,此处主要是控制JVM的一些参数:
# Memory bounds for the JVM, both set to the same value (16G).
# NOTE(review): presumably the minimum/maximum heap size — confirm against
# the launcher script that reads these variables.
ES_MIN_MEM=16G
ES_MAX_MEM=16G
# Type of the default analyzer: keyword.
index.analysis.analyzer.default.type: 'keyword'
elasticsearch.yml
# Gateway type: where persisted data is stored. The default, local, is the
# recommended choice; NFS, HDFS and S3 are also available.
gateway.type: local
# Cluster name: the unique name that distinguishes this cluster.
cluster.name: 'TEST'
# Directory where index files are stored.
# path.data: '/var/elasticsearch/data'
# Directory where log files are stored.
# path.logs: '/var/elasticsearch/logs'
# Network settings.
# network.tcp.keep_alive: true
# network.tcp.send_buffer_size: 8192
# network.tcp.receive_buffer_size: 8192
# gateway.recover_after_nodes: 1
# gateway.recover_after_time: 10s
# gateway.expected_nodes: 2
# Automatic discovery settings.
# discovery.zen.fd.connect_on_network_disconnect: true
# discovery.zen.initial_ping_timeout: 10s
# discovery.zen.fd.ping_interval: 2s
# discovery.zen.fd.ping_retries: 10
# Index snapshot interval; only takes effect when the gateway is set to NFS.
# index.gateway.snapshot_interval: 1s
# Refresh interval.
# index.engine.robin.refresh_interval: -1
# Default number of index shards.
index.number_of_shards: 3
# Default number of index replicas.
index.number_of_replicas: 1
# Default index merge factor.
# index.merge.policy.merge_factor: 100
# index.merge.policy.min_merge_docs: 1000
# index.merge.policy.use_compound_file: true
# indices.memory.index_buffer_size: 5%
# Gateway settings.
# gateway:
#   recover_after_nodes: 1
#   recover_after_time: 5m
#   expected_nodes: 2
# Note: when the cluster does not reach the expected number of nodes it stays
# blocked and cannot index or search normally, which shows that some node
# failed to start. This is the intended effect: blocking avoids inconsistent
# data.
# Force all memory to be locked so the JVM never swaps, since swapping hurts
# performance (make sure to set MIN and MAX mem to the same value).
# bootstrap:
#   mlockall: true
# Unicast discovery: when multicast is disabled, the IPs of the cluster nodes
# can be set manually.
# discovery:
#   zen:
#     multicast.enabled: false
#     unicast.hosts: ["host1", "host2"]