Elasticsearch Installation and Configuration
Note: to stay in step with the jieba plugin (latest release 7.7.0), ES 7.7.0 is required. The docker commands below use 7.10.0, which pairs with the IK plugin installed later; the docker-compose setup further down uses 7.7.0 for jieba.
Installation
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.10.0
# Minimal single-node run (use either this or the fuller variant below; both name the container "es"):
docker run --detach \
--name es \
--publish 9200:9200 --publish 9300:9300 \
--restart always \
--env "discovery.type=single-node" \
--volume /home/dyz/dockers/elasticsearch/elasticsearchdata:/usr/share/elasticsearch/data \
--volume /home/dyz/dockers/elasticsearch/elasticsearchplugins:/usr/share/elasticsearch/plugins \
--volume /home/dyz/dockers/elasticsearch/elasticsearchconfig/stopwords.txt:/usr/share/elasticsearch/config/stopwords.txt \
--volume /home/dyz/dockers/elasticsearch/elasticsearchconfig/synonyms.txt:/usr/share/elasticsearch/config/synonyms.txt \
--volume /home/dyz/dockers/elasticsearch/elasticsearchconfig/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
docker.elastic.co/elasticsearch/elasticsearch:7.10.0
# Fuller variant with explicit JVM options, memory lock, and painless regex enabled:
docker run --detach \
--name es \
--publish 9200:9200 --publish 9300:9300 \
--restart always \
--env "node.name=elasticsearch" \
--env "cluster.name=test-elasticsearch" \
--env "bootstrap.memory_lock=true" \
--env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
--env "discovery.type=single-node" \
--env "script.painless.regex.enabled=true" \
--volume /home/dyz/dockers/elasticsearch/elasticsearchdata:/usr/share/elasticsearch/data \
--volume /home/dyz/dockers/elasticsearch/elasticsearchplugins:/usr/share/elasticsearch/plugins \
--volume /home/dyz/dockers/elasticsearch/elasticsearchconfig/stopwords.txt:/usr/share/elasticsearch/config/stopwords.txt \
--volume /home/dyz/dockers/elasticsearch/elasticsearchconfig/synonyms.txt:/usr/share/elasticsearch/config/synonyms.txt \
--volume /home/dyz/dockers/elasticsearch/elasticsearchconfig/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
--volume /etc/localtime:/etc/localtime \
docker.elastic.co/elasticsearch/elasticsearch:7.10.0
# Note
# If Elasticsearch complains about insufficient permissions during installation, relax the permissions on the mounted directory: chmod -R 777 /home/dyz/dockers/elasticsearch (e.g. chmod -R 777 els)
# --env cluster.initial_master_nodes=elasticsearch  # not needed on 7.7.0
# --env "discovery.type=single-node"  # required on 7.7.0
# CORS settings (needed, for example, by elasticsearch-head)
-e http.cors.enabled=true
-e http.cors.allow-origin="*"
-e http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization
-e http.cors.allow-credentials=true
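With CORS enabled, a request carrying an Origin header should come back with an Access-Control-Allow-Origin header (a quick check, assuming ES on localhost:9200):
curl -s -D - -o /dev/null -H "Origin: http://localhost:9100" http://localhost:9200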
# Configure stopwords.txt
# Configure synonyms.txt
# Configure elasticsearch.yml
http.cors.enabled: true
http.cors.allow-origin: "*"
http.cors.allow-headers: Authorization
xpack.security.enabled: true
xpack.security.transport.ssl.enabled: true
## Set passwords
docker exec -it es bash
cd bin
elasticsearch-setup-passwords interactive
#Initiating the setup of passwords for reserved users elastic,apm_system,kibana,kibana_system,logstash_system,beats_system,remote_monitoring_user.
#You will be prompted to enter passwords as the process progresses.
# Please confirm that you would like to continue [y/N]Y
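Once the passwords are set, requests must be authenticated, e.g. (substitute the password you chose for elastic):
curl -u elastic:your_password http://localhost:9200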
### Install Kibana
docker pull docker.elastic.co/kibana/kibana:7.10.0
# YOUR_ELASTICSEARCH_CONTAINER_NAME_OR_ID: the name or ID of the Elasticsearch container
# Connect to the ES instance running in Docker on the same machine
docker run --detach \
--name kibana \
--publish 5601:5601 \
--restart=always \
--link es:elasticsearch \
--env "ELASTICSEARCH_HOSTS=http://es:9200" \
--env "LS_JAVA_OPTS=-Xmx256m -Xms256m" \
--volume /etc/localtime:/etc/localtime \
docker.elastic.co/kibana/kibana:7.10.0
## Modify the configuration
docker exec -it kibana bash
cd config
vi kibana.yml
## IpAddress: run docker inspect es to find the ES container's internal IP address
server.name: kibana
server.host: "0.0.0.0"
elasticsearch.hosts: [ "http://{IpAddress}:9200" ]
monitoring.ui.container.elasticsearch.enabled: true
elasticsearch.username: "elastic"
elasticsearch.password: "password"
i18n.locale: "zh-CN"
## Exit and restart
exit
docker restart kibana
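Kibana takes a little while to come up; its status endpoint is a quick health check (a sketch, assuming the port mapping above):
curl -s http://localhost:5601/api/status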
### Installing elasticsearch-head
docker pull mobz/elasticsearch-head:5
docker run -d -p 9100:9100 \
--restart=always \
-e ELASTICSEARCH_URL=http://10.0.30.2:9200 \
--name elasticsearch-head \
mobz/elasticsearch-head:5
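elasticsearch-head talks to ES from the browser, so the CORS settings shown earlier must be enabled on the ES side. To confirm the head UI itself is serving:
curl -I http://localhost:9100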
Installation with docker-compose
# (1) Prerequisites
# Docker installed
# docker-compose installed
# A JDK installed (preferably recent, at least JDK 9) ### the one bundled with ELK also works
# (2) Prepare the esAndKibana.yml file
version: '3'
services:
  elasticsearch:
    image: elasticsearch:7.7.0
    container_name: elasticsearch
    environment:
      - "cluster.name=elasticsearch"        # cluster name
      - "discovery.type=single-node"        # start as a single node
      - "ES_JAVA_OPTS=-Xms4096m -Xmx4096m"  # JVM heap size
    volumes:
      - /data/elasticsearch/plugins:/usr/share/elasticsearch/plugins  # plugin mount
      - /data/elasticsearch/data:/usr/share/elasticsearch/data        # data mount
      - /data/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml  # config mount
    ports:
      - 9200:9200
      - 9300:9300
  kibana:
    image: kibana:7.7.0
    container_name: kibana
    depends_on:
      - elasticsearch  # start Kibana after Elasticsearch
    environment:
      - "ELASTICSEARCH_HOSTS=http://elasticsearch:9200"  # address of Elasticsearch (use the service name, not 127.0.0.1, inside the container)
    volumes:
      - /data/kibana/config:/usr/share/kibana/config  # config mount
    ports:
      - 5601:5601
# (3) The elasticsearch.yml config file
cluster.name: "docker-cluster"
network.host: 0.0.0.0
# (4) The kibana.yml config file
server.name: kibana
server.host: "0"
elasticsearch.hosts: [ "http://elasticsearch:9200" ]
i18n.locale: "zh-CN"
# (5) Create the es user. As root, run:
groupadd es
useradd es -g es
vi /etc/sudoers
# Add the following line below "root ALL=(ALL) ALL":
es ALL=(ALL) ALL
# Set the es user's password (here: es)
passwd es
# (6) Grant the es user ownership of the relevant folders. As root:
# the folder containing esAndKibana.yml
chown -R es:es /opt/app/kibana/
# the plugin mount
chown -R es:es /data/elasticsearch/plugins/
# the data mount
chown -R es:es /data/elasticsearch/data/
# permission to run docker-compose (this step may not be strictly necessary)
chown -R es:es /usr/local/bin/docker-compose
# (7) In the folder containing esAndKibana.yml, run the install command as the es user
sudo /usr/local/bin/docker-compose -f esAndKibana.yml up -d
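To confirm both services came up (run from the same folder):
sudo /usr/local/bin/docker-compose -f esAndKibana.yml ps
sudo /usr/local/bin/docker-compose -f esAndKibana.yml logs --tail=20 elasticsearch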
Installing jieba
Download from:
https://github.com/sing1ee/elasticsearch-jieba-plugin
## Download and build
git clone https://github.com/sing1ee/elasticsearch-jieba-plugin.git --recursive
./gradlew clean pz
## copy the zip file to plugin directory
cp build/distributions/elasticsearch-jieba-plugin-7.7.0.zip ${path.home}/plugins
## unzip and remove the zip file
unzip elasticsearch-jieba-plugin-7.7.0.zip
rm elasticsearch-jieba-plugin-7.7.0.zip
## start elasticsearch
./bin/elasticsearch
Note on a startup error: Elasticsearch fails to start with "plugin-descriptor.properties: Not a directory".
Solution (from https://www.freesion.com/article/9797557802/):
Many articles say it is enough to unzip the jieba plugin and delete the zip, yet this error can still occur. The fix is to create a folder named elasticsearch-jieba-plugin-7.7.0 under the plugins directory and move all of the unzipped files into it. One more point: although it worked here without this step, some blogs also rename the jar file's version to match the ES version, so you may want to do that as well.
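Once Elasticsearch starts cleanly, the plugin list is a quick way to confirm jieba was picked up (the exact plugin name shown depends on the plugin descriptor):
curl -s http://localhost:9200/_cat/plugins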
Using jieba
jieba_index: emits all possible tokens, with overlapping positions
POST _analyze
{
  "analyzer": "jieba_index",
  "text": "高通量测序?"
}
jieba_search: sequential segmentation for search
POST _analyze
{
  "analyzer": "jieba_search",
  "text": "高通量测序?"
}
## Custom User Dict
Just put your dict file, with the suffix .dict, into ${path.home}/plugins/jieba/dic. Your dict file should look like this:
小清新 3
百搭 3
显瘦 3
隨身碟 100
your_word word_freq
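After restarting Elasticsearch, a word from the custom dict should come back as a single token; a quick check against the first entry above:
curl -X POST "http://localhost:9200/_analyze" -H 'Content-Type: application/json' -d '{"analyzer": "jieba_index", "text": "小清新"}'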
## Using stopwords
find stopwords.txt in ${path.home}/plugins/jieba/dic.
## create a folder named stopwords under ${path.home}/config
mkdir -p ${path.home}/config/stopwords
## copy stopwords.txt into the folder just created
cp ${path.home}/plugins/jieba/dic/stopwords.txt ${path.home}/config/stopwords
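The index definition below also references a synonyms file under config; it can be prepared the same way (the sample line mirrors the expand example at the end of this section):
mkdir -p ${path.home}/config/synonyms
echo "土豆,马铃薯,potato" > ${path.home}/config/synonyms/synonyms.txt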
## create index:
PUT http://localhost:9200/jieba_index
{
  "settings": {
    "analysis": {
      "filter": {
        "jieba_stop": {
          "type": "stop",
          "stopwords_path": "stopwords/stopwords.txt"
        },
        "jieba_synonym": {
          "type": "synonym",
          "synonyms_path": "synonyms/synonyms.txt"
        }
      },
      "analyzer": {
        "my_ana": {
          "tokenizer": "jieba_index",
          "filter": [
            "lowercase",
            "jieba_stop",
            "jieba_synonym"
          ]
        }
      }
    }
  }
}
POST http://localhost:9200/jieba_index/_analyze
{
  "analyzer": "my_ana",
  "text": "黄河之水天上来"
}
Elasticsearch Chinese Analyzer (IK)
Online installation
# https://github.com/medcl/elasticsearch-analysis-ik/releases
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.10.0/elasticsearch-analysis-ik-7.10.0.zip
Test requests
Default analyzer
POST _analyze
{
  "analyzer": "standard",
  "text": "我是中国人"
}
The installed Chinese analyzers
ik_smart: the coarsest-grained segmentation
POST _analyze
{
  "analyzer": "ik_smart",
  "text": "我是中国人"
}
ik_max_word: the finest-grained segmentation
POST _analyze
{
  "analyzer": "ik_max_word",
  "text": "我是中国人"
}
Custom extension dictionary
Copy the stopword, synonym, and dictionary files
Copy a file from the host into the container:
docker cp <file-on-host> <container-name>:<path-inside-container>
docker cp /home/dyz/elasticsearch/config/stopwords.txt es:/usr/share/elasticsearch/config/stopwords.txt
Copy a file from the container to the host:
docker cp <container-name>:<path-inside-container> <path-on-host>
docker cp es:/usr/share/elasticsearch/config/stopwords.txt /home/dyz/elasticsearch/config/stopwords.txt
# Step 1: under /usr/share/elasticsearch/config/analysis-ik, create a directory: mkdir custom
# Step 2: under /usr/share/elasticsearch/config/analysis-ik/custom, create a file (vi new_word.dic) and add the new words:
王者荣耀
联通云
硅谷亮城
# Step 3: under /usr/share/elasticsearch/config/analysis-ik, run vi IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer extension configuration</comment>
    <!-- configure your own extension dictionary here -->
    <entry key="ext_dict">custom/new_word.dic</entry>
    <!-- configure your own extension stopword dictionary here -->
    <entry key="ext_stopwords"></entry>
    <!-- configure a remote extension dictionary here -->
    <!-- <entry key="remote_ext_dict">words_location</entry> -->
    <!-- configure a remote extension stopword dictionary here -->
    <!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>
docker restart es
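A quick check that the new words were loaded: 王者荣耀 should come back as a single token rather than 王者 + 荣耀 (assuming the dictionary was read on restart):
curl -X POST "http://localhost:9200/_analyze" -H 'Content-Type: application/json' -d '{"analyzer": "ik_smart", "text": "王者荣耀"}'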
Fixing garbled Chinese output inside the container
Add the Chinese locale by installing two packages
yum install kde-l10n-Chinese -y
yum install glibc-common -y
Generate the locale and character set
localedef -c -f UTF-8 -i zh_CN zh_CN.utf8
Add the definition to the system environment variables
vi /etc/profile
export LC_ALL=zh_CN.utf8
Apply the change
source /etc/profile
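Verify the locale is now available and active:
locale -a | grep zh_CN
echo $LC_ALL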
Custom analyzer
- Add token filters. Common token filters include the Stop token filter, the Synonym token filter, and so on. A full example follows.
PUT custom_ik_smart-000003
{
  "settings": {
    "analysis": {
      "filter": {
        "synonym_word_filter": {
          "type": "synonym",
          "updateable": true,
          "expand": true,
          "synonyms_path": "analysis-ik/custom/es_synonym_2021-08-12.txt"
        },
        "stop_word_filter": {
          "type": "stop",
          "updateable": true,
          "stopwords_path": "analysis-ik/custom/es_stopwords_fine_grained_2021-08-12.txt"
        }
      },
      "analyzer": {
        "custom_ik_smart": {        // analyzer name
          "filter": [
            "synonym_word_filter",  // filters to apply
            "stop_word_filter"
          ],
          "type": "custom",
          "tokenizer": "ik_smart"   // the IK plugin's ik_smart tokenizer
        }
      }
    }
  }
}
POST custom_ik_smart-000003/_analyze
{
  "analyzer": "custom_ik_smart",
  "text": "非小细胞肺癌检测到EGFR 19 del 推荐什么药物"
}
Configuring synonyms
- Create your own analyzer when creating the index
- Specify search_analyzer in the mapping
- Create the synonyms file
For step 1, the custom analyzer defined above can be reused.
In the mapping, search_analyzer is usually set to the custom synonym analyzer, while the index-time analyzer (used when documents are stored in Elasticsearch) is usually the plain ik_smart or ik_max_word. An example:
PUT /custom_ik_smart-000003/_mapping
{
  "properties": {
    "question": {
      "type": "text",
      "analyzer": "ik_smart",
      "search_analyzer": "custom_ik_smart",
      "index": true
    },
    "answer": {
      "type": "text",
      "index": false
    },
    "id": {
      "type": "integer"
    }
  }
}
Insert data
POST /custom_ik_smart-000003/_doc/1
{
  "id": 1,
  "question": "瑞朗安Plus产品具有哪些优势?",
  "answer": "基于二代测序技术,一次检测NCCN指南推荐的全部基因以及其他与非小细胞肺癌密切相关的重要基因,外显子100%覆盖;全面解析非小细胞肺癌密切相关的敏感、耐药、罕见突变,精准指导靶向用药;报告周期短,仅需5-7个工作日;对于无法获取组织样本的患者,提供无创液体活检,也可通过液体活检进行动态监测,有"
}
GET /custom_ik_smart-000003/_doc/1
## Keyword search (matching the English token "RLAs" against a Chinese question relies on the synonym search_analyzer, assuming the synonyms file maps RLAs to 瑞朗安)
GET /custom_ik_smart-000003/_search
{
  "query": {
    "match": {
      "question": "RLAs"
    }
  }
}
Note the expand setting on synonym_word_filter. Taking the synonym line 土豆,马铃薯,potato as an example: if expand is false, any of the tokens 马铃薯, potato, or 土豆 in the input is replaced by the first word of the line, 土豆. If expand is true (as in the index above), any one of the three tokens is expanded into all three tokens.
Because the index was created with "updateable": true, after modifying the synonyms file you can call POST /my-index/_reload_search_analyzers to reload the search_analyzer configuration.
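A minimal sketch of the reload call against the index from this section:
curl -X POST "http://localhost:9200/custom_ik_smart-000003/_reload_search_analyzers"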