Elasticsearch入门

最新推荐文章于 2024-08-15 14:15:34 发布

冯百萬

最新推荐文章于 2024-08-15 14:15:34 发布

阅读量74

点赞数

文章标签： elasticsearch 数据库 大数据

本文链接：https://blog.csdn.net/qq_36066553/article/details/112302897

版权

一：安装

1.1 使用docker安装

docker pull elasticsearch:7.4.2  存储可检索数据
docker pull kibana:7.4.2            可视化检索数据

mkdir -p /home/elasticsearch/config    创建配置目录
mkdir -p /home/elasticsearch/data      创建数据存储目录

echo "http.host : 0.0.0.0" >>  /home/elasticsearch/config/elasticsearch.yml  写入配置：es可以被任何机器访问

docker run --name elasticsearch -p 9200:9200 -p 9300:9300 \
  -e ES_JAVA_OPTS="-Xms64m -Xmx128m" \
  -v /home/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
  -v /home/elasticsearch/data:/usr/share/elasticsearch/data \
  -v /home/elasticsearch/plugins:/usr/share/elasticsearch/plugins \
  -d elasticsearch:7.4.2

二：检索

2.1 基础查询 match_all QueryDSL

{
  "query": {               查询条件
     "match_all": {}
   },
  "sort": [                 排序
          {
            "account_number": "asc"
          },
         {
           "balance": "desc"
         }
  ],
  "from": 10,           分页参数
  "size": 10,
  "_source": ["balance","firstname"]      只返回某些字段
}

2.2 全文检索 match查询

GET bank/_search
{
	"query": {
		"match": {
  			"account_number": 20     字段为数值时是精确匹配；字段为字符串(text)时会先分词再做全文匹配。结果按相关性得分从高到低排序。
		  }
	}
}

2.3 全文检索 match_phrase 短语查询

GET bank/_search
{
	"query": {
		"match_phrase": {
  			"address": "282 Kings"
		}
	}
}

2.4 全文检索 multi_match 多字段匹配

{
	 "query": {
			"multi_match": {
 				"query": "mill",   需要包含的内容
				 "fields": ["address","firstname"]  在这些字段中检索(任意一个字段匹配即可)；查询内容会被分词，结果按得分从高到低排序
			}
	}
}

2.5 全文检索 bool 复合查询

GET bank/_search
{
	"query": {
		"bool": {                组合多个条件
 			 "must": [           必须满足
    			{
      				"match": {
       					 "gender": "M"
      				}
   				 },
    			{
     				"match": {
        				"address": "282"
      				}
    			}
 			 ],
 			 "must_not": [     必须不满足
    			{
      				"match": {
        				"age": 36
      				}
    			}
  			  ],
  			"should": [     应该满足(得分更高)，不满足也没事
    				{
      					"match": {
        					"lastname": "Holland"
      					}
    				}
  			],
  			"filter": {       结果过滤 不影响得分
    			"range": {    区间
      				"age": {
        				"gte": 10,
        				"lte": 20
      				}
   				 }
  			}
		}
	}
}

2.6 term 查询：非text类型的字段（如数值、keyword）用term做精确匹配

GET /bank/_search
{
	"query": {
		"term": {
 	 		"age": 23
		}
	}
}

三：聚合

# 搜索address中包含mill的所有人的年龄分布、平均年龄以及平均余额(balance)

GET /bank/_search
{
  "query": {
    "match": {
      "address": "mill"
    }
  },
  "aggs": {                     聚合
    "ageAgg": {               自己起的名字
      "terms": {                聚合类型
        "field": "age",        聚合字段
        "size": 10
      }
    },
    "ageAvg":{
      "avg":{
        "field": "age"
      }
    },
    "balanceAvg":{
      "avg": {
        "field": "balance"
      }
    }
  }
}


## 按照年龄聚合，并且求出每个年龄段内这些人的平均余额(balance)
GET /bank/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "ageAgg": {
      "terms": {
        "field": "age",
        "size": 100
      },
      "aggs": {
        "balanceAvg": {
          "avg": {
            "field": "balance"
          }
        }
      }
    }
  }
}


## 查出所有年龄分布，并求出每个年龄段中性别为M和F的人各自的平均余额(balance)，以及该年龄段总体的平均余额
GET /bank/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "ageAgg": {
      "terms": {
        "field": "age",
        "size": 100
      },
      "aggs": {
        "genderAvg": {
          "terms": {
            "field": "gender.keyword",
            "size": 10
          },
          "aggs": {
            "balanceAvg": {
              "avg": {
                "field": "balance"
              }
            }
          }
        },
        "ageBalance":{
          "avg": {
            "field": "balance"
          }
        }
      }
    }
  }
}

四：映射：类似于关系型数据库的建表并设置数据类型

4.1 创建索引并指定映射
PUT /my-index-000001   
{
 "mappings": {
   "properties": {
     "age":    { "type": "integer" },  
     "email":  { "type": "keyword"  }, 
     "name":   { "type": "text"  }     
   }
 }
}



4.2 添加新的字段映射
PUT /my-index-000001/_mapping
{
 "properties": {
   "employee-id": {         新增加的字段
     "type": "keyword",     类型
     "index": false           false:不参与检索   true:参与检索
   }
 }
}


4.3  修改已经存在的映射
    * 已经存在的字段映射不能直接修改。

   如果想修改映射，需要新建一个索引并指定好新的映射，然后把旧索引中的数据迁移过去。
   
4.3.1  新建索引并指定映射
PUT /newbank
{
 "mappings": {
   "properties": {
     "account_number": {
       "type": "long"
     },
     "address": {
       "type": "text"
     },
     "age": {
       "type": "integer"
     },
     "balance": {
       "type": "long"
     },
     "city": {
       "type": "keyword"
     },
     "email": {
       "type": "keyword"
     },
     "employer": {
       "type": "keyword"
     },
     "firstname": {
       "type": "text"
     },
     "gender": {
       "type": "keyword"
     },
     "lastname": {
       "type": "text",
       "fields": {
         "keyword": {
           "type": "keyword",
           "ignore_above": 256
         }
       }
     },
     "state": {
       "type": "keyword"
     }
   }
 }
}

4.3.2 数据迁移   
POST _reindex  [固定写法]
{
 "source": {           
   "index": "bank",    老索引
   "type": "account"
 },
 "dest": {             目标索引
   "index": "newbank"
 }
 
}

五：安装ik分词器

下载与es版本一致的ik分词器（本文为7.4.2），解压并上传至es的plugins目录（放在单独的ik子目录下），重启es即可

六：自定义扩展词库

   	安装nginx，把需要扩展的词写入一个文本文件（如fenci.txt），每个词单独占一行，并通过nginx对外提供访问，
   	然后修改ik分词器的配置文件/home/elasticsearch/plugins/ik/config/IKAnalyzer.cfg.xml，
   	把该文件的访问地址写入remote_ext_dict配置项：<entry key="remote_ext_dict">http://192.168.0.107/es/fenci.txt</entry>
   	最后重启es使配置生效

七：建表

PUT product
{
  
  "mappings": {
    "properties": {
      "skuId":{
        "type": "long"
      },
      "spuId":{
        "type": "keyword"
      },
      "skuTitle":{
        "type": "text",
        "analyzer": "ik_smart"
      },
      "skuPrice":{
        "type": "keyword"
      },
      "skuImg":{
        "type": "keyword",
        "index": false,            不需要检索
        "doc_values": false   不需要聚合
      },
      "saleCount":{
        "type": "long"
      },
      "hasStock":{
        "type": "boolean"
      },
      "brandId":{
        "type": "long"
      },
      "catalogId":{
        "type": "long"
      },
      "brandName":{
        "type": "keyword",
        "index": false,
        "doc_values": false
      },
      "brandImg":{
        "type": "keyword",
        "index": false,
        "doc_values": false
      },
      "catalogName":{
        "type": "keyword",
        "index": false,
        "doc_values": false
      },
      "attrs":{
        "type": "nested",    内部属性，嵌入式，数据不会被扁平化处理
        "properties": {
          "attrId":{
            "type": "long"
          },
          "attrName":{
            "type": "keyword",
            "index": false,
            "doc_values": false
          },
          "attrValue": {
            "type": "keyword"
          }
        }
      }
      
    }
    
  }
  
  
}

ES官方文档：https://www.elastic.co/guide/index.html
ik分词器文档：https://github.com/medcl/elasticsearch-analysis-ik
ik分词器不同版本下载：https://github.com/medcl/elasticsearch-analysis-ik/releases