ElasticSearch学习笔记
学习参考
视频参考: 狂神说 ElasticSearch 7.6.x
学习使用版本: ElasticSearch 7.10.2
概括
1. Elasticsearch 是一个分布式的开源全文搜索和分析引擎,适用于所有类型的数据,包括文本、数字、地理空间、结构化和非结构化数据。
2. Elasticsearch 是一个分布式的 RESTful 风格的搜索和数据分析引擎。
关系数据库对比
ES | 关系数据库 |
---|---|
index(索引) | 数据库 |
type(类型,高版本已不再使用,默认为 `_doc`) | 表 |
document(文档) | 行数据 |
field(字段) | 字段 |
开始软件安装
安装
elasticSearch 安装启动
- 下载
- 解压(最好不要解压在/root下面)/opt/es
启动问题
- 不支持root 用户启动 新建用户启动
- adduser esuser //新建用户
- passwd esuser //设置密码
- chown -R esuser /opt/es //赋予文件夹权限
- su esuser //切换用户
- 启动报错
[1]: max file descriptors [4096] for elasticsearch process is too low, increase to at least [65535] [2]: max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144] [3]: the default discovery settings are unsuitable for production use; at least one of [discovery.seed_hosts, discovery.seed_providers, cluster.initial_master_nodes] must be configured
- max file descriptors [4096] for elasticsearch process is too low, increase to at least [65535]
每个进程最大同时打开文件数太小
修改**/etc/security/limits.conf**文件,增加配置,用户退出后重新登录生效
* soft nofile 65536
* hard nofile 65536
* soft nproc 4096
* hard nproc 4096
- max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144]
修改/etc/sysctl.conf文件,增加配置vm.max_map_count=262144
vi /etc/sysctl.conf   # 添加 vm.max_map_count=262144
sysctl -p             # 使配置生效
- the default discovery settings are unsuitable for production use; at least one of [discovery.seed_hosts,
discovery.seed_providers, cluster.initial_master_nodes] must be configured
修改 elasticsearch.yml 取消注释保留一个节点
cluster.initial_master_nodes: ["node-1"]
- max file descriptors [4096] for elasticsearch process is too low, increase to at least [65535]
测试
[root@localhost ~]# curl 192.168.56.12:9200
{
"name" : "localhost.localdomain",
"cluster_name" : "elasticsearch",
"cluster_uuid" : "_na_",
"version" : {
"number" : "7.10.2",
"build_flavor" : "default",
"build_type" : "tar",
"build_hash" : "747e1cc71def077253878a59143c1f785afa92b9",
"build_date" : "2021-01-13T00:42:12.435326Z",
"build_snapshot" : false,
"lucene_version" : "8.7.0",
"minimum_wire_compatibility_version" : "6.8.0",
"minimum_index_compatibility_version" : "6.0.0-beta1"
},
"tagline" : "You Know, for Search"
}
ES可视化工具 ES Head
官网地址 https://github.com/mobz/elasticsearch-head
git clone https://github.com/mobz/elasticsearch-head.git
cd elasticsearch-head
npm install
npm run start
open http://localhost:9100/
连接报跨域问题:修改 elasticsearch.yml 增加以下配置后重启
http.cors.enabled: true
http.cors.allow-origin: "*"
连接报错:org.elasticsearch.discovery.MasterNotDiscoveredException
修改一下 elasticsearch.yml 放开:node.name
ES可视化工具 Kibana
安装
- 下载
- 解压 修改配置
elasticsearch.hosts: ["http://192.168.56.12:9200"]
i18n.locale: "zh-CN"
- 启动 bin目录 ./kibana --allow-root
- ps -ef|grep node 查看 kibana 进程(kibana 以 node 进程运行)
外网访问:
修改 config/kibana.yml 下的 server.host 为 0.0.0.0, 默认是注释掉的或者是localhost
server.host: "0.0.0.0"
中文分词器
- 下载
- 解压到 Elasticsearch 安装目录下的 plugins 目录中
- 重启es
简单命令学习
分词器测试
Ik analyzer:
standard:标准拆分 默认
keyword: 不拆分
ik_smart:最少切分
ik_max_word:最细粒度切分
GET _analyze
{
"analyzer": "standard",
"text": "我在学习es"
}
GET _analyze
{
"analyzer": "ik_max_word",
"text": "我在学习es"
}
#standard : 我、在、学、习、es
#keyword :我在学习es
#ik_smart: 我 、在、学习 、es
#ik_max_word:我、在学、学习、es
kibana 控制台命令
索引操作
创建索引 与字段
PUT test
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"age":{
"type": "integer"
}
}
}
}
新增字段
PUT test/_mapping
{
"properties": {
"desc":{
"type": "text"
}
}
}
插入一条数据 指定id PUT
_doc 默认为Type
PUT test/_doc/1
{
"name":"测试es学习1",
"age":10,
"desc":"这是一个描述"
}
//成功提示
// _doc 默认为Type
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 0,
"_primary_term" : 1
}
创建一条数据 默认生成id Post
POST test/_doc
{
"name":"测试es学习 默认id",
"age":20,
"desc":"这是一个描述默认id"
}
删除
#删除索引
DELETE test2
#删除文档
DELETE test1/_doc/3
获取数据
GET test/_doc/1
修改数据
POST /test/_update/1
{
"doc": {
"name":"测试es学习自定义id"
}
}
查询搜索
查询所有
GET /test/_search
RESTful 查询
GET /test/_search?q=name:学习
条件查询:
查询所有
GET /test/_search
或
GET /test/_search
{
"query": {
"match_all": {}
}
}
单条件查询模糊 使用分词器
GET /test/_search
{
"query": {
"match": {
"name": "自定义"
}
}
}
match_phrase 短语匹配:查询词同样会被分词,但要求各词项按相同顺序连续出现(效果上相当于整体不拆开匹配)
GET /test/_search
{
"query": {
"match_phrase": {
"name": "学习定义"
}
}
}
精准查询 term:查询词本身不经过分词器;name 字段使用默认(standard)分词器按单字建立索引,所以用“学习”无法查询到
GET /test/_search
{
"query": {
"term": {
"name": "学"
}
}
}
多条件 and 查询 bool must
GET /test/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name":"自定义"
}
},
{
"match": {
"age": "10"
}
}
]
}
}
}
多条件 or 查询 bool should
GET /test/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"name":"自定义"
}
},
{
"match": {
"age": "20"
}
}
]
}
}
}
分页 from 开始 size:
GET /test/_search
{
"query": {
"match": {
"name": "学习"
}
},
"from": 0,
"size": 3
}
范围过滤 filter
GET /test/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"name":"自定义"
}
},
{
"match": {
"age": "20"
}
}
],
"filter": [
{
"range": {
"age": {
"gte": 10,
"lte": 30
}
}
}
]
}
}
}
排序 sort
GET /test/_search
{
"query": {
"match": {
"name": "学习定义"
}
},
"from": 0,
"size": 3,
"sort": [
{
"age": {
"order": "desc"
}
}
]
}
fuzzy 根据词进行模糊查询(按分词后的单个词项匹配)
GET /test/_search
{
"query": {
"fuzzy": {
"name":"测试"
}
}
}
指定返回字段 _source
GET /test/_search
{
"query": {
"match": {
"name": "学习"
}
},
"from": 0,
"size": 3,
"_source": ["name"]
}
聚合
数据
PUT /movie/_doc/1
{ "id":1,
"name":"红海行动",
"doubanScore":8.5,
"age":10,
"actorList":[
{"id":1,"name":"张译"},
{"id":2,"name":"海清"},
{"id":3,"name":"张涵予"}
]
}
PUT /movie/_doc/2
{
"id":2,
"name":"湄公河行动",
"doubanScore":8.0,
"age":10,
"actorList":[
{"id":3,"name":"张涵予"}
]
}
PUT /movie/_doc/3
{
"id":3,
"name":"红海事件",
"doubanScore":5.0,
"age":20,
"actorList":[
{"id":4,"name":"张晨"}
]
}
最大最小平均值
//最小 min
//最大 max
// 平均avg
GET /movie/_search
{
"aggs": {
"max_score": {
"min": {
"field": "doubanScore"
}
}
}
}
GET /movie/_search
{
"query": {
"match": {
"name": "行动"
}
},
"aggs": {
"max_score": {
"min": {
"field": "doubanScore"
}
}
}
}
统计 stats
GET /movie/_search
{
"aggs": {
"max_score": {
"stats": {
"field": "doubanScore"
}
}
}
}
"aggregations" : {
"max_score" : {
"count" : 3,
"min" : 5.0,
"max" : 8.5,
"avg" : 7.166666666666667,
"sum" : 21.5
}
}
每个人参演作品
GET /movie/_search
{
"aggs": {
"actor_name": {
"terms": {
"field":"actorList.name.keyword"
}
}
}
}
//部分结果
"aggregations" : {
"actor_name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "张涵予",
"doc_count" : 2
},
{
"key" : "张晨",
"doc_count" : 1
},
{
"key" : "张译",
"doc_count" : 1
},
{
"key" : "海清",
"doc_count" : 1
}
]
}
}
ES 整合springboot
依赖
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
注意修改版本号,使其与已安装的 ES 版本相同
<properties>
<elasticsearch.version>7.10.2</elasticsearch.version>
</properties>
配置客户端
根据官方文档 推荐使用
package com.example.dc.config;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.RestClients;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;
/**
 * Spring configuration that registers the Elasticsearch high-level REST client.
 *
 * <p>The node list is read from the {@code es.url} property.
 */
@Configuration
public class ElasticSearchConfig extends AbstractElasticsearchConfiguration {

    /**
     * Value of the {@code es.url} property: one or more {@code ip:port}
     * entries separated by commas.
     */
    @Value("${es.url}")
    String esUrl;

    /**
     * Builds the client the way the Spring Boot documentation describes.
     *
     * @return a {@link RestHighLevelClient} connected to every entry of
     *         {@code es.url}, registered under the bean name
     *         {@code restHighLevelClient}
     */
    @Override
    @Bean("restHighLevelClient")
    public RestHighLevelClient elasticsearchClient() {
        // Split on commas and discard whitespace around each entry, so that a
        // value such as "10.0.0.1:9200, 10.0.0.2:9200" does not produce a host
        // name with a leading blank.
        String[] hosts = esUrl.trim().split("\\s*,\\s*");
        ClientConfiguration clientConfiguration = ClientConfiguration.builder()
                .connectedTo(hosts)
                .build();
        return RestClients.create(clientConfiguration).rest();
    }

    // Alternative taken from the Kibana / Elasticsearch documentation.
    // (HttpHost is not among this file's imports and would have to be added.)
    // @Bean
    // public RestHighLevelClient restHighLevelClient() {
    //     RestHighLevelClient client = new RestHighLevelClient(
    //             RestClient.builder(
    //                     new HttpHost("127.0.0.1", 9200, "http")
    //             )
    //     );
    //     return client;
    // }
}
索引操作
// Creates the "jb-book" index without an explicit mapping.
// NOTE(review): a no-arg constructor followed by index(...) presumably means
// the legacy org.elasticsearch.action.admin.indices.create.CreateIndexRequest
// is imported - confirm against the test class imports.
@Test
public void test1() throws IOException {
    CreateIndexRequest request = new CreateIndexRequest();
    request.index("jb-book");
    // request.mapping()
    restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
}
// Checks whether the "jb-book" index exists and prints the boolean result.
@Test
public void getIndex() throws IOException {
    GetIndexRequest lookup = new GetIndexRequest("jb-book");
    boolean found = restHighLevelClient.indices().exists(lookup, RequestOptions.DEFAULT);
    System.out.println(found);
}
// Deletes the "jb-book" index and prints the response serialized as JSON.
@Test
public void deleteIndex() throws IOException {
    AcknowledgedResponse response = restHighLevelClient.indices()
            .delete(new DeleteIndexRequest("jb-book"), RequestOptions.DEFAULT);
    System.out.println(JSONObject.toJSONString(response));
}
// Indexes one test document into "jb-book1". No id is set on the request
// (the explicit id line is commented out).
@Test
public void test() throws IOException {
    StoreDto dto = new StoreDto();
    dto.setName("dc");
    dto.setIcon("111");

    IndexRequest indexRequest = new IndexRequest("jb-book1");
    // indexRequest.id("1");
    indexRequest.source(JSONObject.toJSONString(dto), XContentType.JSON);

    IndexResponse response = restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
    System.out.println(JSONObject.toJSONString(response));
}
// Fetches a single document from "jb-book1" by its id and prints it as JSON.
@Test
public void getIndexData() throws IOException {
    GetResponse document = restHighLevelClient.get(
            new GetRequest("jb-book1", "NPGLQncB8YQFXPeZ9kN7"),
            RequestOptions.DEFAULT);
    System.out.println(JSONObject.toJSONString(document));
}
// Partially updates an existing document in "jb-book1" with new field values.
@Test
public void updateData() throws IOException {
    StoreDto changes = new StoreDto();
    changes.setName("112");
    changes.setIcon("");

    UpdateRequest request = new UpdateRequest("jb-book1", "NPGLQncB8YQFXPeZ9kN7");
    request.doc(JSON.toJSONString(changes), XContentType.JSON);

    UpdateResponse response = restHighLevelClient.update(request, RequestOptions.DEFAULT);
    System.out.println(JSONObject.toJSONString(response));
}
// Bulk-indexes ten generated StoreDto documents (suffixes 10..19) into "jb-book1".
@Test
public void batchData() throws IOException {
    BulkRequest bulkRequest = new BulkRequest();
    for (int i = 10; i < 20; i++) {
        StoreDto dto = new StoreDto();
        dto.setName("批量" + i);
        dto.setIcon("qqwww" + i);

        IndexRequest indexRequest = new IndexRequest("jb-book1");
        indexRequest.source(JSON.toJSONString(dto), XContentType.JSON);
        bulkRequest.add(indexRequest);
    }

    BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    // Per-item results are available if needed:
    // for (BulkItemResponse item : response.getItems()) {
    //     item.getId();
    // }
    System.out.println(JSONObject.toJSONString(response));
}
简单查询
简单爬取京东数据
依赖
<!-- 解析网页-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
方法
/**
 * Fetches the JD.com search page for a keyword and extracts one
 * {@code BookVo} per {@code <li>} found under the element with id
 * {@code J_goodsList}.
 *
 * @param keyWord search keyword; it is URL-encoded before being appended to
 *                the query string
 * @return the extracted items; an empty list when the page contains no
 *         {@code J_goodsList} element
 * @throws IOException if the page cannot be fetched or parsed
 */
public List<BookVo> jiexi(String keyWord) throws IOException {
    // Encode the keyword so characters such as spaces, '&' or '#' cannot
    // break or alter the query string. String.valueOf keeps the previous
    // behavior for a null keyword (it is sent as the text "null").
    String url = "https://search.jd.com/Search?keyword="
            + java.net.URLEncoder.encode(String.valueOf(keyWord), "UTF-8");
    Document page = Jsoup.parse(new URL(url), 30000);

    List<BookVo> bookVos = new ArrayList<>();
    Element goodsList = page.getElementById("J_goodsList");
    if (goodsList == null) {
        // The expected container is missing; return no results instead of
        // failing with a NullPointerException.
        return bookVos;
    }

    for (Element item : goodsList.getElementsByTag("li")) {
        BookVo bookVo = new BookVo();
        bookVo.setTitle(item.getElementsByClass("p-name").eq(0).text());
        bookVo.setPrice(item.getElementsByClass("p-price").eq(0).text());
        // The image URL is read from the "data-lazy-img" attribute of the first <img>.
        bookVo.setImg(item.getElementsByTag("img").eq(0).attr("data-lazy-img"));
        bookVos.add(bookVo);
    }
    return bookVos;
}
bookVo
/**
 * Value object for one scraped search result.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class BookVo {
// Text of the element with class "p-name".
private String title;
// Text of the element with class "p-price", kept as a string.
private String price;
// Value of the "data-lazy-img" attribute of the item's first <img>.
private String img;
}
查询爬取数据 初始化
/**
 * Scrapes search results for the keyword and bulk-indexes them into the
 * "jd-book" index. Prints {@code true} when the bulk request reports no
 * failures, {@code false} otherwise (including when nothing was scraped).
 *
 * @param keyWord search keyword passed to the scraper
 * @throws IOException if scraping or the bulk request fails
 */
@RequestMapping("/init")
public void bookVoList(String keyWord) throws IOException {
    // Scrape the data.
    List<BookVo> books = htmlPageUtil.jiexi(keyWord);
    if (books.isEmpty()) {
        // A bulk request without any action is rejected by the client's
        // request validation, so skip the call when there is nothing to index.
        System.out.println(false);
        return;
    }

    BulkRequest bulkRequest = new BulkRequest();
    for (BookVo book : books) {
        IndexRequest indexRequest = new IndexRequest("jd-book");
        indexRequest.source(JSONObject.toJSONString(book), XContentType.JSON);
        bulkRequest.add(indexRequest);
    }

    BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    System.out.println(!response.hasFailures());
}
查询
通过ElasticsearchRepository<T, ID> 的Jpa操作
创建es实体
必须要有主键@id
@Document(indexName = "jd-book") es包注解 绑定索引
/**
 * Elasticsearch entity bound to the "jd-book" index through {@code @Document}.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
@Document(indexName = "jd-book")
public class BookVo {
// Primary key of the document; the entity must declare an @Id field.
@Id
private String id;
// Title of the item.
private String title;
// Price, stored as a string.
private String price;
// Image reference of the item.
private String img;
}
创建接口实体继承ElasticsearchRepository
/**
 * Repository for {@code BookVo} documents keyed by a {@code String} id.
 * CRUD and paging operations come from {@code ElasticsearchRepository}.
 */
public interface BookVoRepository extends ElasticsearchRepository<BookVo,String> {
// Finder on the "title" property.
// NOTE(review): presumably resolved by Spring Data's method-name query derivation - confirm.
List<BookVo> findByTitle(String title);
}
开启EnableElasticsearchRepositories
basePackages :es实体包地址
/**
 * Spring Boot entry point. Enables Elasticsearch repositories;
 * {@code basePackages} points at the package that holds the ES entities.
 */
@SpringBootApplication
@EnableElasticsearchRepositories(basePackages = "com.example.dc.es")
public class DcApplication {
// Starts the application with the given command-line arguments.
public static void main(String[] args) {
SpringApplication.run(DcApplication.class, args);
}
}
使用
@Autowired
BookVoRepository bookVoRepository;
// Looks up documents through the repository finder and prints them as JSON.
@Test
public void repositoryTest() {
    List<BookVo> matches = bookVoRepository.findByTitle("java");
    String json = JSONObject.toJSONString(matches);
    System.out.println(json);
}
// Exercises the repository: save, find by id, then a paged findAll.
@Test
public void repositorySave() {
    BookVo draft = new BookVo();
    draft.setTitle("bookVoRepository 保存测试");
    draft.setPrice("100");
    draft.setImg("测试");

    // Save
    BookVo saved = bookVoRepository.save(draft);
    System.out.println(JSONObject.toJSONString(saved));

    // Look the saved document up again by its id
    BookVo reloaded = bookVoRepository.findById(saved.getId()).get();
    System.out.println(JSONObject.toJSONString(reloaded));

    // Sort fields should generally be keyword fields, otherwise an error is raised:
    // Sort sort = Sort.by(Sort.Direction.DESC, "price.keyword");
    // PageRequest paging = PageRequest.of(0, 20, sort); // paging with sort
    PageRequest paging = PageRequest.of(0, 20);
    Page<BookVo> page = bookVoRepository.findAll(paging);
    System.out.println("总条数:" + page.getTotalElements());
    System.out.println(JSONObject.toJSONString(page.getContent()));
}
原生ES 查询
普通查询
// Runs a term query against "jd-book" with sorting, paging and highlighting,
// then prints the hits with the highlighted title substituted in.
@Test
public void EsSearch() throws IOException {
    // Other query builders that can be used instead:
    //   match all: MatchAllQueryBuilder builder = QueryBuilders.matchAllQuery();
    //   match:     MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title", "java");
    //   range:     RangeQueryBuilder title = QueryBuilders.rangeQuery("title").gt("12");
    TermQueryBuilder termQuery = QueryBuilders.termQuery("title", "java");

    // Highlighting: wrap matches in the title in a red <span>.
    HighlightBuilder highlighter = new HighlightBuilder();
    highlighter.field("title");
    highlighter.preTags("<span style='color:red'>");
    highlighter.postTags("</span>");
    // Original note: highlight the queried field (field-match requirement disabled).
    highlighter.requireFieldMatch(false);

    SearchSourceBuilder source = new SearchSourceBuilder()
            .query(termQuery)
            // Sorting
            .sort("price.keyword", SortOrder.DESC)
            // Paging
            .from(2)
            .size(10)
            .highlighter(highlighter);

    SearchRequest request = new SearchRequest();
    request.indices("jd-book");
    request.source(source);

    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

    // Collect the hits, replacing the title with its highlighted form when present.
    List<Map<String, Object>> rows = new ArrayList<>();
    for (SearchHit hit : response.getHits().getHits()) {
        Map<String, Object> row = hit.getSourceAsMap();
        HighlightField highlightedTitle = hit.getHighlightFields().get("title");
        if (highlightedTitle != null) {
            StringBuilder joined = new StringBuilder();
            for (Text fragment : highlightedTitle.fragments()) {
                joined.append(fragment);
            }
            row.put("title", joined.toString());
        }
        rows.add(row);
    }
    System.out.println(JSONObject.toJSONString(rows));
}
聚合
// Runs a cardinality aggregation on "age" against the "movie" index and
// prints the aggregations as JSON. Other aggregations are listed as comments.
@Test
public void aggrTest() throws IOException {
    // Count of values in a field:
    //   ValueCountAggregationBuilder field = AggregationBuilders.count("age1").field("age");
    // Filtered count:
    //   FilterAggregationBuilder field = AggregationBuilders.filter("age1",
    //           QueryBuilders.rangeQuery("age").gt(10));
    // Group by value and count per group:
    //   TermsAggregationBuilder field = AggregationBuilders.terms("age1").field("age");
    // Metric aggregations:
    //   AggregationBuilders.avg()
    //   AggregationBuilders.min()
    //   SumAggregationBuilder field = AggregationBuilders.sum("age1").field("age");
    //   StatsAggregationBuilder field = AggregationBuilders.stats("age1").field("age");
    // Aggregation that also returns the matching documents:
    //   FilterAggregationBuilder field = AggregationBuilders.filter("age1",
    //           QueryBuilders.rangeQuery("age").gt(10))
    //           .subAggregation(AggregationBuilders.topHits("top_data"));

    // Distinct (de-duplicated) count of "age".
    CardinalityAggregationBuilder distinctAges =
            AggregationBuilders.cardinality("age1").field("age");

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.aggregation(distinctAges);

    SearchRequest request = new SearchRequest();
    request.indices("movie");
    request.source(source);

    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    System.out.println(JSONObject.toJSONString(response.getAggregations()));
}