ES学习

ES学习

ES安装

jdk必须是1.8或以上

ES是基于java开发的 java核心的jar包必须一致

ES默认端口:9200

head默认端口:9100(9300是ES节点之间通信使用的transport端口)

kibana默认端口:5601

1.启动

cnpm install
npm run start

设置跨域:

http.cors.enabled: true
http.cors.allow-origin: "*"

ik分词器

ik_smart和ik_max_word,ik_smart是最少切分,ik_max_word是最细粒度划分。

GET _analyze
{
  "analyzer": "ik_smart",
  "text": "中国共产党"
}
GET _analyze
{
  "analyzer": "ik_max_word",
  "text": "中国共产党"
}

ik分词器增加自己的配置

在IKAnalyzer.cfg.xml中加入自己写的dic配置文件。注意:dic文件的编码格式必须是utf-8,中文词条才会生效。

Rest风格

基本Rest命令

method | url地址 | 描述
PUT | localhost:9200/索引名称/类型名称/文档id | 创建文档(指定文档ID)
POST | localhost:9200/索引名称/类型名称 | 创建文档(随机ID)
POST | localhost:9200/索引名称/类型名称/文档id/_update | 修改文档
DELETE | localhost:9200/索引名称/类型名称/文档id | 删除文档
GET | localhost:9200/索引名称/类型名称/文档id | 通过文档ID查询文档
POST | localhost:9200/索引名称/类型名称/_search | 查询所有数据

基本数据类型:

字符串类型:

text,keyword。

数值类型

long,integer,short,byte,double,float,half_float,scaled_float

日期类型

date

布尔类型

boolean

二进制类型

binary

等等

1.创建索引(新版以后type默认为_doc)

PUT /test1/type1/1
{
  "name":"高帆",
  "age":""
}

1.1指定字段类型创建索引规则

PUT /test2
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "age": {
        "type": "integer"
      },
      "birth": {
        "type": "date"
      }
    }
  }
}

2.获得规则

GET请求获得索引信息

GET test2
//获取默认信息
GET _cat/indices?v

3.修改值第一种方法

PUT /test3/_doc/1
{
  "name":"123",
  "age":12,
  "birth": "1999-7-7"
}
## 修改后返回值
{
  "_index" : "test3",
  "_type" : "_doc",
  "_id" : "1",
    ##版本号增加了
  "_version" : 3,
    ##状态为update
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 2,
  "_primary_term" : 1
}

3.1修改值第二种方法

POST /test3/_doc/1/_update
{
  "doc":{
    "name":"李四"
  }
}

4.删除索引

DELETE /test1

关于文档的操作

基本操作:

简单查询

/_search 代表查询 q代表搜索 name代表字段,gf代表查询的值
GET gf/user/_search?q=name:gf

复杂操作查询(排序,分页,高亮,模糊查询,精准查询):

查询

GET gf/user/_search
{
  //查询参数体 相当于mysql里的where
  "query":{
    "match": {
      "name": "高帆"
    }
  },
  //需要查询的列,mysql查询后跟上列名称
  "_source": ["name","desc"],
  //sort 进行排序 
  "sort":[{
      //排序字段
    "age":{
        //排序方式 desc降序,asc升序
      "order":"desc"
    }
  }
 ],
 //分页参数,from是起始偏移量(从0开始),size是每页返回多少条数据
 "from":0,
 "size":1
}


{
  "took" : 528,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 1.5884793,
      //hits 索引和文档信息,查询结果总数 ,和一些基本信息
    "hits" : [
      {
        "_index" : "gf",
        "_type" : "user",
        "_id" : "1",
           //score,匹配度越高分值越高 
        "_score" : 1.5884793,
        "_source" : {
          "name" : "高帆12"
        }
      },
      {
        "_index" : "gf",
        "_type" : "user",
        "_id" : "4",
        "_score" : 1.5884793,
        "_source" : {
          "name" : "高帆21"
        }
      }
    ]
  }
}

多条件查询

match and操作


GET gf/user/_search
{

  "query":{
  //多条件查询 must 相当于sql中的and  where name=“gf” and age=“1”
    "bool": {
      "must": [
        {
      "match": {
          "name": "高帆"
      }
        },
        {
          "match": {
            "age": 1
          }
        }
      ]
    }
  },
  "_source": ["name","desc","age"],
  "sort": [{
    "age":{
      "order":"desc"
    }
  }
  ]
}


should or操作


GET gf/user/_search
{

  "query":{
    "bool": {
      //多条件查询 should 相当于sql中的or  where name=“gf” or age=“1”
      "should": [
        {
      "match": {
          "name": "高帆"
      }
        },
        {
          "match": {
            "age": 1
          }
        }
      ]
    }
  },
  "_source": ["name","desc","age"],
  "sort": [{
    "age":{
      "order":"desc"
    }
  }
  ]
}


must_not 不等于


GET gf/user/_search
{

  "query":{
    "bool": {
        //不等于操作 where age <>1
      "must_not": [
        {
          "match": {
            "age": 1
          }
        }
      ]
    }
  },
  "_source": ["name","desc","age"],
  "sort": [{
    "age":{
      "order":"desc"
    }
  }
  ]
}


filter 过滤器

GET gf/user/_search
{

  "query":{
    "bool": {
      "must": [
        {
          "match": {
            "name": "高帆"
          }
        }
      ],
        //过滤器 过滤 年龄(age)大于(gt)1并且小于(lt)10的数据
        //gt 大于,gte 大于等于,lt 小于,lte:小于等于
      "filter": [
        {
          "range": {
            "age": {
              "lt": 10,
              "gt": 1
            }
          }
        }
      ]
    }
  },
  "_source": ["name","desc","age"],
  "sort": [{
    "age":{
      "order":"desc"
    }
  }
  ]
}


匹配多个条件

GET gf/user/_search
{

  "query":{
  
          "match": {
              //多个条件用空格隔开,只要满足其中一个条件的文档就会被查出来,匹配的条件越多分值越高
            "tags": "大 游"
          }
  }
}


精确查找

term查询是直接通过倒排索引指定的词条进行精确查找

term,直接查询准确的不使用分词器,

match,会使用分词器解析

两个类型

text:会被拆开查询如like

keyword:不会被拆开查询如=

GET testdb/_search
{
  "query": {
    "term": {
        //desc 为 keyWord查询的是精准的
      "desc":"张三在唱歌 desc"
    }
  }
}
//查询结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 0.6931471,
    "hits" : [
      {
        "_index" : "testdb",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.6931471,
        "_source" : {
          "name" : "张三在唱歌 name",
          "desc" : "张三在唱歌 desc"
        }
      }
    ]
  }
}
//使用text查询
GET testdb/_search
{
  "query": {
    "term": {
      "name":"张"
    }
  }
}
//查询结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 0.18232156,
    "hits" : [
      {
        "_index" : "testdb",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.18232156,
        "_source" : {
          "name" : "张三在唱歌 name",
          "desc" : "张三在唱歌 desc"
        }
      },
      {
        "_index" : "testdb",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 0.18232156,
        "_source" : {
          "name" : "张三在唱歌 name",
          "desc" : "张三在唱歌 desc2"
        }
      }
    ]
  }
}

精确查询多个值

GET testdb/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "term": {
            "desc":"张三在唱歌 desc"
          }
        },
        {
          "term": {
            "desc":"张三在唱歌 desc2"
          }
        }
      ]
    }
  }
}

高亮查询

GET testdb/_search
{
  "query": {
    "match": {
      "name":"张"
    }
  },
   //高亮查询
  "highlight": {
    //自定义高亮标签
    "pre_tags": "<p class='key' style='color:red'>", 
    //结束标签
    "post_tags": "</p>", 
    //高亮字段选择
    "fields": { 
    "name":{}
    }
  }
}

高级客户端索引api

配置es

/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchConfig {
    /**
     * Builds a client that talks to a single node at localhost:9200 over plain HTTP.
     *
     * @return the client instance to be managed by the Spring container
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

创建索引(表)

// Describe the index to create
CreateIndexRequest createRequest = new CreateIndexRequest("gaofan");
// Send the request through the indices client and print the response
CreateIndexResponse createResponse = restHighLevelClient.indices().create(createRequest, RequestOptions.DEFAULT);
System.out.println(createResponse);

判断索引是否存在

// Check whether an index exists
@Test
void testExistIndex() throws IOException {
    GetIndexRequest lookup = new GetIndexRequest("gaofan2");
    // Prints true when the index is present, false otherwise
    System.out.println(restHighLevelClient.indices().exists(lookup, RequestOptions.DEFAULT));
}

删除索引

// Delete an index
@Test
void testDeleteIndex() throws IOException {
    DeleteIndexRequest removal = new DeleteIndexRequest("ilm-history-2-000001");
    AcknowledgedResponse ack = restHighLevelClient.indices().delete(removal, RequestOptions.DEFAULT);
    // Print whether the delete request was acknowledged
    System.out.println(ack.isAcknowledged());
}

添加文档

// Index (add) a document
@Test
void testAddDocument() throws IOException {
    // Serialize the entity to JSON up front
    String payload = new ObjectMapper().writeValueAsString(new User("gf", 1));
    // Equivalent console request: PUT /gaofan/_doc/1
    IndexRequest indexRequest = new IndexRequest("gaofan");
    indexRequest.id("1");
    // Request timeout of one second
    indexRequest.timeout(TimeValue.timeValueSeconds(1));
    indexRequest.source(payload, XContentType.JSON);
    // Send the request and print the response plus its status
    IndexResponse response = restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
    System.out.println(response.toString());
    System.out.println(response.status());
}

判断文档是否存在

// Check whether a document exists
@Test
void testIsExists() throws IOException {
    GetRequest probe = new GetRequest("gaofan", "1");
    // Only existence matters here, so skip fetching _source and stored fields
    probe.fetchSourceContext(new FetchSourceContext(false));
    probe.storedFields("_none_");
    System.out.println(restHighLevelClient.exists(probe, RequestOptions.DEFAULT));
}

获取文档消息

// Fetch a document
@Test
void testGetDocument() throws IOException {
    GetResponse response = restHighLevelClient.get(new GetRequest("gaofan", "1"), RequestOptions.DEFAULT);
    // Document body (_source) as a JSON string
    System.out.println(response.getSourceAsString());
    // The whole response
    System.out.println(response);
}

更新文档

// Partially update a document
@Test
void testUpdateDocument() throws IOException {
    // JSON for the fields to merge into the stored document
    String patch = new ObjectMapper().writeValueAsString(new User("张三", 12));
    UpdateRequest request = new UpdateRequest("gaofan", "1");
    request.timeout(TimeValue.timeValueSeconds(1));
    request.doc(patch, XContentType.JSON);
    UpdateResponse response = restHighLevelClient.update(request, RequestOptions.DEFAULT);
    System.out.println(response.status());
}

删除文档

// Delete a document
@Test
void testDeleteDocument() throws IOException {
    DeleteRequest removal = new DeleteRequest("gaofan", "1");
    removal.timeout("1s");
    DeleteResponse response = restHighLevelClient.delete(removal, RequestOptions.DEFAULT);
    // Print the status of the delete response
    System.out.println(response.status());
}

批量插入

// Bulk insert
@Test
void testBulkRequest() throws IOException {
    // Used to turn each entity into JSON
    ObjectMapper mapper = new ObjectMapper();

    List<User> users = new ArrayList<>();
    users.add(new User("张三", 1));
    users.add(new User("李四", 2));
    users.add(new User("王五", 3));
    users.add(new User("刘六", 4));

    BulkRequest batch = new BulkRequest();
    batch.timeout("10s");

    // Document ids are assigned sequentially starting at 1.
    // Bulk update / bulk delete follow the same pattern with a different request type.
    int docId = 1;
    for (User user : users) {
        IndexRequest item = new IndexRequest("gaofan");
        item.id(String.valueOf(docId++));
        item.source(mapper.writeValueAsString(user), XContentType.JSON);
        batch.add(item);
    }

    BulkResponse response = restHighLevelClient.bulk(batch, RequestOptions.DEFAULT);
    // hasFailures() == false means every item succeeded
    System.out.println(response.hasFailures());
}

查询

// Search
@Test
void testSearch() throws IOException {
    ObjectMapper objectMapper = new ObjectMapper();
    SearchRequest searchRequest = new SearchRequest("gaofan");
    // Holds the query, paging and timeout for the search
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // QueryBuilders is the factory for query objects:
    //   termQuery     -> exact term match
    //   matchAllQuery -> match every document
    // Querying "name.keyword" instead of "name" keeps the Chinese value from being missed
    // (presumably because "name" is an analyzed text field under the default dynamic mapping).
    TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name.keyword", "张三");
    searchSourceBuilder.query(termQueryBuilder);
    // Search timeout
    searchSourceBuilder.timeout(TimeValue.timeValueSeconds(60));
    // Paging: the no-arg from()/size() are getters and configure nothing, so pass values
    // explicitly. 0 and 10 are the defaults Elasticsearch applies when they are unset.
    searchSourceBuilder.from(0);
    searchSourceBuilder.size(10);
    searchRequest.source(searchSourceBuilder);
    SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    // Everything of interest is inside the hits
    System.out.println(objectMapper.writeValueAsString(search.getHits()));
    System.out.println("-----------------------------");

    for (SearchHit hit : search.getHits().getHits()) {
        System.out.println(hit.getSourceAsMap());
    }
}

实战模仿jd搜索

爬虫

导入解析网页的包

 <!--解析网页-->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
package com.gf.utils;

import com.gf.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes product listings from the JD search result page.
 *
 * @author Administrator
 */
@Component
public class HtmlParseUtil {
    /**
     * Fetches the JD search page for a keyword and extracts image, price and title
     * from every {@code li} element under the {@code J_goodsList} container.
     *
     * @param keyWords search keyword; it is URL-encoded before being sent, and
     *                 {@code null} is treated as an empty keyword
     * @return the parsed goods, or an empty list when the page has no
     *         {@code J_goodsList} element
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJd(String keyWords) throws Exception {
        // Encode the keyword so Chinese text, spaces and '&' cannot corrupt the query string.
        // The fully qualified name avoids touching the file's import list.
        String encoded = java.net.URLEncoder.encode(keyWords == null ? "" : keyWords, "UTF-8");
        // e.g. https://search.jd.com/Search?keyword=java
        String url = "https://search.jd.com/Search?keyword=" + encoded;
        // Fetch and parse the page with a 30 s timeout; Document is jsoup's DOM-like page model
        Document document = Jsoup.parse(new URL(url), 30000);
        ArrayList<Content> goodsList = new ArrayList<>();
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // Container missing from the returned page: nothing to parse
            return goodsList;
        }
        // One li per product
        Elements li = element.getElementsByTag("li");
        for (Element el : li) {
            Content content = new Content();
            // Image: read from "src". NOTE(review): the original author notes that lazily loaded
            // images expose the URL on a different attribute — confirm against the live page.
            String img = el.getElementsByTag("img").eq(0).attr("src");
            // Price
            String price = el.getElementsByClass("p-price").eq(0).text();
            // Title
            String title = el.getElementsByClass("p-name").eq(0).text();
            content.setImg(img);
            content.setTitle(title);
            content.setPrice(price);
            goodsList.add(content);
        }
        return goodsList;
    }
}

将爬虫爬到的数据放入ES并搜索

package com.gf.service;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.gf.pojo.Content;
import com.gf.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Indexes goods scraped from JD into Elasticsearch and runs paged, highlighted searches on them.
 *
 * @author Administrator
 */
@Service
public class ContentService {
    @Resource
    private RestHighLevelClient restHighLevelClient;
    @Resource
    private HtmlParseUtil htmlParseUtil;

    /**
     * Scrapes JD for the keyword and bulk-indexes the results into the {@code jd-goods} index.
     *
     * @param keyWords keyword passed to the scraper
     * @return {@code true} when every item was indexed without failure; {@code false} when
     *         the scraper returned nothing or at least one bulk item failed
     * @throws Exception if scraping, serialization or the bulk call fails
     */
    public Boolean parseContent(String keyWords) throws Exception {
        ObjectMapper objectMapper = new ObjectMapper();
        List<Content> contents = htmlParseUtil.parseJd(keyWords);
        if (contents == null || contents.isEmpty()) {
            // An empty bulk request is rejected by the client, so report "nothing indexed" instead
            return false;
        }
        // Collect every item into one bulk request
        BulkRequest bulkRequest = new BulkRequest();
        // Bulk timeout
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            bulkRequest.add(new IndexRequest("jd-goods").source(objectMapper.writeValueAsString(content), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Runs a term query on {@code title} against {@code jd-goods} and returns one page of hits,
     * with the title replaced by its highlighted form when a highlight is available.
     *
     * @param keywords term to look up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the search request fails
     */
    public List<Map<String, Object>> searchPage(String keywords, int pageNo, int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }
        SearchRequest searchRequest = new SearchRequest("jd-goods");
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Paging: "from" is a zero-based hit offset, not a page number
        searchSourceBuilder.from((pageNo - 1) * pageSize);
        searchSourceBuilder.size(pageSize);
        // Exact term match
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keywords);
        searchSourceBuilder.query(termQueryBuilder);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        // Highlighting
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        // Field to highlight
        highlightBuilder.field("title");
        // false: a field is highlighted even when the query did not match that field
        highlightBuilder.requireFieldMatch(false);
        // Tags wrapped around each highlighted fragment
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);
        // Execute
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // Convert the hits
        ArrayList<Map<String, Object>> mapArrayList = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            HighlightField title = highlightFields.get("title");
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            if (title != null) {
                // Join the highlighted fragments and overwrite the plain title
                StringBuilder newTitle = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    newTitle.append(fragment);
                }
                // Store a String, not the StringBuilder, so consumers get a plain text value
                sourceAsMap.put("title", newTitle.toString());
            }
            // Add the map that was just modified rather than asking the hit for it again
            mapArrayList.add(sourceAsMap);
        }
        return mapArrayList;
    }
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值