springboot整合elasticsearch进行基本操作,将爬取的京东搜索信息批量插入到elasticsearch中并进行查询与高亮查询。

Spring Boot整合Elasticsearch进行基本操作,并将爬取的京东搜索信息批量插入到Elasticsearch中,再进行普通查询与高亮查询。

1. 创建项目,引入elasticsearch依赖(注意:项目中elasticsearch客户端依赖的版本要与所连接的elasticsearch服务端版本保持一致)

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.70</version>
        </dependency>

2. 编写elasticsearch连接配置文件

/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class elasticsearchConfig {

    /**
     * Builds a {@link RestHighLevelClient} pointed at 127.0.0.1:9200 over plain HTTP.
     *
     * @return the client instance exposed to the application context
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

3. 在测试类中进行测试

  • 创建索引
/**
 * Spring Boot test class that calls Elasticsearch through the injected
 * {@link RestHighLevelClient}.
 */
@SpringBootTest
class EsApiApplicationTests {

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Sends a create-index request for {@code jd_db} and prints the response.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void createTest() throws IOException {
        // Describe the index to create.
        CreateIndexRequest request = new CreateIndexRequest("jd_db");

        // Execute the request through the indices client and print what came back.
        CreateIndexResponse response =
                restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
        System.out.println(response);
    }
}
  • 获取索引
//    获取索引
    /**
     * Asks the indices client whether the {@code db_test} index exists and prints the answer.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void getTest() throws IOException {
        GetIndexRequest request = new GetIndexRequest("db_test");
        boolean indexPresent =
                restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT);
        System.out.println(indexPresent);
    }
  • 删除索引
//    删除索引
    /**
     * Sends a delete-index request for {@code db_test} and prints the acknowledgement response.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void deleteTest() throws IOException {
        DeleteIndexRequest request = new DeleteIndexRequest("db_test");
        AcknowledgedResponse response =
                restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
        System.out.println(response);
    }
  • 向索引中插入文档数据
//    操作文档
    /**
     * Indexes one {@code Book}, serialized to JSON, into {@code db_test} under id "1",
     * then prints the response and its status.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void addDocument() throws IOException {
        // Serialize the object that becomes the document body.
        String bookJson = JSON.toJSONString(new Book("java", 12));

        // Target index, document id and JSON source.
        IndexRequest request = new IndexRequest("db_test")
                .id("1")
                .source(bookJson, XContentType.JSON);
        // Request timeout.
        request.timeout("2s");

        // Send the request through the client.
        IndexResponse response = restHighLevelClient.index(request, RequestOptions.DEFAULT);

        System.out.println(response.toString());
        System.out.println(response.status());
    }
  • 判断文档是否存在
//    Check whether the document exists
    /**
     * Checks whether the document with id "1" exists in {@code db_test} and prints the result.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void exitsDcoument() throws IOException {
        // Only existence matters here, so source fetching is turned off and
        // stored fields are set to "_none_".
        GetRequest request = new GetRequest("db_test", "1")
                .fetchSourceContext(new FetchSourceContext(false))
                .storedFields("_none_");

        boolean documentPresent = restHighLevelClient.exists(request, RequestOptions.DEFAULT);
        System.out.println(documentPresent);
    }
  • 获取文档信息
//    获取文档信息
    /**
     * Fetches the document with id "1" from {@code db_test} and prints its source
     * followed by the whole response.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void getDocument() throws IOException {
        GetResponse response =
                restHighLevelClient.get(new GetRequest("db_test", "1"), RequestOptions.DEFAULT);

        // Document body as a string.
        System.out.println(response.getSourceAsString());
        // The response object itself.
        System.out.println(response);
    }

  • 更新文档信息
//    更新文档信息
    /**
     * Updates the document with id "1" in {@code db_test} using a new {@code Book}
     * serialized to JSON, then prints the response status.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void updateDocument() throws IOException {
        String replacementJson = JSON.toJSONString(new Book("C++", 13));

        UpdateRequest request = new UpdateRequest("db_test", "1")
                .doc(replacementJson, XContentType.JSON);

        UpdateResponse response = restHighLevelClient.update(request, RequestOptions.DEFAULT);
        System.out.println(response.status());
    }
  • 删除文档信息
//    删除文档信息

    /**
     * Deletes the document with id "1" from {@code db_test} and prints the response status.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void deleteDocument() throws IOException {
        DeleteRequest request = new DeleteRequest("db_test", "1");
        DeleteResponse response = restHighLevelClient.delete(request, RequestOptions.DEFAULT);
        System.out.println(response.status());
    }
  • 批量插入数据
//    批量插入数据
    /**
     * Indexes several {@code Book} objects into {@code db_test} with one bulk request
     * and prints whether any item failed.
     *
     * <p>Documents get the ids "1", "2", "3", ... in list order.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void bulkDocument() throws IOException {

        BulkRequest bulkRequest = new BulkRequest();

        ArrayList<Book> books = new ArrayList<>();
        books.add(new Book("python", 55));
        books.add(new Book("php", 53));
        books.add(new Book("Go", 34));

        for (int i = 0; i < books.size(); i++) {
            // Do the arithmetic before converting to a string: ""+i+1 would
            // concatenate and produce "01", "11", "21" instead of "1", "2", "3".
            String documentId = String.valueOf(i + 1);
            bulkRequest.add(
                    new IndexRequest("db_test")
                            .id(documentId)
                            .source(JSON.toJSONString(books.get(i)), XContentType.JSON)
            );
        }

        // Send all index operations in one round trip.
        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);

        System.out.println(bulkResponse.hasFailures());
    }
  • 查询文档信息
//    查询
    /**
     * Runs a term query for "php" on the "name" field, prints the hits as JSON,
     * then prints each hit's source map.
     *
     * @throws IOException if the client call fails
     */
    @Test
    void searchDocument() throws IOException {
        // Query definition: term query on the "name" field.
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(QueryBuilders.termQuery("name", "php"));

        // The search request is created without an index name.
        SearchRequest request = new SearchRequest();
        request.source(sourceBuilder);

        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

        System.out.println(JSON.toJSONString(response.getHits()));
        System.out.println("------------------------------------");

        SearchHit[] hits = response.getHits().getHits();
        for (int i = 0; i < hits.length; i++) {
            System.out.println(hits[i].getSourceAsMap());
        }
    }

4. 将爬取的京东搜索信息批量插入到elasticsearch中并进行查询与高亮查询

  • 引入依赖
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.2</version>
        </dependency>
        
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.70</version>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>

  • 编写实体类
/**
 * Holder for one product entry taken from a JD search result page.
 * HtmlParseUtil.getHtmlByJd fills the three fields from each list item.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {

    // Value of the "data-lazy-img" attribute of the item's first <img> tag.
    private String img;
    // Text of the item's first element with class "p-price".
    private String price;
    // Text of the item's first element with class "p-name".
    private String pname;
}
  • 编写html解析工具类
/**
 * Fetches a JD search result page and converts each listed product into a {@code Content}.
 */
@Component
public class HtmlParseUtil {

    /**
     * Manual check: scrapes the results for the keyword "java" and prints every entry.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().getHtmlByJd("java").forEach(System.out::println);
    }

    /**
     * Downloads the JD search page for {@code keyword} and extracts image, price and
     * name from every {@code <li>} inside the element with id {@code J_goodsList}.
     *
     * @param keyword search keyword; it is URL-encoded before being placed in the query string
     * @return the extracted entries, or an empty list when the page has no
     *         {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or the keyword cannot be encoded
     */
    public ArrayList<Content> getHtmlByJd(String keyword) throws IOException {

        ArrayList<Content> contents = new ArrayList<>();

        // Example: https://search.jd.com/Search?keyword=java
        // Encode the keyword so non-ASCII text, spaces and reserved characters
        // ('&', '#', ...) do not corrupt the query string.
        String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword;

        // Fetch and parse the page (30 s timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The returned page does not contain the product list (getElementById
            // yields null in that case); report "no results" instead of failing
            // with a NullPointerException.
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // The image URL is read from the "data-lazy-img" attribute rather than "src".
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String pname = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setImg(img);
            content.setPrice(price);
            content.setPname(pname);

            contents.add(content);
        }

        return contents;
    }

}

  • 编写controller
/**
 * REST endpoints that delegate to {@code ContentService} for scraping and searching.
 */
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Passes the keyword to {@code ContentService.parseHtml} and returns its result.
     *
     * @param keyword keyword taken from the request path
     * @return the value reported by the service
     * @throws Exception whatever the service call throws
     */
    @GetMapping("/parseHtml/{keyword}")
    public boolean parseHtml(@PathVariable String keyword) throws Exception {
        return contentService.parseHtml(keyword);
    }

    /**
     * Passes the search parameters to {@code ContentService.searchByJd} and returns the hits.
     *
     * @param keyword  keyword taken from the request path
     * @param pageNum  paging value taken from the request path
     * @param pageSize page size taken from the request path
     * @return the list of result maps produced by the service
     * @throws IOException if the service call fails
     */
    @GetMapping("/searchByJd/{keyword}/{pageNum}/{pageSize}")
    public List<Map<String, Object>> searchByJd(@PathVariable String keyword,
                                                @PathVariable int pageNum,
                                                @PathVariable int pageSize) throws IOException {
        return contentService.searchByJd(keyword, pageNum, pageSize);
    }

}
  • 编写service 将数据批量插入到elasticsearch
/**
 * Service that stores scraped JD products in the {@code jd_db} index and searches them,
 * optionally with highlighting of the product name.
 */
@Service
public class ContentServiceimpl implements ContentService {

    /** Index holding the scraped products. */
    private static final String JD_INDEX = "jd_db";

    /** Document field that is queried and highlighted. */
    private static final String PRODUCT_NAME_FIELD = "pname";

    @Autowired
    private HtmlParseUtil htmlParseUtil;

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the JD results for {@code keyword} and bulk-indexes them into {@code jd_db}.
     *
     * @param keyword search keyword handed to the scraper
     * @return {@code true} when at least one product was scraped and the bulk request
     *         reported no failures; {@code false} otherwise
     * @throws IOException if scraping or the bulk request fails
     */
    @Override
    public boolean parseHtml(String keyword) throws IOException {

        ArrayList<Content> htmlByJds = htmlParseUtil.getHtmlByJd(keyword);

        // A bulk request without any operation is rejected by the client's request
        // validation, so report "nothing indexed" instead of sending it.
        if (htmlByJds.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        for (Content content : htmlByJds) {
            bulkRequest.add(
                    new IndexRequest(JD_INDEX).source(JSON.toJSONString(content), XContentType.JSON)
            );
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);

        return !bulkResponse.hasFailures();
    }

    /**
     * Searches {@code jd_db} with a term query on {@code pname}.
     *
     * @param keyword  term to look for in the {@code pname} field
     * @param pageNum  page number, treated as 1-based; values below 1 select the first page
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the search request fails
     */
    @Override
    public List<Map<String, Object>> searchByJd(String keyword, int pageNum, int pageSize) throws IOException {

        SearchRequest searchRequest = new SearchRequest(JD_INDEX);
        searchRequest.source(buildSearchSource(keyword, pageNum, pageSize));

        // Execute the search.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> hitList = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            hitList.add(hit.getSourceAsMap());
        }

        return hitList;
    }

    /**
     * Same search as {@link #searchByJd}, but with highlighting requested for {@code pname}:
     * when a hit has highlight fragments for that field, they replace the {@code pname}
     * value in the returned map.
     *
     * @param keyword  term to look for in the {@code pname} field
     * @param pageNum  page number, treated as 1-based; values below 1 select the first page
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, with {@code pname}
     *         wrapped in {@code <span style='color:red'>} tags where highlighted
     * @throws IOException if the search request fails
     */
    public List<Map<String, Object>> searchByJdHighlighter(String keyword, int pageNum, int pageSize) throws IOException {

        SearchSourceBuilder searchSourceBuilder = buildSearchSource(keyword, pageNum, pageSize);

        // Highlight configuration for the product name.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(PRODUCT_NAME_FIELD);
        // false: a field may be highlighted even if the query did not match on that field.
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(JD_INDEX);
        searchRequest.source(searchSourceBuilder);

        // Execute the search.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> hitList = new ArrayList<>();

        for (SearchHit hit : searchResponse.getHits().getHits()) {

            HighlightField highlightedName = hit.getHighlightFields().get(PRODUCT_NAME_FIELD);

            // Original document fields.
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();

            if (highlightedName != null) {
                // Join all fragments and use them in place of the plain product name.
                StringBuilder joined = new StringBuilder();
                for (Text fragment : highlightedName.fragments()) {
                    joined.append(fragment);
                }
                sourceAsMap.put(PRODUCT_NAME_FIELD, joined.toString());
            }

            hitList.add(sourceAsMap);
        }

        return hitList;
    }

    /**
     * Builds the paging, query and timeout settings shared by both search methods.
     */
    private SearchSourceBuilder buildSearchSource(String keyword, int pageNum, int pageSize) {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // "from" is a document offset, not a page number: convert the page number
        // into the index of the first hit on that page.
        int pageIndex = Math.max(pageNum - 1, 0);
        searchSourceBuilder.from(pageIndex * pageSize);
        searchSourceBuilder.size(pageSize);

        // Term query: the keyword is matched as-is, without analysis.
        TermQueryBuilder queryBuilder = QueryBuilders.termQuery(PRODUCT_NAME_FIELD, keyword);
        searchSourceBuilder.query(queryBuilder);

        // Search timeout.
        searchSourceBuilder.timeout(new TimeValue(30, TimeUnit.SECONDS));

        return searchSourceBuilder;
    }

}
  • 查看索引中插入的数据
    在这里插入图片描述

  • 启动项目后输入关键字进行查询
    在这里插入图片描述

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值