首先配置es地址
package com.example.li_es_api.li.config;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes the Elasticsearch {@link RestHighLevelClient} bean.
 *
 * <p>Connection settings are read from environment variables so that host and
 * credentials do not have to live in source control. When a variable is unset
 * or empty, the value that used to be hard-coded here is used instead:
 * <ul>
 *   <li>{@code ES_HOST} (default {@code 127.0.0.1})</li>
 *   <li>{@code ES_PORT} (default {@code 9200})</li>
 *   <li>{@code ES_SCHEME} (default {@code http})</li>
 *   <li>{@code ES_USERNAME} (default {@code elastic})</li>
 *   <li>{@code ES_PASSWORD} (default: the previous built-in password)</li>
 * </ul>
 *
 * @author Li Zongyang
 * @Date 2022/5/30
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds the high-level REST client with HTTP basic authentication.
     *
     * @return a client pointing at the configured Elasticsearch node
     * @throws NumberFormatException if {@code ES_PORT} is set but is not a valid integer
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        String hostname = envOrDefault("ES_HOST", "127.0.0.1");
        int port = Integer.parseInt(envOrDefault("ES_PORT", "9200"));
        String scheme = envOrDefault("ES_SCHEME", "http");
        String username = envOrDefault("ES_USERNAME", "elastic");
        // NOTE(review): the fallback password is kept only for backward compatibility;
        // set ES_PASSWORD in every real deployment and drop this default when possible.
        String password = envOrDefault("ES_PASSWORD", "520996");

        RestClientBuilder builder = RestClient.builder(new HttpHost(hostname, port, scheme));

        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(
                AuthScope.ANY, new UsernamePasswordCredentials(username, password));
        builder.setHttpClientConfigCallback(
                httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider));

        return new RestHighLevelClient(builder);
    }

    /** Returns the environment variable {@code name}, or {@code fallback} when it is unset or empty. */
    private static String envOrDefault(String name, String fallback) {
        String value = System.getenv(name);
        return (value == null || value.isEmpty()) ? fallback : value;
    }
}
依赖
<!-- 引入图形验证码依赖 --> <dependency> <groupId>com.github.penggle</groupId> <artifactId>kaptcha</artifactId> <version>2.3.2</version> </dependency> <!-- 网页解析 --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency>
通过解析网页获取数据存储到es索引实现搜索
HtmlUtils
package com.example.li_es_api.li.utils;
import com.example.li_es_api.li.model.HtmlModel;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import javax.swing.text.html.parser.ContentModel;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Fetches a JD search result page and extracts the listed products from it.
 *
 * @author Li Zongyang
 * @Date 2022/5/31
 */
@Component
public class HtmlUtils {

    /** Search page address; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Timeout, in milliseconds, handed to Jsoup for fetching the page. */
    private static final int TIMEOUT_MILLIS = 3000;

    /**
     * Downloads the search page for {@code keyWords} and turns every {@code <li>}
     * found under the element with id {@code J_goodsList} into an {@link HtmlModel}.
     *
     * @param keyWords search keyword; it is URL-encoded (UTF-8) before being sent
     * @return the extracted items; an empty list when the keyword is null or empty,
     *         or when the page does not contain the {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<HtmlModel> getHtmlData(String keyWords) throws IOException {
        List<HtmlModel> list = new ArrayList<>();
        if (keyWords == null || keyWords.isEmpty()) {
            return list;
        }

        // Encode the keyword so characters such as '&', '#', '+' or '%' cannot
        // corrupt the query string.
        String encodedKeyword = java.net.URLEncoder.encode(keyWords, "UTF-8");
        Document document = Jsoup.parse(new URL(SEARCH_URL + encodedKeyword), TIMEOUT_MILLIS);

        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The expected container is missing from the returned page, so there
            // is nothing to extract.
            return list;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String title = item.getElementsByClass("p-name").eq(0).text();
            String price = item.getElementsByClass("p-price").eq(0).text();
            list.add(new HtmlModel(img, price, title));
        }
        return list;
    }
}
Controller
package com.example.li_es_api.li.controller;
import com.example.li_es_api.li.model.TestEnum;
import com.example.li_es_api.li.service.ContentService;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * REST endpoints that delegate to {@link ContentService} for loading data
 * into the search index and for querying it.
 *
 * @author Li Zongyang
 * @Date 2022/5/31
 */
@RestController
public class ContentController {

    @Resource
    private ContentService contentService;

    /**
     * Asks the service to load data for the given keyword.
     *
     * <p>The original path {@code /getDate} is a misspelling of {@code /getData};
     * it is kept so existing callers continue to work, and the correctly spelled
     * path is accepted as well.
     *
     * @param keyWord keyword forwarded to {@link ContentService#getData}
     * @return the value returned by {@link ContentService#getData}
     * @throws IOException if the service call fails with an I/O error
     */
    @GetMapping({"/getDate", "/getData"})
    public Boolean getData(@RequestParam String keyWord) throws IOException {
        return contentService.getData(keyWord);
    }

    /**
     * Runs a paged search for the given keyword.
     *
     * @param keyWord  text to search for
     * @param pageNo   page number, forwarded unchanged to the service
     * @param pageSize page size, forwarded unchanged to the service
     * @return the hits returned by {@link ContentService#search}, one map per hit
     * @throws IOException if the service call fails with an I/O error
     */
    @GetMapping("/search")
    public List<Map<String, Object>> search(
            @RequestParam String keyWord,
            @RequestParam int pageNo,
            @RequestParam int pageSize) throws IOException {
        return contentService.search(keyWord, pageNo, pageSize);
    }
}
ServiceImpl
package com.example.li_es_api.li.service.impl;
import com.alibaba.fastjson2.JSON;
import com.example.li_es_api.li.controller.ContentController;
import com.example.li_es_api.li.model.HtmlModel;
import com.example.li_es_api.li.service.ContentService;
import com.example.li_es_api.li.utils.HtmlUtils;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import javax.swing.text.Highlighter;
import javax.swing.text.html.parser.ContentModel;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * {@link ContentService} implementation that stores scraped items in the
 * {@code js_goods} Elasticsearch index and runs highlighted searches on it.
 *
 * @author Li Zongyang
 * @Date 2022/5/31
 */
@Service("contentServiceImpl")
public class ContentServiceImpl implements ContentService {

    /** Index used for both indexing and searching. */
    private static final String INDEX_NAME = "js_goods";

    /** Document field that is matched against the keyword and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Resource
    private HtmlUtils htmlUtils;

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the items for {@code keyWords} and bulk-indexes them into {@code js_goods}.
     *
     * @param keyWords keyword passed to {@link HtmlUtils#getHtmlData}
     * @return {@code true} when the bulk request completed without failures;
     *         {@code false} when there was nothing to index or at least one item failed
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    @Override
    public Boolean getData(String keyWords) throws IOException {
        List<HtmlModel> items = htmlUtils.getHtmlData(keyWords);
        if (items == null || items.isEmpty()) {
            // A bulk request without any action is rejected by request validation,
            // so report "nothing stored" instead of sending it.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (HtmlModel item : items) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                            .source(JSON.toJSONString(item), XContentType.JSON));
        }

        BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !response.hasFailures();
    }

    /**
     * Searches {@code js_goods} for documents whose {@code title} matches
     * {@code keyWord} and returns one page of results.
     *
     * <p>When a hit carries highlight fragments for {@code title}, the
     * {@code title} entry of the returned map is replaced by the concatenated
     * fragments (matches wrapped in a red {@code <span>}); otherwise the stored
     * title is returned unchanged.
     *
     * @param keyWord  text matched against the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as the first page
     * @param pageSize maximum number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the Elasticsearch call fails
     */
    @Override
    public List<Map<String, Object>> search(String keyWord, int pageNo, int pageSize) throws IOException {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.size(pageSize);
        // "from" is a document offset, not a page number: convert the page number.
        sourceBuilder.from(Math.max(pageNo - 1, 0) * pageSize);
        sourceBuilder.query(QueryBuilders.matchQuery(TITLE_FIELD, keyWord));
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Highlight the matched parts of the title.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.requireFieldMatch(true);
        highlightBuilder.preTags("<span style='color: red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            // The highlight entry for the title can be absent for a hit; only
            // overwrite the stored title when fragments are actually present.
            HighlightField highlightField = hit.getHighlightFields().get(TITLE_FIELD);
            if (highlightField != null && highlightField.getFragments() != null) {
                StringBuilder highlightedTitle = new StringBuilder();
                for (Text fragment : highlightField.getFragments()) {
                    highlightedTitle.append(fragment);
                }
                source.put(TITLE_FIELD, highlightedTitle.toString());
            }
            results.add(source);
        }
        return results;
    }
}
model
package com.example.li_es_api.li.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import javax.servlet.annotation.WebFilter;
/**
 * One item extracted from a search result page by {@code HtmlUtils}.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and an all-arguments constructor whose parameter
 * order follows the field order below: {@code (img, price, title)}.
 *
 * <p>This is a plain data holder, not a servlet filter, so it carries no
 * {@code @WebFilter} annotation.
 *
 * @author Li Zongyang
 * @Date 2022/5/31
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class HtmlModel {

    /** Value of the item's {@code data-lazy-img} image attribute. */
    private String img;

    /** Text of the item's {@code p-price} element. */
    private String price;

    /** Text of the item's {@code p-name} element. */
    private String title;
}
package com.example.li_es_api.li.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Plain data holder with three fields.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString}
 * ({@code @Data}), a no-argument constructor and an all-arguments constructor whose
 * parameter order follows the field order below: {@code (name, age, address)}.
 *
 * @author Li Zongyang
 * @Date 2022/5/30
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class User {
//private static final String STRING = "null";
// Presumably the user's display name; nothing in this file reads it - confirm with callers.
private String name;
// Boxed Integer, so the value may be null when not set.
private Integer age;
// Presumably a free-form postal address; confirm with callers.
private String address;
}