ES仿京东搜索
爬取京东数据
/**
 * Scrapes product listings from the JD.com search result page with jsoup.
 *
 * <p>jsoup only sees the HTML returned by the initial request; content loaded
 * afterwards through AJAX is not available unless a browser is simulated.
 *
 * <p>Example:
 * <pre>{@code
 * new HtmlParseUtil().parseJD("java").forEach(System.out::println);
 * }</pre>
 */
public class HtmlParseUtil {

    /** Search endpoint; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Timeout, in milliseconds, for fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for the given keywords and extracts one
     * {@code Product} per {@code <li>} found inside the {@code J_goodsList} element.
     *
     * @param keywords the search keywords; must not be {@code null}
     * @return the products found, or an empty list when the page contains no
     *         {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched
     * @throws NullPointerException if {@code keywords} is {@code null}
     */
    public List<Product> parseJD(String keywords) throws IOException {
        java.util.Objects.requireNonNull(keywords, "keywords");

        // Encode the keyword so that spaces, '&', '#', non-ASCII text, etc.
        // cannot corrupt the query string.
        String url = SEARCH_URL + java.net.URLEncoder.encode(keywords, "UTF-8");

        // Download and parse the page; Document plays the role of the JS "document" object.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Product> products = new ArrayList<>();

        // The container is absent when the returned page is not a result list,
        // so guard against null instead of failing with a NullPointerException.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return products;
        }

        // Each <li> holds all the attributes of a single product.
        for (Element li : goodsList.getElementsByTag("li")) {
            // The image URL is read from "data-lazy-img" rather than "src"
            // (presumably because the images are lazy-loaded -- confirm against the live page).
            String img = li.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = li.getElementsByClass("p-price").eq(0).text();
            String name = li.getElementsByClass("p-name").eq(0).text();

            Product product = new Product();
            product.setImg(img);
            product.setTitle(name);
            product.setPrice(price);
            products.add(product);
        }
        return products;
    }
}
注入 Elasticsearch 客户端
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a client that talks to the node at 127.0.0.1:9200 over plain HTTP.
     *
     * @return the configured {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
相应的service和controller
/**
 * Service that loads scraped JD product data into Elasticsearch and searches it.
 */
@Service
public class ProductService {

    /** Index that stores the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    /** Document field that is searched and highlighted. */
    private static final String TITLE_FIELD = "title";

    /** Timeout, in seconds, applied to bulk and search requests. */
    private static final long TIMEOUT_SECONDS = 20;

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products matching the keywords and bulk-indexes them into Elasticsearch.
     *
     * @param keywords the search keywords passed to the scraper
     * @return {@code true} when every document was indexed without failure
     * @throws IOException if scraping or the bulk request fails
     * @throws RuntimeException if the scraper finds no product for the keywords
     */
    public Boolean parseProduct(String keywords) throws IOException {
        List<Product> products = new HtmlParseUtil().parseJD(keywords);
        if (CollectionUtils.isEmpty(products)) {
            throw new RuntimeException("未查询到"+keywords+"的相关信息");
        }

        // Collect every product into a single bulk request.
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout(TimeValue.timeValueSeconds(TIMEOUT_SECONDS));
        for (Product product : products) {
            bulkRequest.add(new IndexRequest(INDEX_NAME)
                    .source(JSON.toJSONString(product), XContentType.JSON));
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulkResponse.hasFailures();
    }

    /**
     * Deletes the product index if it exists.
     *
     * @return {@code true} when the index existed and its deletion was acknowledged;
     *         {@code false} when the index does not exist or the deletion was not acknowledged
     * @throws IOException if the request to Elasticsearch fails
     */
    public Boolean deleteIndex() throws IOException {
        GetIndexRequest existsRequest = new GetIndexRequest(INDEX_NAME);
        boolean exists = restHighLevelClient.indices().exists(existsRequest, RequestOptions.DEFAULT);
        if (!exists) {
            return false;
        }
        DeleteIndexRequest deleteRequest = new DeleteIndexRequest(INDEX_NAME);
        AcknowledgedResponse response =
                restHighLevelClient.indices().delete(deleteRequest, RequestOptions.DEFAULT);
        return response.isAcknowledged();
    }

    /**
     * Runs a paged match query on the title field.
     *
     * @param keyword  the text to match against the title
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the search request fails
     */
    public List<Map<String,Object>> searchContentPage(String keyword,int pageNo,int pageSize) throws IOException {
        SearchSourceBuilder builder = newTitleSearch(keyword, pageNo, pageSize);

        List<Map<String, Object>> resultList = new ArrayList<>();
        for (SearchHit hit : execute(builder).getHits().getHits()) {
            resultList.add(hit.getSourceAsMap());
        }
        return resultList;
    }

    /**
     * Runs a paged match query on the title field and replaces each hit's title
     * with its highlighted version when one is available.
     *
     * @param keyword  the text to match against the title
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, with highlighted titles
     * @throws IOException if the search request fails
     */
    public List<Map<String,Object>> searchContentHighLighterPage(String keyword,int pageNo,int pageSize) throws IOException {
        SearchSourceBuilder builder = newTitleSearch(keyword, pageNo, pageSize);

        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        // Must be false when several fields are to be highlighted.
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style =\"color:red\">");
        highlightBuilder.postTags("</span>");
        builder.highlighter(highlightBuilder);

        List<Map<String, Object>> resultList = new ArrayList<>();
        for (SearchHit hit : execute(builder).getHits().getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            HighlightField title = hit.getHighlightFields().get(TITLE_FIELD);
            // A hit may carry no highlight for the title; without this check the
            // lookup below would fail with a NullPointerException.
            if (title != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlighted.append(fragment);
                }
                source.put(TITLE_FIELD, highlighted.toString());
            }
            resultList.add(source);
        }
        return resultList;
    }

    /** Builds the paged title match query shared by both search methods. */
    private SearchSourceBuilder newTitleSearch(String keyword, int pageNo, int pageSize) {
        int page = Math.max(pageNo, 1);
        SearchSourceBuilder builder = new SearchSourceBuilder();
        // "from" is the offset of the first hit, not the page number.
        builder.from((page - 1) * pageSize);
        builder.size(pageSize);
        // A match query analyzes the keyword; a term query would require an exact term.
        builder.query(QueryBuilders.matchQuery(TITLE_FIELD, keyword));
        builder.timeout(TimeValue.timeValueSeconds(TIMEOUT_SECONDS));
        return builder;
    }

    /** Executes the given search source against the product index. */
    private SearchResponse execute(SearchSourceBuilder builder) throws IOException {
        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(builder);
        return restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    }
}
前端实现
#安装vue和axios
cnpm install vue
cnpm install axios
将安装目录下的js拷贝到项目
引入js并写相应的javascript方法
<script th:src="@{/js/vue.min.js}"></script>
<script th:src="@{/js/axios.min.js}"></script>
<script type="text/javascript">
new Vue({
el:'#app',
data:{
keyword:'',//搜索关键字
results:[]//搜索结果
},
methods:{
searchKey(){
var keyword = this.keyword;
console.log(keyword);
axios.get('searchhighlighter/'+keyword+'/1/10').then(
response=>{
console.log(response);
this.results = response.data;//绑定数据
});
}
}
})
</script>
搜索按钮绑定事件searchKey,搜索内容绑定keyword
将查询内容展示到页面
本文是根据狂神说的视频学习整理的,有需要的可以在 bilibili 搜索“狂神说”。