1.10.2
实体类
/**
 * One product entry taken from a search result page.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}, {@code toString},
 * a no-argument constructor and an all-arguments constructor whose parameter order
 * follows the field declaration order: {@code (src, price, name)}.
 */
@Data
@ToString
@AllArgsConstructor
@NoArgsConstructor
public class Content {

    /** Image address of the product. */
    private String src;

    /** Price text, kept as the raw string shown on the page. */
    private String price;

    /** Product title text. */
    private String name;
}
工具类
package com.kuang.utils;
import com.kuang.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class HtmlParseUtil{
public static List parseJd(String keyword) throws IOException {
List list = new ArrayList<>();
String url = “https://search.jd.com/Search?keyword=”+keyword+“&enc=utf-8”;
Document root = Jsoup.parse(new URL(new String(url.getBytes(),“utf-8”)), 30000);
Element j_goodsList = root.getElementById(“J_goodsList”);
Elements li = j_goodsList.getElementsByTag(“li”);
for (Element element : li) {
String src = element.getElementsByTag(“img”).eq(0).attr(“data-lazy-img”);
String price = element.getElementsByClass(“p-price”).eq(0).text();
String name = element.getElementsByClass(“p-name”).eq(0).text();
list.add(new Content(src,price,name));
}
return list;
}
public static void main(String[] args) throws IOException {
List list = parseJd(“java”);
list.forEach(System.out::println);
}
}
1、连接elasticsearch
package com.kuang.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class ElasticSerachClientConfig {
@Bean
public RestHighLevelClient restHighLevelClient(){
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost(“127.0.0.1”,9200,“http”)));
return client;
}
}
2、controller
- contentController
@RestController
public class ContentController {
@Autowired
private ContentService contentService;
@GetMapping(“/parse/{keyword}”)
public Boolean parse(@PathVariable(“keyword”) String keyword) throws IOException {
return contentService.parseContent(keyword);
}
@GetMapping(“/search/{keyword}/{pageNo}/{pageSize}”)
public List<Map<String, Object>> search(@PathVariable(“keyword”) String keyword
,@PathVariable(“pageNo”) int pageNo,
@PathVariable(“pageSize”) int pageSize
) throws IOException {
return contentService.searchPage(keyword, pageNo, pageSize);
}
}
3、service
package com.kuang.service;
import com.alibaba.fastjson.JSON;
import com.kuang.pojo.Content;
import com.kuang.utils.HtmlParseUtil;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@Service
@RequiredArgsConstructor
public class ContentService {
@Autowired
private RestHighLevelClient restHighLevelClient;
public Boolean parseContent(String keyword) throws IOException {
List contents = HtmlParseUtil.parseJd(keyword);
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout(“2m”);
for (int i = 0; i < contents.size(); i++) {
bulkRequest.add(new IndexRequest(“jd_goods”)
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
}
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
return !bulk.hasFailures();
}
public static void main(String[] args) throws IOException {
new ContentService().parseContent(“java”);
}
public List<Map<String, Object>> searchPage(String keyword,int pageNo, int pageSize) throws IOException {
if (pageNo <= 0) {
pageNo = 1;
}
SearchRequest searchRequest = new SearchRequest(“jd_goods”);
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.from(pageNo);
sourceBuilder.size(pageSize);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(“name”, keyword);
sourceBuilder.query(termQueryBuilder);
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
List<Map<String, Object>> list = new ArrayList<>();
for (SearchHit documentFields : searchResponse.getHits().getHits()) {
list.add(documentFields.getSourceAsMap());
}
return list;
}
}
4、utils
public class HtmlParseUtil{
public static List parseJd(String keyword) throws IOException {
List list = new ArrayList<>();
String url = “https://search.jd.com/Search?keyword=”+keyword+“&enc=utf-8”;
Document root = Jsoup.parse(new URL(new String(url.getBytes(),“utf-8”)), 30000);
Element j_goodsList = root.getElementById(“J_goodsList”);
Elements li = j_goodsList.getElementsByTag(“li”);
for (Element element : li) {
String src = element.getElementsByTag(“img”).eq(0).attr(“data-lazy-img”);
String price = element.getElementsByClass(“p-price”).eq(0).text();
String name = element.getElementsByClass(“p-name”).eq(0).text();
list.add(new Content(src,price,name));
}
return list;
}
public static void main(String[] args) throws IOException {
List list = parseJd(“java”);
list.forEach(System.out::println);
}
}
新建索引jd_goods
访问 http://localhost:9090/parse/java ,把关键字 java 对应的商品数据抓取并写入 jd_goods 索引
访问http://localhost:9090/search/java/1/10 展示java前10条数据
导入axios.js
vue.js
<input v-model="keyword" type="text" autocomplete="off" value="dd" id="mq"
class="s-combobox-input" aria-haspopup="true">
<button @click.prevent="searchPage" type="submit" id="searchbtn">搜索</button>
本文介绍了如何使用 Java 实现仿京东搜索功能,包括连接 Elasticsearch 进行数据存储与检索,以及如何通过 HTTP 请求与前端进行交互。内容涵盖了 HTML 解析库(jsoup)的使用、RESTful API 设计和数据抓取示例。
2166

被折叠的 条评论
为什么被折叠?



