爬虫
数据问题? 数据库获取,消息队列中获取,都可以成为数据源,爬虫!
爬取数据:(获取请求返回的页面信息,筛选出我们想要的数据就可以了!)
jsoup包!
前后端分离
搜索高亮
config:
@Configuration
public class ElasticSearchConfig {
@Bean
public RestHighLevelClient restHighLevelClient(){
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("localhost", 9200, "http")));
return client;
}
}
controller:
// 请求编写
@RestController
public class ContentController {
@Autowired
private ContentService contentService;
@GetMapping("/parse/{keyword}")
public boolean parse(@PathVariable("keyword")String keyword) throws Exception {
return contentService.parseContent(keyword);
}
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
@PathVariable("pageNo") int pageNo,
@PathVariable("pageSize") int pageSize) throws IOException {
return contentService.searchPageHighlightBuilder(keyword,pageNo,pageSize);
}
}
service:
// 业务编写
@Service
public class ContentService {
@Autowired
RestHighLevelClient restHighLevelClient;
//1、解析数据放入 es 索引中
public boolean parseContent(String keyword) throws Exception {
List<Content> contents = new HtmlParseUtil().parseJD(keyword);
// 把查询到的数据放入es 中
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("2m");
for (int i = 0; i < contents.size(); i++) {
bulkRequest.add(
new IndexRequest("jd_goods")
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
}
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
return !bulk.hasFailures();
}
//2、获取这些数据实现搜索功能
public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
if(pageNo<=1){
pageNo = 1;
}
// 条件搜索
SearchRequest searchRequest = new SearchRequest("jd_goods");
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
//分页
sourceBuilder.from(pageNo);
sourceBuilder.size(pageSize);
//精准匹配
TermQueryBuilder title = QueryBuilders.termQuery("title", keyword);
sourceBuilder.query(title);
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
//执行搜索
searchRequest.source(sourceBuilder);
SearchResponse search = restH