<!-- 爬虫相关 -->
<dependency>
<!-- jsoup HTML parser library @ https://jsoup.org/ ; NOTE: 1.10.2 is outdated — versions before 1.14.2 have known security issues, consider upgrading -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- selenium -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
</dependency>
<!-- chrome浏览器驱动-->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-chrome-driver</artifactId>
</dependency>
<!-- 使用类浏览器HtmlUnitDriver 需要用到的包-->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
前端代码:ajax
//通过 ajax 调用测试成功;但在浏览器地址栏直接输入会报错
//例如 http://localhost:8881/http?http=http://www.baidu.com 会报错:参数值里的 "://" 未经 URL 编码;
//ajax 能成功是因为 jQuery 会自动对 data 里的参数值做 URL 编码。这个问题花了 2 天才定位。
输入网址:<input type="text" id="wwwdata" value="" >
<button id="wwwbtn">查询</button><br>
<div id="wwwok"> </div><br>
<hr>
<script type="text/javascript" src="${pageContext.request.contextPath}/js/jquery.min.js"></script>
// Look up page info for the address the user typed.
// jQuery URL-encodes the values in `data`, which is why this works
// via AJAX while pasting the raw query string into the browser fails.
$("#wwwbtn").click(function(){
    // Guard against an empty input so we don't fire a pointless request.
    var site = $.trim($("#wwwdata").val());
    if (!site) {
        $("#wwwok").html("请输入网址");
        return;
    }
    $.ajax({
        url: "http://localhost:8881/http",
        type: "get",
        data: {http: site},      // value is URL-encoded by jQuery
        dataType: "json",
        success: function(result){
            if (result.status == 200) {
                $("#wwwok").html(site + "查询结果:" + result.data.title);
            } else {
                // Back end returns status 0 + msg ("未找到记录") on failure.
                $("#wwwok").html(result.msg || "未找到记录");
            }
        },
        error: function(xhr){
            // Surface transport/HTTP errors instead of failing silently.
            $("#wwwok").html("请求失败:HTTP " + xhr.status);
        }
    });
});
后端:
//----SpringBoot 入口
@SpringBootApplication
//----------controller类
//导入jar包
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import com.lj.pa.server.entity.ResponseResult;
@RestController
/**
 * Crawls the page at the user-supplied address with Jsoup and returns its
 * {@code <title>} wrapped in a {@link ResponseResult}.
 *
 * @param http address entered by the user, e.g. "www.baidu.com"; a scheme is
 *             prepended only when missing, so "https://example.com" also works
 * @return status 200 with {@code data = {"title": ...}} on success;
 *         status 0 with message "未找到记录" when the page has no title
 * @throws IOException if the remote page cannot be fetched
 */
@RequestMapping(value="/http",method=RequestMethod.GET)
public ResponseResult httpdata(@RequestParam String http) throws IOException{
    // Only prepend a scheme when the caller did not supply one;
    // the original code produced "http://https://..." for full URLs.
    String url = http.matches("(?i)^https?://.*") ? http : "http://" + http;
    Document doc = Jsoup.connect(url).get();
    String title = doc.title();              // never null per Jsoup contract, guarded anyway
    System.out.println("title is: " + title);
    ResponseResult result = new ResponseResult();
    if (title == null || title.isEmpty()) {
        result.setStatus(0);
        result.setMsg("未找到记录");
    } else {
        Map<String,Object> map = new HashMap<String,Object>();
        map.put("title", title);
        result.setStatus(200);
        result.setMsg("查询成功");
        result.setData(map);
    }
    return result;
}
//------ ResponseResult: uniform JSON envelope returned to the front end
public class ResponseResult implements Serializable{
/*
 * Processing status:
 * 200 = success, 0 = failure
 */
private int status;
/*
 * Human-readable message (e.g. "查询成功" / "未找到记录")
 */
private String msg;
/*
 * Payload; the controller stores a Map<String,Object> with key "title" here
 */
private Object data;
/**
 * Returns the processing status.
 * @return status value: 200 = success, 0 = failure
 */
public int getStatus() {
return status;
}
/**
 * Sets the processing status.
 * @param status status value (200 = success, 0 = failure)
 */
public void setStatus(int status) {
this.status = status;
}
/** Returns the message text. */
public String getMsg() {
return msg;
}
/** Sets the message text. */
public void setMsg(String msg) {
this.msg = msg;
}
/** Returns the payload object. */
public Object getData() {
return data;
}
/** Sets the payload object. */
public void setData(Object data) {
this.data = data;
}
爬虫 抓取数据后 显示到前台页面,jsoup包,ajax,设置一个返回ResponseResult结果集
最新推荐文章于 2024-08-07 14:26:10 发布