1. 使用 Maven 引入 jsoup 依赖
<!-- jsoup HTML parser (org.jsoup:jsoup); the example code below uses it through Jsoup.connect. -->
<!-- NOTE(review): the version is pinned to 1.10.2; check whether a newer jsoup release should be used. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
2. 解析代码示例
/**
 * Searches the book site for {@code str} and scrapes the detail page of the first hit.
 *
 * <p>Steps: fetch the search page, take the first element with class {@code title},
 * extract the real detail URL from the {@code url=} parameter of its link, fetch that
 * page, and read the cover image, the two {@code intro} blocks and the {@code #info}
 * block from its first {@code article} element.
 *
 * <p>The returned map starts with whatever {@link #analyseBookInfo(String)} extracts and
 * then adds the keys {@code bookName}, {@code info}, {@code isbn}, {@code imgSrc},
 * {@code intro} and {@code authorDetail}. Page parts that are missing yield empty strings.
 *
 * @param str search keyword; it is also stored under the {@code "isbn"} key
 *            (NOTE(review): presumably callers pass an ISBN here - confirm)
 * @return a success response wrapping the result map
 * @throws IOException if a page cannot be fetched, no search result exists, or the
 *                     result link / detail page does not have the expected structure
 * @throws IllegalArgumentException if {@code str} is null or empty
 */
public ServerResponse getInfo(String str) throws IOException
{
    if (str == null || str.isEmpty()) {
        throw new IllegalArgumentException("search keyword must not be null or empty");
    }

    // Encode the keyword so characters such as '&', '#' or spaces cannot corrupt the query string.
    String url = "https://www.xxxxxxxx.com/xxxxxxxx?q=" + java.net.URLEncoder.encode(str, "UTF-8");
    Document document = openBrowserLikeConnection(url).get();

    Element book = document.getElementsByClass("title").first();
    if (book == null) {
        throw new IOException("no search result found for: " + str);
    }
    Elements links = book.getElementsByTag("a");
    String name = links.html();
    String splitUrl = extractRedirectTarget(links.attr("href"));

    Document documentLink = openBrowserLikeConnection(splitUrl).get();
    Element detailInfo = documentLink.getElementsByClass("article").first();
    if (detailInfo == null) {
        throw new IOException("detail page has no 'article' element: " + splitUrl);
    }

    Element mainPic = detailInfo.getElementById("mainpic");
    String imgSrc = mainPic == null ? "" : mainPic.select("img").attr("src");

    Elements introBlocks = detailInfo.getElementsByClass("intro");
    String intro = introHtmlAt(introBlocks, 0);
    String authorDetail = introHtmlAt(introBlocks, 1);

    // Strip the tags but keep the line structure; analyseBookInfo splits on '\n'.
    Element baseInfo = detailInfo.getElementById("info");
    String infoDetail = baseInfo == null ? "" : baseInfo.toString().replaceAll("</?[^>]+>", "");

    Map<String,String> result = analyseBookInfo(infoDetail);
    result.put("bookName", name);
    result.put("info", infoDetail);
    result.put("isbn", str);
    // The cover URL is routed through the images.weserv.nl proxy.
    result.put("imgSrc", "https://images.weserv.nl/?url=" + imgSrc);
    result.put("intro", intro);
    result.put("authorDetail", authorDetail);
    return ServerResponse.createBySuccess(result);
}

/** Creates a jsoup connection with a 5 second timeout and browser-like request headers. */
private Connection openBrowserLikeConnection(String url)
{
    return Jsoup.connect(url)
            .timeout(5000)
            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
            .header("Accept-Encoding", "gzip, deflate, sdch")
            .header("Accept-Language", "zh-CN,zh;q=0.8")
            .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
}

/** Returns the percent-decoded value of the {@code url=} parameter of a search-result link. */
private String extractRedirectTarget(String href) throws IOException
{
    final String startMarker = "url=";
    int start = href.indexOf(startMarker);
    if (start < 0) {
        throw new IOException("search result link has no 'url=' parameter: " + href);
    }
    start += startMarker.length();

    // The value normally ends at "&query="; otherwise stop at the next parameter or the end.
    int end = href.indexOf("&query=", start);
    if (end < 0) {
        end = href.indexOf('&', start);
    }
    String encoded = end < 0 ? href.substring(start) : href.substring(start, end);

    try {
        return java.net.URLDecoder.decode(encoded, "UTF-8");
    } catch (IllegalArgumentException e) {
        throw new IOException("malformed percent-encoding in search result link: " + href, e);
    }
}

/** Returns the HTML of the {@code <p>} elements inside the block at {@code index}, or "" if there is no such block. */
private String introHtmlAt(Elements introBlocks, int index)
{
    return index < introBlocks.size() ? introBlocks.get(index).select("p").html() : "";
}
/**
 * Extracts known fields from the tag-stripped text of the book info block.
 *
 * <p>All space characters are removed, the text is split into lines on {@code '\n'},
 * and every line of the form {@code label:value} is inspected. The value of the line
 * whose label is {@code "文本1"} is stored under the key {@code "page"}; if several
 * such lines exist, the last one wins.
 *
 * @param oldStr plain text with one {@code label:value} pair per line; may be null or empty
 * @return a new mutable map holding the recognised fields; empty when nothing matched
 */
public Map<String,String> analyseBookInfo(String oldStr){
    Map<String,String> resultMap = new HashMap<>();
    if (oldStr == null || oldStr.isEmpty()) {
        return resultMap;
    }

    String[] infoSplit = oldStr.replace(" ", "").split("\n");
    for (String line : infoSplit) {
        // Only lines with a non-empty label in front of the colon are label/value pairs.
        if (line.indexOf(":") <= 0) {
            continue;
        }
        // Limit 2 keeps colons inside the value and guarantees two elements even for "label:".
        String[] filedSplit = line.split(":", 2);
        if ("文本1".equals(filedSplit[0].trim())) {
            // trim() also drops a trailing '\r' left over from "\r\n" line endings.
            resultMap.put("page", filedSplit[1].trim());
        }
    }
    return resultMap;
}