controller
/**
 * Fetches the text content of a web page and returns it as a string.
 *
 * <p>Delegates the fetch to {@code SpiderUtil.getActicle(url)}. On success, the {@code "tags"}
 * list in the response body is truncated at the first entry whose {@code "name"} is
 * {@code "span"} and whose {@code "text"} is {@code "收录于话题"}: that entry and everything
 * after it are dropped. The body is then returned via its {@code toString()} form. If the body
 * has no {@code "tags"} entry, it is returned unchanged.
 *
 * @param url address of the page to fetch
 * @return {@code Res.ok} wrapping the body's string form on success; otherwise
 *     {@code Res.failed} carrying the message reported by the spider response
 * @throws IOException presumably propagated from the page fetch — TODO confirm against SpiderUtil
 */
@ApiOperation(value = "获取网页文字内容")
@SysLog("获取网页文字内容")
@GetMapping("/getUrlContent")
public Res<String> getUrlContent(String url) throws IOException {
    // NOTE(review): url comes straight from the request. If SpiderUtil fetches it server-side,
    // the allowed schemes/hosts should be restricted to avoid SSRF — confirm with the owner.
    Resp<JSONObject> resp = SpiderUtil.getActicle(url);
    if (!resp.isSuccess()) {
        return Res.failed(resp.getMsg());
    }

    JSONObject body = resp.getBody();
    // The body is untyped JSON, so the element type of "tags" cannot be verified at compile time.
    @SuppressWarnings("unchecked")
    List<Map<String, Object>> tagsList = (List<Map<String, Object>>) body.get("tags");
    if (tagsList != null) {
        List<Map<String, Object>> returnList = Lists.newArrayList();
        for (Map<String, Object> map : tagsList) {
            // Constant-first equals: a tag entry without "name" or "text" must not throw.
            if ("span".equals(map.get("name")) && "收录于话题".equals(map.get("text"))) {
                break;
            }
            returnList.add(map);
        }
        body.put("tags", returnList);
    }
    return Res.ok(body.toString());
}
文章爬取工具类 SpiderUtil
package com.clina.matron.util;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import