import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
/**
 * Small jsoup demo: fetches a page, prints its title, and dumps the links found
 * inside the element whose id is {@code content}, plus PNG images and the first
 * {@code div.masthead} element.
 */
public class Testpc {

    /**
     * Entry point; runs {@link #getUrl(String)} against a fixed URL and ignores
     * the returned value.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        getUrl("https://www.2345.com/?38264-0011");
    }

    /**
     * Sends a POST request to {@code url}, parses the response as HTML and prints
     * a summary of it to standard output.
     *
     * <p>Printed, in order: the document title, one line per {@code <a>} element
     * found under the element with id {@code content}, and finally a single line
     * made of that element's HTML, its links, all {@code img} elements whose
     * {@code src} ends in {@code .png}, and the first {@code div.masthead}
     * element (printed as {@code null} when absent).
     *
     * <p>If the page has no element with id {@code content}, the link listing is
     * skipped and the content/link parts of the final line are empty, instead of
     * failing with a {@link NullPointerException}.
     *
     * <p>An {@link IOException} raised while fetching is reported via
     * {@code printStackTrace()} and not propagated.
     *
     * @param url the address to fetch
     * @return always the empty string
     */
    public static String getUrl(String url) {
        try {
            // NOTE(review): the form field, cookie and user agent look like
            // placeholder sample values — confirm they are intended for this site.
            org.jsoup.nodes.Document doc = Jsoup.connect(url)
                    .data("query", "Java")
                    .userAgent("Mozilla")
                    .cookie("auth", "token")
                    .timeout(3000)
                    .post();

            String title = doc.title();
            System.out.println("title:" + title);

            // getElementById returns null when no element carries the id, so guard
            // every use of the result.
            org.jsoup.nodes.Element content = doc.getElementById("content");
            String contentHtml;
            Elements links;
            if (content == null) {
                contentHtml = "";
                links = new Elements();
            } else {
                contentHtml = content.toString();
                // Every <a>...</a> element nested under the content element.
                links = content.getElementsByTag("a");
            }

            for (org.jsoup.nodes.Element link : links) {
                String linkHref = link.attr("href");
                String linkText = link.text();
                System.out.println("linkHref:" + linkHref + "linkText:" + linkText);
            }

            // Images whose src attribute ends with ".png".
            Elements pngs = doc.select("img[src$=.png]");
            // First div element with class "masthead"; null if there is none.
            org.jsoup.nodes.Element masthead = doc.select("div.masthead").first();

            System.out.println(contentHtml + links + pngs + masthead);
        } catch (IOException e) {
            // Best-effort demo: report the fetch failure and fall through to the
            // default return value.
            e.printStackTrace();
        }
        return "";
    }
}