通过 HttpClient 和 Jsoup 提取百度首页的标题
创建一个 Maven 项目
在 pom.xml 的 <dependencies> 节点中添加以下依赖：
<!-- Apache HttpClient: used by the example below to issue the HTTP GET request. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
<!-- Jsoup: used by the example below to parse the returned HTML and read the title element. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.9.2</version>
</dependency>
示例代码：
package com.gcx.test;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches the Baidu home page over HTTP and prints the text of its
 * {@code <title>} element to standard output.
 */
public class App
{
    /** Address of the page whose title is extracted. */
    private static final String TARGET_URL = "https://www.baidu.com";

    /**
     * Downloads the page, parses it with Jsoup and prints its title.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        String webContent;
        // try-with-resources closes the response and then the client, even when
        // executing the request or decoding the body throws.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(new HttpGet(TARGET_URL))) {
            HttpEntity entity = response.getEntity();
            // getEntity() may return null when the response carries no body;
            // treat that as an empty document instead of failing inside EntityUtils.
            webContent = (entity == null) ? "" : EntityUtils.toString(entity, "utf-8");
        }
        System.out.println("网页标题是:" + extractTitle(webContent));
    }

    /**
     * Parses the given HTML and returns the text of its first {@code <title>} element.
     *
     * @param html the HTML source to parse
     * @return the title text, or an empty string when the document has no title element
     */
    private static String extractTitle(String html) {
        Document doc = Jsoup.parse(html);
        Elements titles = doc.getElementsByTag("title");
        if (titles.isEmpty()) {
            // Avoid IndexOutOfBoundsException on pages without a <title>.
            return "";
        }
        Element first = titles.get(0);
        return first.text();
    }
}