jsoup jar 包(Maven 依赖,添加到 pom.xml;下面的代码还需要 Apache HttpClient 4.3+,即 org.apache.httpcomponents:httpclient):
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches the page at {@code url} with an HTTP GET and returns its body as text.
 *
 * <p>The body is read through {@link #getStreamString(InputStream)} instead of
 * {@code EntityUtils.toString}, because the latter may produce garbled characters
 * for the pages this helper targets.
 *
 * @param url the address to request
 * @return the page source, or {@code null} when the status code is not 200, the
 *         response carries no entity, or the body could not be read (the failure
 *         is printed to stderr)
 * @throws ClientProtocolException if the HTTP exchange violates the protocol
 * @throws IOException if the request cannot be executed or the response cannot be closed
 */
public static String getHtmlByUrl(String url) throws ClientProtocolException, IOException {
    String html = null;
    CloseableHttpClient httpClient = HttpClients.createDefault();
    // The request is built and executed inside the try so that the client is
    // released even when the URL is invalid or the connection fails.
    try (CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
        try {
            int statusCode = response.getStatusLine().getStatusCode();
            // Only 200 is treated as success; getEntity() may be null for body-less responses.
            if (statusCode == HttpStatus.SC_OK && response.getEntity() != null) {
                InputStream content = response.getEntity().getContent();
                html = getStreamString(content);
            }
        } catch (Exception e) {
            // Best effort: a failure while reading the body yields null instead of propagating.
            e.printStackTrace();
        }
    } finally {
        // Runs after the response has been closed, and also when closing it threw.
        try {
            httpClient.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return html;
}
/**
 * Reads an input stream to its end and returns the content as a string,
 * decoding the bytes as GB2312.
 *
 * <p>Line terminators are normalised: every line read is followed by a single
 * {@code '\n'}, including the last one.
 *
 * @param tInputStream the stream to read; it is not closed by this method
 * @return the decoded text, or {@code null} if the stream is {@code null} or
 *         reading fails (the failure is printed to stderr)
 */
public static String getStreamString(InputStream tInputStream) {
    return getStreamString(tInputStream, "gb2312");
}

/**
 * Reads an input stream to its end and returns the content as a string,
 * decoding the bytes with the named charset.
 *
 * <p>Line terminators are normalised: every line read is followed by a single
 * {@code '\n'}, including the last one.
 *
 * @param tInputStream the stream to read; it is not closed by this method
 * @param charsetName  name of the charset used to decode the bytes, e.g. {@code "UTF-8"}
 * @return the decoded text, or {@code null} if the stream is {@code null}, the
 *         charset is missing or unsupported, or reading fails (the failure is
 *         printed to stderr)
 */
public static String getStreamString(InputStream tInputStream, String charsetName) {
    if (tInputStream == null) {
        return null;
    }
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(tInputStream, charsetName));
        StringBuilder text = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            text.append(line).append('\n');
        }
        return text.toString();
    } catch (Exception ex) {
        // Best effort: callers receive null rather than an exception.
        ex.printStackTrace();
        return null;
    }
}
/**
 * Demo entry point: downloads the page given as the first command-line argument,
 * parses it with jsoup and prints the text of every element carrying the CSS
 * class {@code live_data_time}.
 *
 * @param args {@code args[0]} is the URL of the page to fetch
 * @throws ClientProtocolException if the HTTP exchange violates the protocol
 * @throws IOException if the page cannot be requested
 */
public static void main(String[] args) throws ClientProtocolException, IOException {
    if (args.length == 0) {
        System.err.println("Usage: pass the URL of the page to fetch as the first argument");
        return;
    }
    String url = args[0];
    String htmlByUrl = getHtmlByUrl(url);
    if (htmlByUrl != null && !htmlByUrl.isEmpty()) {
        // Parse the downloaded source and select by CSS query (tag, class, ...).
        Document doc = Jsoup.parse(htmlByUrl);
        Elements linksElements = doc.select(".live_data_time");
        for (String text : linksElements.eachText()) {
            System.out.println(text);
        }
    }
}