思路:
先通过 HttpURLConnection 对象建立连接并获取输入流,把网页源码下载下来;
然后使用正则表达式在下载到的网页源码中检索,提取出需要的那部分代码(本例中是 class 为 cookie-warn 的 div)。
废话少说,上代码:
package HW0811;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal web-scraping example: downloads one page over HTTP and prints the
 * {@code <div class="cookie-warn">} element found in its source.
 */
public class PaChong {

    /** Exact opening tag of the element that {@link #getDiv(String)} extracts. */
    private static final String OPEN_TAG = "<div class=\"cookie-warn\">";

    /** Matches the start of any opening div tag, or a closing div tag. Compiled once. */
    private static final Pattern DIV_TAG = Pattern.compile("<div[\\s>]|</div>");

    /**
     * Fetches the page, and on HTTP 200 prints the extracted div (or {@code null}
     * when the page does not contain it).
     *
     * @param args unused
     * @throws Exception on any connection or read failure
     */
    public static void main(String[] args) throws Exception {
        // Page to download.
        URL url = new URL("https://www.oneplus.com");
        // Open the HTTP connection.
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.connect();
            // Status 200 means the page was served normally.
            int status = conn.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                // Report the failure instead of exiting silently.
                System.err.println("Unexpected HTTP status: " + status);
                return;
            }
            StringBuilder sb = new StringBuilder();
            // try-with-resources closes the reader and the underlying stream even on error.
            // Decode explicitly as UTF-8 rather than with the platform default charset.
            try (InputStream ips = conn.getInputStream();
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(ips, StandardCharsets.UTF_8))) {
                // Read the page source; lines are concatenated without separators.
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                }
            }
            System.out.println(getDiv(sb.toString()));
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Extracts the first {@code <div class="cookie-warn">} element from the given HTML,
     * including its opening and closing tags.
     *
     * <p>The closing tag is located by counting nested {@code <div>} / {@code </div>}
     * pairs, so the result ends at the tag that closes the cookie-warn div rather
     * than at the last {@code </div>} in the document. Line breaks inside the element
     * are allowed. This is a plain text scan, not an HTML parser: div-like text
     * inside comments or scripts is counted as well.
     *
     * @param html the page source; may be {@code null}
     * @return the element's markup, or {@code null} if {@code html} is {@code null},
     *         the opening tag is absent, or the element is never closed
     */
    public static String getDiv(String html) {
        if (html == null) {
            return null;
        }
        int start = html.indexOf(OPEN_TAG);
        if (start < 0) {
            return null;
        }
        Matcher tags = DIV_TAG.matcher(html);
        // Begin scanning at the opening tag itself so that it is counted as depth 1.
        tags.region(start, html.length());
        int depth = 0;
        while (tags.find()) {
            if (tags.group().startsWith("</")) {
                depth--;
                if (depth == 0) {
                    return html.substring(start, tags.end());
                }
            } else {
                depth++;
            }
        }
        // The opening tag was found but never balanced by a closing tag.
        return null;
    }
}